Add ability to support Gemini Nano Banana image generation

Henry 2025-11-28 11:46:50 +00:00
parent 069ba28bc0
commit 92d4d179ed
6 changed files with 637 additions and 257 deletions

View File

@@ -647,6 +647,12 @@
     "input_cost": 0.00002,
     "output_cost": 0.00012
 },
+{
+    "label": "gemini-3-pro-image-preview",
+    "name": "gemini-3-pro-image-preview",
+    "input_cost": 0.00002,
+    "output_cost": 0.00012
+},
 {
     "label": "gemini-2.5-pro",
     "name": "gemini-2.5-pro",
@@ -659,6 +665,12 @@
     "input_cost": 1.25e-6,
     "output_cost": 0.00001
 },
+{
+    "label": "gemini-2.5-flash-image",
+    "name": "gemini-2.5-flash-image",
+    "input_cost": 1.25e-6,
+    "output_cost": 0.00001
+},
 {
     "label": "gemini-2.5-flash-lite",
     "name": "gemini-2.5-flash-lite",

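Editor's note: the new entries follow the existing pricing schema. As a minimal sketch of how such an entry could be consumed — assuming the costs are USD per token, following whatever convention the surrounding entries use; estimateCost and the token counts are illustrative, not part of this commit:

    // Hypothetical cost estimation over a model entry; field names mirror models.json.
    interface ModelPricing {
        label: string
        name: string
        input_cost: number // assumed USD per input token
        output_cost: number // assumed USD per output token
    }

    const flashImage: ModelPricing = {
        label: 'gemini-2.5-flash-image',
        name: 'gemini-2.5-flash-image',
        input_cost: 1.25e-6,
        output_cost: 0.00001
    }

    const estimateCost = (m: ModelPricing, inputTokens: number, outputTokens: number): number =>
        inputTokens * m.input_cost + outputTokens * m.output_cost

    // 1000 input tokens + 500 output tokens ≈ $0.00625 under the per-token assumption
    console.log(estimateCost(flashImage, 1000, 500))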
View File

@@ -22,21 +22,16 @@ import zodToJsonSchema from 'zod-to-json-schema'
 import { getErrorMessage } from '../../../src/error'
 import { DataSource } from 'typeorm'
 import {
+    addImageArtifactsToMessages,
+    extractArtifactsFromResponse,
     getPastChatHistoryImageMessages,
     getUniqueImageMessages,
     processMessagesWithImages,
     replaceBase64ImagesWithFileReferences,
+    replaceInlineDataWithFileReferences,
     updateFlowState
 } from '../utils'
-import {
-    convertMultiOptionsToStringArray,
-    getCredentialData,
-    getCredentialParam,
-    processTemplateVariables,
-    configureStructuredOutput
-} from '../../../src/utils'
-import { addSingleFileToStorage } from '../../../src/storageUtils'
-import fetch from 'node-fetch'
+import { convertMultiOptionsToStringArray, processTemplateVariables, configureStructuredOutput } from '../../../src/utils'

 interface ITool {
     agentSelectedTool: string
@@ -1072,12 +1067,6 @@ class Agent_Agentflow implements INode {
             llmIds = await analyticHandlers.onLLMStart(llmLabel, messages, options.parentTraceIds)
         }

-        // Track execution time
-        const startTime = Date.now()
-
-        // Get initial response from LLM
-        const sseStreamer: IServerSideEventStreamer | undefined = options.sseStreamer
-
         // Handle tool calls with support for recursion
         let usedTools: IUsedTool[] = []
         let sourceDocuments: Array<any> = []
@@ -1090,12 +1079,24 @@ class Agent_Agentflow implements INode {
         const messagesBeforeToolCalls = [...messages]
         let _toolCallMessages: BaseMessageLike[] = []

+        /**
+         * Add image artifacts from previous assistant responses as user messages.
+         * Images are converted from FILE-STORAGE::<image_path> to base64 image_url format.
+         */
+        await addImageArtifactsToMessages(messages, options)
+
         // Check if this is humanInput for tool calls
         const _humanInput = nodeData.inputs?.humanInput
         const humanInput: IHumanInput = typeof _humanInput === 'string' ? JSON.parse(_humanInput) : _humanInput
         const humanInputAction = options.humanInputAction
         const iterationContext = options.iterationContext

+        // Track execution time
+        const startTime = Date.now()
+
+        // Get initial response from LLM
+        const sseStreamer: IServerSideEventStreamer | undefined = options.sseStreamer
+
         if (humanInput) {
             if (humanInput.type !== 'proceed' && humanInput.type !== 'reject') {
                 throw new Error(`Invalid human input type. Expected 'proceed' or 'reject', but got '${humanInput.type}'`)
@@ -1234,9 +1235,15 @@ class Agent_Agentflow implements INode {
         // Prepare final response and output object
         let finalResponse = ''
         if (response.content && Array.isArray(response.content)) {
-            finalResponse = response.content.map((item: any) => item.text).join('\n')
+            finalResponse = response.content
+                .filter((item: any) => item.text)
+                .map((item: any) => item.text)
+                .join('\n')
         } else if (response.content && typeof response.content === 'string') {
             finalResponse = response.content
+        } else if (response.content === '') {
+            // Empty response content; this can happen when there is only image data
+            finalResponse = ''
         } else {
             finalResponse = JSON.stringify(response, null, 2)
         }
@@ -1252,10 +1259,13 @@ class Agent_Agentflow implements INode {
             }
         }

-        // Extract artifacts from annotations in response metadata
+        // Extract artifacts from annotations in response metadata and replace inline data
         if (response.response_metadata) {
-            const { artifacts: extractedArtifacts, fileAnnotations: extractedFileAnnotations } =
-                await this.extractArtifactsFromResponse(response.response_metadata, newNodeData, options)
+            const {
+                artifacts: extractedArtifacts,
+                fileAnnotations: extractedFileAnnotations,
+                savedInlineImages
+            } = await extractArtifactsFromResponse(response.response_metadata, newNodeData, options)

             if (extractedArtifacts.length > 0) {
                 artifacts = [...artifacts, ...extractedArtifacts]
@@ -1273,6 +1283,11 @@ class Agent_Agentflow implements INode {
                     sseStreamer.streamFileAnnotationsEvent(chatId, fileAnnotations)
                 }
             }
+
+            // Replace inlineData base64 with file references in the response
+            if (savedInlineImages && savedInlineImages.length > 0) {
+                replaceInlineDataWithFileReferences(response, savedInlineImages)
+            }
         }

         // Replace sandbox links with proper download URLs. Example: [Download the script](sandbox:/mnt/data/dummy_bar_graph.py)
@@ -1331,9 +1346,15 @@ class Agent_Agentflow implements INode {
         // Process template variables in state
         newState = processTemplateVariables(newState, finalResponse)

+        /**
+         * Remove the temporarily added image artifact messages before storing.
+         * This avoids storing the actual base64 data in the database.
+         */
+        const messagesToStore = messages.filter((msg: any) => !msg._isTemporaryImageMessage)
+
         // Replace the actual messages array with one that includes the file references for images instead of base64 data
         const messagesWithFileReferences = replaceBase64ImagesWithFileReferences(
-            messages,
+            messagesToStore,
             runtimeImageMessagesWithFileRef,
             pastImageMessagesWithFileRef
         )
@@ -1500,44 +1521,6 @@ class Agent_Agentflow implements INode {
         return builtInUsedTools
     }

-    /**
-     * Saves base64 image data to storage and returns file information
-     */
-    private async saveBase64Image(
-        outputItem: any,
-        options: ICommonObject
-    ): Promise<{ filePath: string; fileName: string; totalSize: number } | null> {
-        try {
-            if (!outputItem.result) {
-                return null
-            }
-
-            // Extract base64 data and create buffer
-            const base64Data = outputItem.result
-            const imageBuffer = Buffer.from(base64Data, 'base64')
-
-            // Determine file extension and MIME type
-            const outputFormat = outputItem.output_format || 'png'
-            const fileName = `generated_image_${outputItem.id || Date.now()}.${outputFormat}`
-            const mimeType = outputFormat === 'png' ? 'image/png' : 'image/jpeg'
-
-            // Save the image using the existing storage utility
-            const { path, totalSize } = await addSingleFileToStorage(
-                mimeType,
-                imageBuffer,
-                fileName,
-                options.orgId,
-                options.chatflowid,
-                options.chatId
-            )
-
-            return { filePath: path, fileName, totalSize }
-        } catch (error) {
-            console.error('Error saving base64 image:', error)
-            return null
-        }
-    }
-
     /**
      * Handles memory management based on the specified memory type
      */
@@ -2484,190 +2467,6 @@ class Agent_Agentflow implements INode {
         return { response: newResponse, usedTools, sourceDocuments, artifacts, totalTokens, isWaitingForHumanInput }
     }

-    /**
-     * Extracts artifacts from response metadata (both annotations and built-in tools)
-     */
-    private async extractArtifactsFromResponse(
-        responseMetadata: any,
-        modelNodeData: INodeData,
-        options: ICommonObject
-    ): Promise<{ artifacts: any[]; fileAnnotations: any[] }> {
-        const artifacts: any[] = []
-        const fileAnnotations: any[] = []
-
-        if (!responseMetadata?.output || !Array.isArray(responseMetadata.output)) {
-            return { artifacts, fileAnnotations }
-        }
-
-        for (const outputItem of responseMetadata.output) {
-            // Handle container file citations from annotations
-            if (outputItem.type === 'message' && outputItem.content && Array.isArray(outputItem.content)) {
-                for (const contentItem of outputItem.content) {
-                    if (contentItem.annotations && Array.isArray(contentItem.annotations)) {
-                        for (const annotation of contentItem.annotations) {
-                            if (annotation.type === 'container_file_citation' && annotation.file_id && annotation.filename) {
-                                try {
-                                    // Download and store the file content
-                                    const downloadResult = await this.downloadContainerFile(
-                                        annotation.container_id,
-                                        annotation.file_id,
-                                        annotation.filename,
-                                        modelNodeData,
-                                        options
-                                    )
-
-                                    if (downloadResult) {
-                                        const fileType = this.getArtifactTypeFromFilename(annotation.filename)
-                                        if (fileType === 'png' || fileType === 'jpeg' || fileType === 'jpg') {
-                                            const artifact = {
-                                                type: fileType,
-                                                data: downloadResult.filePath
-                                            }
-                                            artifacts.push(artifact)
-                                        } else {
-                                            fileAnnotations.push({
-                                                filePath: downloadResult.filePath,
-                                                fileName: annotation.filename
-                                            })
-                                        }
-                                    }
-                                } catch (error) {
-                                    console.error('Error processing annotation:', error)
-                                }
-                            }
-                        }
-                    }
-                }
-            }
-
-            // Handle built-in tool artifacts (like image generation)
-            if (outputItem.type === 'image_generation_call' && outputItem.result) {
-                try {
-                    const savedImageResult = await this.saveBase64Image(outputItem, options)
-                    if (savedImageResult) {
-                        // Replace the base64 result with the file path in the response metadata
-                        outputItem.result = savedImageResult.filePath
-
-                        // Create artifact in the same format as other image artifacts
-                        const fileType = this.getArtifactTypeFromFilename(savedImageResult.fileName)
-                        artifacts.push({
-                            type: fileType,
-                            data: savedImageResult.filePath
-                        })
-                    }
-                } catch (error) {
-                    console.error('Error processing image generation artifact:', error)
-                }
-            }
-        }
-
-        return { artifacts, fileAnnotations }
-    }
-
-    /**
-     * Downloads file content from container file citation
-     */
-    private async downloadContainerFile(
-        containerId: string,
-        fileId: string,
-        filename: string,
-        modelNodeData: INodeData,
-        options: ICommonObject
-    ): Promise<{ filePath: string; totalSize: number } | null> {
-        try {
-            const credentialData = await getCredentialData(modelNodeData.credential ?? '', options)
-            const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, modelNodeData)
-
-            if (!openAIApiKey) {
-                console.warn('No OpenAI API key available for downloading container file')
-                return null
-            }
-
-            // Download the file using the OpenAI Container API
-            const response = await fetch(`https://api.openai.com/v1/containers/${containerId}/files/${fileId}/content`, {
-                method: 'GET',
-                headers: {
-                    Accept: '*/*',
-                    Authorization: `Bearer ${openAIApiKey}`
-                }
-            })
-
-            if (!response.ok) {
-                console.warn(
-                    `Failed to download container file ${fileId} from container ${containerId}: ${response.status} ${response.statusText}`
-                )
-                return null
-            }
-
-            // Extract the binary data from the Response object
-            const data = await response.arrayBuffer()
-            const dataBuffer = Buffer.from(data)
-            const mimeType = this.getMimeTypeFromFilename(filename)
-
-            // Store the file using the same storage utility as OpenAIAssistant
-            const { path, totalSize } = await addSingleFileToStorage(
-                mimeType,
-                dataBuffer,
-                filename,
-                options.orgId,
-                options.chatflowid,
-                options.chatId
-            )
-
-            return { filePath: path, totalSize }
-        } catch (error) {
-            console.error('Error downloading container file:', error)
-            return null
-        }
-    }
-
-    /**
-     * Gets MIME type from filename extension
-     */
-    private getMimeTypeFromFilename(filename: string): string {
-        const extension = filename.toLowerCase().split('.').pop()
-        const mimeTypes: { [key: string]: string } = {
-            png: 'image/png',
-            jpg: 'image/jpeg',
-            jpeg: 'image/jpeg',
-            gif: 'image/gif',
-            pdf: 'application/pdf',
-            txt: 'text/plain',
-            csv: 'text/csv',
-            json: 'application/json',
-            html: 'text/html',
-            xml: 'application/xml'
-        }
-        return mimeTypes[extension || ''] || 'application/octet-stream'
-    }
-
-    /**
-     * Gets artifact type from filename extension for UI rendering
-     */
-    private getArtifactTypeFromFilename(filename: string): string {
-        const extension = filename.toLowerCase().split('.').pop()
-        const artifactTypes: { [key: string]: string } = {
-            png: 'png',
-            jpg: 'jpeg',
-            jpeg: 'jpeg',
-            html: 'html',
-            htm: 'html',
-            md: 'markdown',
-            markdown: 'markdown',
-            json: 'json',
-            js: 'javascript',
-            javascript: 'javascript',
-            tex: 'latex',
-            latex: 'latex',
-            txt: 'text',
-            csv: 'text',
-            pdf: 'text'
-        }
-        return artifactTypes[extension || ''] || 'text'
-    }
-
     /**
      * Processes sandbox links in the response text and converts them to file annotations
      */

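Editor's note: the Agent node changes establish a round-trip for generated images — addImageArtifactsToMessages temporarily injects prior image artifacts as user messages so the model can see them, and the filter on _isTemporaryImageMessage strips them back out before persistence. A simplified sketch of that lifecycle, with message shapes abbreviated:

    // Simplified lifecycle of temporary image messages (shapes abbreviated).
    type FlowMessage = { role: string; content: unknown; _isTemporaryImageMessage?: boolean }

    const messages: FlowMessage[] = [
        { role: 'user', content: 'Generate a banana logo' },
        { role: 'assistant', content: 'Here is the image you asked for.' }
    ]

    // 1) Before invoking the LLM: inject stored image artifacts as a user message
    //    (addImageArtifactsToMessages does this, converting FILE-STORAGE:: refs to base64).
    messages.push({
        role: 'user',
        content: [{ type: 'image_url', image_url: { url: 'data:image/png;base64,...' } }],
        _isTemporaryImageMessage: true
    })

    // 2) After the call: drop the injected messages so base64 never reaches the database.
    const messagesToStore = messages.filter((msg) => !msg._isTemporaryImageMessage)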
View File

@@ -5,10 +5,13 @@ import { DEFAULT_SUMMARIZER_TEMPLATE } from '../prompt'
 import { AnalyticHandler } from '../../../src/handler'
 import { ILLMMessage } from '../Interface.Agentflow'
 import {
+    addImageArtifactsToMessages,
+    extractArtifactsFromResponse,
     getPastChatHistoryImageMessages,
     getUniqueImageMessages,
     processMessagesWithImages,
     replaceBase64ImagesWithFileReferences,
+    replaceInlineDataWithFileReferences,
     updateFlowState
 } from '../utils'
 import { processTemplateVariables, configureStructuredOutput } from '../../../src/utils'
@@ -448,6 +451,12 @@ class LLM_Agentflow implements INode {
         }
         delete nodeData.inputs?.llmMessages

+        /**
+         * Add image artifacts from previous assistant responses as user messages.
+         * Images are converted from FILE-STORAGE::<image_path> to base64 image_url format.
+         */
+        await addImageArtifactsToMessages(messages, options)
+
         // Configure structured output if specified
         const isStructuredOutput = _llmStructuredOutput && Array.isArray(_llmStructuredOutput) && _llmStructuredOutput.length > 0
         if (isStructuredOutput) {
@@ -467,9 +476,11 @@ class LLM_Agentflow implements INode {
         // Track execution time
         const startTime = Date.now()
         const sseStreamer: IServerSideEventStreamer | undefined = options.sseStreamer
-
+        /*
+         * Invoke LLM
+         */
         if (isStreamable) {
             response = await this.handleStreamingResponse(sseStreamer, llmNodeInstance, messages, chatId, abortController)
         } else {
@@ -494,6 +505,40 @@ class LLM_Agentflow implements INode {
         const endTime = Date.now()
         const timeDelta = endTime - startTime

+        // Extract artifacts and file annotations from response metadata
+        let artifacts: any[] = []
+        let fileAnnotations: any[] = []
+        if (response.response_metadata) {
+            const {
+                artifacts: extractedArtifacts,
+                fileAnnotations: extractedFileAnnotations,
+                savedInlineImages
+            } = await extractArtifactsFromResponse(response.response_metadata, newNodeData, options)
+
+            if (extractedArtifacts.length > 0) {
+                artifacts = extractedArtifacts
+                // Stream artifacts if this is the last node
+                if (isLastNode && sseStreamer) {
+                    sseStreamer.streamArtifactsEvent(chatId, artifacts)
+                }
+            }
+
+            if (extractedFileAnnotations.length > 0) {
+                fileAnnotations = extractedFileAnnotations
+                // Stream file annotations if this is the last node
+                if (isLastNode && sseStreamer) {
+                    sseStreamer.streamFileAnnotationsEvent(chatId, fileAnnotations)
+                }
+            }
+
+            // Replace inlineData base64 with file references in the response
+            if (savedInlineImages && savedInlineImages.length > 0) {
+                replaceInlineDataWithFileReferences(response, savedInlineImages)
+            }
+        }
+
         // Update flow state if needed
         let newState = { ...state }
         if (_llmUpdateState && Array.isArray(_llmUpdateState) && _llmUpdateState.length > 0) {
@@ -513,10 +558,22 @@ class LLM_Agentflow implements INode {
             finalResponse = response.content.map((item: any) => item.text).join('\n')
         } else if (response.content && typeof response.content === 'string') {
             finalResponse = response.content
+        } else if (response.content === '') {
+            // Empty response content; this can happen when there is only image data
+            finalResponse = ''
         } else {
             finalResponse = JSON.stringify(response, null, 2)
         }

-        const output = this.prepareOutputObject(response, finalResponse, startTime, endTime, timeDelta, isStructuredOutput)
+        const output = this.prepareOutputObject(
+            response,
+            finalResponse,
+            startTime,
+            endTime,
+            timeDelta,
+            isStructuredOutput,
+            artifacts,
+            fileAnnotations
+        )

         // End analytics tracking
         if (analyticHandlers && llmIds) {
@@ -528,12 +585,23 @@ class LLM_Agentflow implements INode {
             this.sendStreamingEvents(options, chatId, response)
         }

+        // Stream file annotations if any were extracted
+        if (fileAnnotations.length > 0 && isLastNode && sseStreamer) {
+            sseStreamer.streamFileAnnotationsEvent(chatId, fileAnnotations)
+        }
+
         // Process template variables in state
         newState = processTemplateVariables(newState, finalResponse)

+        /**
+         * Remove the temporarily added image artifact messages before storing.
+         * This avoids storing the actual base64 data in the database.
+         */
+        const messagesToStore = messages.filter((msg: any) => !msg._isTemporaryImageMessage)
+
         // Replace the actual messages array with one that includes the file references for images instead of base64 data
         const messagesWithFileReferences = replaceBase64ImagesWithFileReferences(
-            messages,
+            messagesToStore,
             runtimeImageMessagesWithFileRef,
             pastImageMessagesWithFileRef
         )
@@ -584,7 +652,13 @@ class LLM_Agentflow implements INode {
             {
                 role: returnRole,
                 content: finalResponse,
-                name: nodeData?.label ? nodeData?.label.toLowerCase().replace(/\s/g, '_').trim() : nodeData?.id
+                name: nodeData?.label ? nodeData?.label.toLowerCase().replace(/\s/g, '_').trim() : nodeData?.id,
+                ...(((artifacts && artifacts.length > 0) || (fileAnnotations && fileAnnotations.length > 0)) && {
+                    additional_kwargs: {
+                        ...(artifacts && artifacts.length > 0 && { artifacts }),
+                        ...(fileAnnotations && fileAnnotations.length > 0 && { fileAnnotations })
+                    }
+                })
             }
         ]
     }
@@ -805,7 +879,9 @@ class LLM_Agentflow implements INode {
         startTime: number,
         endTime: number,
         timeDelta: number,
-        isStructuredOutput: boolean
+        isStructuredOutput: boolean,
+        artifacts: any[] = [],
+        fileAnnotations: any[] = []
     ): any {
         const output: any = {
             content: finalResponse,
@@ -824,6 +900,10 @@ class LLM_Agentflow implements INode {
             output.usageMetadata = response.usage_metadata
         }

+        if (response.response_metadata) {
+            output.responseMetadata = response.response_metadata
+        }
+
         if (isStructuredOutput && typeof response === 'object') {
             const structuredOutput = response as Record<string, any>
             for (const key in structuredOutput) {
@@ -833,6 +913,14 @@ class LLM_Agentflow implements INode {
             }
         }

+        if (artifacts && artifacts.length > 0) {
+            output.artifacts = flatten(artifacts)
+        }
+
+        if (fileAnnotations && fileAnnotations.length > 0) {
+            output.fileAnnotations = fileAnnotations
+        }
+
         return output
     }

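Editor's note: with these changes, prepareOutputObject surfaces generated images on the node output. Roughly, an image-only Gemini reply would produce an output shaped like the following — values are illustrative, and the FILE-STORAGE:: path format is an assumption based on how artifacts are referenced elsewhere in this commit:

    const exampleOutput = {
        content: '', // finalResponse can be empty when the model returned only image data
        artifacts: [{ type: 'png', data: 'FILE-STORAGE::gemini_generated_image_1732790000000.png' }],
        fileAnnotations: [],
        responseMetadata: {} // raw response_metadata, now copied onto the output
    }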
View File

@@ -1,10 +1,11 @@
-import { BaseMessage, MessageContentImageUrl } from '@langchain/core/messages'
+import { BaseMessage, MessageContentImageUrl, AIMessageChunk } from '@langchain/core/messages'
 import { getImageUploads } from '../../src/multiModalUtils'
-import { getFileFromStorage } from '../../src/storageUtils'
-import { ICommonObject, IFileUpload } from '../../src/Interface'
+import { addSingleFileToStorage, getFileFromStorage } from '../../src/storageUtils'
+import { ICommonObject, IFileUpload, INodeData } from '../../src/Interface'
 import { BaseMessageLike } from '@langchain/core/messages'
 import { IFlowState } from './Interface.Agentflow'
-import { handleEscapeCharacters, mapMimeTypeToInputField } from '../../src/utils'
+import { getCredentialData, getCredentialParam, handleEscapeCharacters, mapMimeTypeToInputField } from '../../src/utils'
+import fetch from 'node-fetch'

 export const addImagesToMessages = async (
     options: ICommonObject,
@@ -18,7 +19,8 @@ export const addImagesToMessages = async (
     for (const upload of imageUploads) {
         let bf = upload.data
         if (upload.type == 'stored-file') {
-            const contents = await getFileFromStorage(upload.name, options.orgId, options.chatflowid, options.chatId)
+            const fileName = upload.name.replace(/^FILE-STORAGE::/, '')
+            const contents = await getFileFromStorage(fileName, options.orgId, options.chatflowid, options.chatId)

             // as the image is stored in the server, read the file and convert it to base64
             bf = 'data:' + upload.mime + ';base64,' + contents.toString('base64')
@@ -89,8 +91,9 @@ export const processMessagesWithImages = async (
             if (item.type === 'stored-file' && item.name && item.mime.startsWith('image/')) {
                 hasImageReferences = true
                 try {
+                    const fileName = item.name.replace(/^FILE-STORAGE::/, '')
                     // Get file contents from storage
-                    const contents = await getFileFromStorage(item.name, options.orgId, options.chatflowid, options.chatId)
+                    const contents = await getFileFromStorage(fileName, options.orgId, options.chatflowid, options.chatId)

                     // Create base64 data URL
                     const base64Data = 'data:' + item.mime + ';base64,' + contents.toString('base64')
@@ -322,7 +325,8 @@ export const getPastChatHistoryImageMessages = async (
                 const imageContents: MessageContentImageUrl[] = []
                 for (const upload of uploads) {
                     if (upload.type === 'stored-file' && upload.mime.startsWith('image/')) {
-                        const fileData = await getFileFromStorage(upload.name, options.orgId, options.chatflowid, options.chatId)
+                        const fileName = upload.name.replace(/^FILE-STORAGE::/, '')
+                        const fileData = await getFileFromStorage(fileName, options.orgId, options.chatflowid, options.chatId)

                         // as the image is stored in the server, read the file and convert it to base64
                         const bf = 'data:' + upload.mime + ';base64,' + fileData.toString('base64')
@@ -456,6 +460,437 @@ export const getPastChatHistoryImageMessages = async (
     }
 }

+/**
+ * Gets MIME type from filename extension
+ */
+export const getMimeTypeFromFilename = (filename: string): string => {
+    const extension = filename.toLowerCase().split('.').pop()
+    const mimeTypes: { [key: string]: string } = {
+        png: 'image/png',
+        jpg: 'image/jpeg',
+        jpeg: 'image/jpeg',
+        gif: 'image/gif',
+        pdf: 'application/pdf',
+        txt: 'text/plain',
+        csv: 'text/csv',
+        json: 'application/json',
+        html: 'text/html',
+        xml: 'application/xml'
+    }
+    return mimeTypes[extension || ''] || 'application/octet-stream'
+}
+
+/**
+ * Gets artifact type from filename extension for UI rendering
+ */
+export const getArtifactTypeFromFilename = (filename: string): string => {
+    const extension = filename.toLowerCase().split('.').pop()
+    const artifactTypes: { [key: string]: string } = {
+        png: 'png',
+        jpg: 'jpeg',
+        jpeg: 'jpeg',
+        html: 'html',
+        htm: 'html',
+        md: 'markdown',
+        markdown: 'markdown',
+        json: 'json',
+        js: 'javascript',
+        javascript: 'javascript',
+        tex: 'latex',
+        latex: 'latex',
+        txt: 'text',
+        csv: 'text',
+        pdf: 'text'
+    }
+    return artifactTypes[extension || ''] || 'text'
+}
+
+/**
+ * Saves base64 image data to storage and returns file information
+ */
+export const saveBase64Image = async (
+    outputItem: any,
+    options: ICommonObject
+): Promise<{ filePath: string; fileName: string; totalSize: number } | null> => {
+    try {
+        if (!outputItem.result) {
+            return null
+        }
+
+        // Extract base64 data and create buffer
+        const base64Data = outputItem.result
+        const imageBuffer = Buffer.from(base64Data, 'base64')
+
+        // Determine file extension and MIME type
+        const outputFormat = outputItem.output_format || 'png'
+        const fileName = `generated_image_${outputItem.id || Date.now()}.${outputFormat}`
+        const mimeType = outputFormat === 'png' ? 'image/png' : 'image/jpeg'
+
+        // Save the image using the existing storage utility
+        const { path, totalSize } = await addSingleFileToStorage(
+            mimeType,
+            imageBuffer,
+            fileName,
+            options.orgId,
+            options.chatflowid,
+            options.chatId
+        )
+
+        return { filePath: path, fileName, totalSize }
+    } catch (error) {
+        console.error('Error saving base64 image:', error)
+        return null
+    }
+}
+
+/**
+ * Saves Gemini inline image data to storage and returns file information
+ */
+export const saveGeminiInlineImage = async (
+    inlineItem: any,
+    options: ICommonObject
+): Promise<{ filePath: string; fileName: string; totalSize: number } | null> => {
+    try {
+        if (!inlineItem.data || !inlineItem.mimeType) {
+            return null
+        }
+
+        // Extract base64 data and create buffer
+        const base64Data = inlineItem.data
+        const imageBuffer = Buffer.from(base64Data, 'base64')
+
+        // Determine file extension from MIME type
+        const mimeType = inlineItem.mimeType
+        let extension = 'png'
+        if (mimeType.includes('jpeg') || mimeType.includes('jpg')) {
+            extension = 'jpg'
+        } else if (mimeType.includes('png')) {
+            extension = 'png'
+        } else if (mimeType.includes('gif')) {
+            extension = 'gif'
+        } else if (mimeType.includes('webp')) {
+            extension = 'webp'
+        }
+
+        const fileName = `gemini_generated_image_${Date.now()}.${extension}`
+
+        // Save the image using the existing storage utility
+        const { path, totalSize } = await addSingleFileToStorage(
+            mimeType,
+            imageBuffer,
+            fileName,
+            options.orgId,
+            options.chatflowid,
+            options.chatId
+        )
+
+        return { filePath: path, fileName, totalSize }
+    } catch (error) {
+        console.error('Error saving Gemini inline image:', error)
+        return null
+    }
+}
+
+/**
+ * Downloads file content from container file citation
+ */
+export const downloadContainerFile = async (
+    containerId: string,
+    fileId: string,
+    filename: string,
+    modelNodeData: INodeData,
+    options: ICommonObject
+): Promise<{ filePath: string; totalSize: number } | null> => {
+    try {
+        const credentialData = await getCredentialData(modelNodeData.credential ?? '', options)
+        const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, modelNodeData)
+
+        if (!openAIApiKey) {
+            console.warn('No OpenAI API key available for downloading container file')
+            return null
+        }
+
+        // Download the file using the OpenAI Container API
+        const response = await fetch(`https://api.openai.com/v1/containers/${containerId}/files/${fileId}/content`, {
+            method: 'GET',
+            headers: {
+                Accept: '*/*',
+                Authorization: `Bearer ${openAIApiKey}`
+            }
+        })
+
+        if (!response.ok) {
+            console.warn(
+                `Failed to download container file ${fileId} from container ${containerId}: ${response.status} ${response.statusText}`
+            )
+            return null
+        }
+
+        // Extract the binary data from the Response object
+        const data = await response.arrayBuffer()
+        const dataBuffer = Buffer.from(data)
+        const mimeType = getMimeTypeFromFilename(filename)
+
+        // Store the file using the same storage utility as OpenAIAssistant
+        const { path, totalSize } = await addSingleFileToStorage(
+            mimeType,
+            dataBuffer,
+            filename,
+            options.orgId,
+            options.chatflowid,
+            options.chatId
+        )
+
+        return { filePath: path, totalSize }
+    } catch (error) {
+        console.error('Error downloading container file:', error)
+        return null
+    }
+}
+
+/**
+ * Replace inlineData base64 with file references in the response content
+ */
+export const replaceInlineDataWithFileReferences = (
+    response: AIMessageChunk,
+    savedInlineImages: Array<{ filePath: string; fileName: string; mimeType: string }>
+): void => {
+    // Check if content is an array
+    if (!Array.isArray(response.content)) {
+        return
+    }
+
+    // Replace base64 data with file references in response content
+    let savedImageIndex = 0
+    for (let i = 0; i < response.content.length; i++) {
+        const contentItem = response.content[i]
+        if (
+            typeof contentItem === 'object' &&
+            contentItem.type === 'inlineData' &&
+            contentItem.inlineData &&
+            savedImageIndex < savedInlineImages.length
+        ) {
+            const savedImage = savedInlineImages[savedImageIndex]
+            // Replace with file reference
+            response.content[i] = {
+                type: 'stored-file',
+                name: savedImage.fileName,
+                mime: savedImage.mimeType,
+                path: savedImage.filePath
+            }
+            savedImageIndex++
+        }
+    }
+
+    // Clear the inlineData from response_metadata to avoid duplication
+    if (response.response_metadata?.inlineData) {
+        delete response.response_metadata.inlineData
+    }
+}
+
+/**
+ * Extracts artifacts from response metadata (both annotations and built-in tools)
+ */
+export const extractArtifactsFromResponse = async (
+    responseMetadata: any,
+    modelNodeData: INodeData,
+    options: ICommonObject
+): Promise<{
+    artifacts: any[]
+    fileAnnotations: any[]
+    savedInlineImages?: Array<{ filePath: string; fileName: string; mimeType: string }>
+}> => {
+    const artifacts: any[] = []
+    const fileAnnotations: any[] = []
+    const savedInlineImages: Array<{ filePath: string; fileName: string; mimeType: string }> = []
+
+    // Handle Gemini inline data (image generation)
+    if (responseMetadata?.inlineData && Array.isArray(responseMetadata.inlineData)) {
+        for (const inlineItem of responseMetadata.inlineData) {
+            if (inlineItem.type === 'gemini_inline_data' && inlineItem.data && inlineItem.mimeType) {
+                try {
+                    const savedImageResult = await saveGeminiInlineImage(inlineItem, options)
+                    if (savedImageResult) {
+                        // Create artifact in the same format as other image artifacts
+                        const fileType = getArtifactTypeFromFilename(savedImageResult.fileName)
+                        artifacts.push({
+                            type: fileType,
+                            data: savedImageResult.filePath
+                        })
+                        // Track saved image for replacing base64 data in content
+                        savedInlineImages.push({
+                            filePath: savedImageResult.filePath,
+                            fileName: savedImageResult.fileName,
+                            mimeType: inlineItem.mimeType
+                        })
+                    }
+                } catch (error) {
+                    console.error('Error processing Gemini inline image artifact:', error)
+                }
+            }
+        }
+    }
+
+    if (!responseMetadata?.output || !Array.isArray(responseMetadata.output)) {
+        return { artifacts, fileAnnotations, savedInlineImages: savedInlineImages.length > 0 ? savedInlineImages : undefined }
+    }
+
+    for (const outputItem of responseMetadata.output) {
+        // Handle container file citations from annotations
+        if (outputItem.type === 'message' && outputItem.content && Array.isArray(outputItem.content)) {
+            for (const contentItem of outputItem.content) {
+                if (contentItem.annotations && Array.isArray(contentItem.annotations)) {
+                    for (const annotation of contentItem.annotations) {
+                        if (annotation.type === 'container_file_citation' && annotation.file_id && annotation.filename) {
+                            try {
+                                // Download and store the file content
+                                const downloadResult = await downloadContainerFile(
+                                    annotation.container_id,
+                                    annotation.file_id,
+                                    annotation.filename,
+                                    modelNodeData,
+                                    options
+                                )
+
+                                if (downloadResult) {
+                                    const fileType = getArtifactTypeFromFilename(annotation.filename)
+                                    if (fileType === 'png' || fileType === 'jpeg' || fileType === 'jpg') {
+                                        const artifact = {
+                                            type: fileType,
+                                            data: downloadResult.filePath
+                                        }
+                                        artifacts.push(artifact)
+                                    } else {
+                                        fileAnnotations.push({
+                                            filePath: downloadResult.filePath,
+                                            fileName: annotation.filename
+                                        })
+                                    }
+                                }
+                            } catch (error) {
+                                console.error('Error processing annotation:', error)
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // Handle built-in tool artifacts (like image generation)
+        if (outputItem.type === 'image_generation_call' && outputItem.result) {
+            try {
+                const savedImageResult = await saveBase64Image(outputItem, options)
+                if (savedImageResult) {
+                    // Replace the base64 result with the file path in the response metadata
+                    outputItem.result = savedImageResult.filePath
+
+                    // Create artifact in the same format as other image artifacts
+                    const fileType = getArtifactTypeFromFilename(savedImageResult.fileName)
+                    artifacts.push({
+                        type: fileType,
+                        data: savedImageResult.filePath
+                    })
+                }
+            } catch (error) {
+                console.error('Error processing image generation artifact:', error)
+            }
+        }
+    }
+
+    return { artifacts, fileAnnotations, savedInlineImages: savedInlineImages.length > 0 ? savedInlineImages : undefined }
+}
+
+/**
+ * Add image artifacts from previous assistant messages as user messages
+ * This allows the LLM to see and reference the generated images in the conversation
+ * Messages are marked with a special flag for later removal
+ */
+export const addImageArtifactsToMessages = async (messages: BaseMessageLike[], options: ICommonObject): Promise<void> => {
+    const imageExtensions = ['png', 'jpg', 'jpeg', 'gif', 'webp']
+    const messagesToInsert: Array<{ index: number; message: any }> = []
+
+    // Iterate through messages to find assistant messages with image artifacts
+    for (let i = 0; i < messages.length; i++) {
+        const message = messages[i] as any
+
+        // Check if this is an assistant message with artifacts
+        if (
+            (message.role === 'assistant' || message.role === 'ai') &&
+            message.additional_kwargs?.artifacts &&
+            Array.isArray(message.additional_kwargs.artifacts)
+        ) {
+            const artifacts = message.additional_kwargs.artifacts
+            const imageArtifacts: Array<{ type: string; name: string; mime: string }> = []
+
+            // Extract image artifacts
+            for (const artifact of artifacts) {
+                if (artifact.type && artifact.data) {
+                    // Check if this is an image artifact by file type
+                    if (imageExtensions.includes(artifact.type.toLowerCase())) {
+                        // Extract filename from the file path
+                        const fileName = artifact.data.split('/').pop() || artifact.data
+                        const mimeType = `image/${artifact.type.toLowerCase()}`
+                        imageArtifacts.push({
+                            type: 'stored-file',
+                            name: fileName,
+                            mime: mimeType
+                        })
+                    }
+                }
+            }
+
+            // If we found image artifacts, prepare to insert a user message after this assistant message
+            if (imageArtifacts.length > 0) {
+                // Check if the next message already contains these image artifacts to avoid duplicates
+                const nextMessage = messages[i + 1] as any
+                const shouldInsert =
+                    !nextMessage ||
+                    nextMessage.role !== 'user' ||
+                    !Array.isArray(nextMessage.content) ||
+                    !nextMessage.content.some(
+                        (item: any) =>
+                            (item.type === 'stored-file' || item.type === 'image_url') &&
+                            imageArtifacts.some((artifact) => {
+                                // Compare with and without the FILE-STORAGE:: prefix
+                                const artifactName = artifact.name.replace('FILE-STORAGE::', '')
+                                const itemName = item.name?.replace('FILE-STORAGE::', '') || ''
+                                return artifactName === itemName
+                            })
+                    )
+
+                if (shouldInsert) {
+                    messagesToInsert.push({
+                        index: i + 1,
+                        message: {
+                            role: 'user',
+                            content: imageArtifacts,
+                            _isTemporaryImageMessage: true // Mark for later removal
+                        }
+                    })
+                }
+            }
+        }
+    }
+
+    // Insert messages in reverse order to maintain correct indices
+    for (let i = messagesToInsert.length - 1; i >= 0; i--) {
+        const { index, message } = messagesToInsert[i]
+        messages.splice(index, 0, message)
+    }
+
+    // Convert stored-file references to base64 image_url format
+    if (messagesToInsert.length > 0) {
+        const { updatedMessages } = await processMessagesWithImages(messages, options)
+        // Replace the messages array content with the updated messages
+        messages.length = 0
+        messages.push(...updatedMessages)
+    }
+}
+
 /**
  * Updates the flow state with new values
  */

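Editor's note: taken together, the helpers above are designed to be called in sequence after an LLM invocation, as the Agent and LLM nodes do. The intended wiring is roughly:

    // response: AIMessageChunk returned by the model; nodeData/options come from the node.
    const { artifacts, fileAnnotations, savedInlineImages } = await extractArtifactsFromResponse(
        response.response_metadata,
        nodeData,
        options
    )

    // Swap inlineData base64 payloads in response.content for stored-file references
    // so the raw image bytes are never persisted with the message.
    if (savedInlineImages && savedInlineImages.length > 0) {
        replaceInlineDataWithFileReferences(response, savedInlineImages)
    }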
View File

@@ -607,7 +607,12 @@ export class LangchainChatGoogleGenerativeAI
     private client: GenerativeModel

     get _isMultimodalModel() {
-        return this.model.includes('vision') || this.model.startsWith('gemini-1.5') || this.model.startsWith('gemini-2')
+        return (
+            this.model.includes('vision') ||
+            this.model.startsWith('gemini-1.5') ||
+            this.model.startsWith('gemini-2') ||
+            this.model.startsWith('gemini-3')
+        )
     }

     constructor(fields: GoogleGenerativeAIChatInput) {

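Editor's note: for clarity, the widened predicate restated on its own (behavior identical to the getter above):

    const isMultimodalModel = (model: string): boolean =>
        model.includes('vision') ||
        model.startsWith('gemini-1.5') ||
        model.startsWith('gemini-2') ||
        model.startsWith('gemini-3')

    isMultimodalModel('gemini-3-pro-image-preview') // true
    isMultimodalModel('gemini-2.5-flash-image') // true
    isMultimodalModel('text-bison') // false (hypothetical non-Gemini id)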
View File

@@ -452,6 +452,7 @@ export function mapGenerateContentResultToChatResult(
     const [candidate] = response.candidates
     const { content: candidateContent, ...generationInfo } = candidate
     let content: MessageContent | undefined
+    const inlineDataItems: any[] = []

     if (Array.isArray(candidateContent?.parts) && candidateContent.parts.length === 1 && candidateContent.parts[0].text) {
         content = candidateContent.parts[0].text
@@ -472,6 +473,18 @@ export function mapGenerateContentResultToChatResult(
                     type: 'codeExecutionResult',
                     codeExecutionResult: p.codeExecutionResult
                 }
+            } else if ('inlineData' in p && p.inlineData) {
+                // Extract inline image data for processing by the Agent
+                inlineDataItems.push({
+                    type: 'gemini_inline_data',
+                    mimeType: p.inlineData.mimeType,
+                    data: p.inlineData.data
+                })
+                // Return the inline data as part of the content structure
+                return {
+                    type: 'inlineData',
+                    inlineData: p.inlineData
+                }
             }
             return p
         })
@@ -488,6 +501,12 @@ export function mapGenerateContentResultToChatResult(
         text = block?.text ?? text
     }

+    // Build response_metadata with inline data if present
+    const response_metadata: any = {}
+    if (inlineDataItems.length > 0) {
+        response_metadata.inlineData = inlineDataItems
+    }
+
     const generation: ChatGeneration = {
         text,
         message: new AIMessage({
@@ -502,7 +521,8 @@ export function mapGenerateContentResultToChatResult(
             additional_kwargs: {
                 ...generationInfo
             },
-            usage_metadata: extra?.usageMetadata
+            usage_metadata: extra?.usageMetadata,
+            response_metadata: Object.keys(response_metadata).length > 0 ? response_metadata : undefined
         }),
         generationInfo
     }
@@ -533,6 +553,8 @@ export function convertResponseContentToChatGenerationChunk(
     const [candidate] = response.candidates
     const { content: candidateContent, ...generationInfo } = candidate
     let content: MessageContent | undefined
+    const inlineDataItems: any[] = []
+
     // Checks if some parts do not have text. If false, it means that the content is a string.
     if (Array.isArray(candidateContent?.parts) && candidateContent.parts.every((p) => 'text' in p)) {
         content = candidateContent.parts.map((p) => p.text).join('')
@@ -553,6 +575,18 @@ export function convertResponseContentToChatGenerationChunk(
                     type: 'codeExecutionResult',
                     codeExecutionResult: p.codeExecutionResult
                 }
+            } else if ('inlineData' in p && p.inlineData) {
+                // Extract inline image data for processing by the Agent
+                inlineDataItems.push({
+                    type: 'gemini_inline_data',
+                    mimeType: p.inlineData.mimeType,
+                    data: p.inlineData.data
+                })
+                // Return the inline data as part of the content structure
+                return {
+                    type: 'inlineData',
+                    inlineData: p.inlineData
+                }
             }
             return p
         })
@@ -582,6 +616,12 @@ export function convertResponseContentToChatGenerationChunk(
         )
     }

+    // Build response_metadata with inline data if present
+    const response_metadata: any = {}
+    if (inlineDataItems.length > 0) {
+        response_metadata.inlineData = inlineDataItems
+    }
+
     return new ChatGenerationChunk({
         text,
         message: new AIMessageChunk({
@@ -591,7 +631,8 @@ export function convertResponseContentToChatGenerationChunk(
             // Each chunk can have unique "generationInfo", and merging strategy is unclear,
            // so leave blank for now.
             additional_kwargs: {},
-            usage_metadata: extra.usageMetadata
+            usage_metadata: extra.usageMetadata,
+            response_metadata: Object.keys(response_metadata).length > 0 ? response_metadata : undefined
         }),
         generationInfo
     })
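Editor's note: to make the new mapping concrete — a Gemini candidate part that carries a generated image, and what the handlers above turn it into (base64 data truncated for readability):

    // Input part from the Gemini SDK response:
    const part = { inlineData: { mimeType: 'image/png', data: 'iVBORw0KGgo...' } }

    // The message content after mapping keeps the part in place:
    //   { type: 'inlineData', inlineData: { mimeType: 'image/png', data: 'iVBORw0KGgo...' } }
    // and response_metadata gains a parallel entry for the Agent/LLM nodes to consume:
    //   { inlineData: [{ type: 'gemini_inline_data', mimeType: 'image/png', data: 'iVBORw0KGgo...' }] }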