diff --git a/packages/components/models.json b/packages/components/models.json index b5c6c73d8..93ec31e7d 100644 --- a/packages/components/models.json +++ b/packages/components/models.json @@ -647,6 +647,12 @@ "input_cost": 0.00002, "output_cost": 0.00012 }, + { + "label": "gemini-3-pro-image-preview", + "name": "gemini-3-pro-image-preview", + "input_cost": 0.00002, + "output_cost": 0.00012 + }, { "label": "gemini-2.5-pro", "name": "gemini-2.5-pro", @@ -659,6 +665,12 @@ "input_cost": 1.25e-6, "output_cost": 0.00001 }, + { + "label": "gemini-2.5-flash-image", + "name": "gemini-2.5-flash-image", + "input_cost": 1.25e-6, + "output_cost": 0.00001 + }, { "label": "gemini-2.5-flash-lite", "name": "gemini-2.5-flash-lite", diff --git a/packages/components/nodes/agentflow/Agent/Agent.ts b/packages/components/nodes/agentflow/Agent/Agent.ts index b8aa80222..7993cd729 100644 --- a/packages/components/nodes/agentflow/Agent/Agent.ts +++ b/packages/components/nodes/agentflow/Agent/Agent.ts @@ -22,21 +22,16 @@ import zodToJsonSchema from 'zod-to-json-schema' import { getErrorMessage } from '../../../src/error' import { DataSource } from 'typeorm' import { + addImageArtifactsToMessages, + extractArtifactsFromResponse, getPastChatHistoryImageMessages, getUniqueImageMessages, processMessagesWithImages, replaceBase64ImagesWithFileReferences, + replaceInlineDataWithFileReferences, updateFlowState } from '../utils' -import { - convertMultiOptionsToStringArray, - getCredentialData, - getCredentialParam, - processTemplateVariables, - configureStructuredOutput -} from '../../../src/utils' -import { addSingleFileToStorage } from '../../../src/storageUtils' -import fetch from 'node-fetch' +import { convertMultiOptionsToStringArray, processTemplateVariables, configureStructuredOutput } from '../../../src/utils' interface ITool { agentSelectedTool: string @@ -1072,12 +1067,6 @@ class Agent_Agentflow implements INode { llmIds = await analyticHandlers.onLLMStart(llmLabel, messages, options.parentTraceIds) } - // Track execution time - const startTime = Date.now() - - // Get initial response from LLM - const sseStreamer: IServerSideEventStreamer | undefined = options.sseStreamer - // Handle tool calls with support for recursion let usedTools: IUsedTool[] = [] let sourceDocuments: Array<any> = [] @@ -1090,12 +1079,24 @@ class Agent_Agentflow implements INode { const messagesBeforeToolCalls = [...messages] let _toolCallMessages: BaseMessageLike[] = [] + /** + * Add image artifacts from previous assistant responses as user messages + * Images are converted from FILE-STORAGE:: to base64 image_url format + */ + await addImageArtifactsToMessages(messages, options) + // Check if this is humanInput for tool calls const _humanInput = nodeData.inputs?.humanInput const humanInput: IHumanInput = typeof _humanInput === 'string' ? JSON.parse(_humanInput) : _humanInput const humanInputAction = options.humanInputAction const iterationContext = options.iterationContext + // Track execution time + const startTime = Date.now() + + // Get initial response from LLM + const sseStreamer: IServerSideEventStreamer | undefined = options.sseStreamer + if (humanInput) { if (humanInput.type !== 'proceed' && humanInput.type !== 'reject') { throw new Error(`Invalid human input type. 
Expected 'proceed' or 'reject', but got '${humanInput.type}'`) @@ -1234,9 +1235,15 @@ class Agent_Agentflow implements INode { // Prepare final response and output object let finalResponse = '' if (response.content && Array.isArray(response.content)) { - finalResponse = response.content.map((item: any) => item.text).join('\n') + finalResponse = response.content + .filter((item: any) => item.text) + .map((item: any) => item.text) + .join('\n') } else if (response.content && typeof response.content === 'string') { finalResponse = response.content + } else if (response.content === '') { + // Empty response content; this can happen when there is only image data + finalResponse = '' } else { finalResponse = JSON.stringify(response, null, 2) } @@ -1252,10 +1259,13 @@ class Agent_Agentflow implements INode { } } - // Extract artifacts from annotations in response metadata + // Extract artifacts from annotations in response metadata and replace inline data if (response.response_metadata) { - const { artifacts: extractedArtifacts, fileAnnotations: extractedFileAnnotations } = - await this.extractArtifactsFromResponse(response.response_metadata, newNodeData, options) + const { + artifacts: extractedArtifacts, + fileAnnotations: extractedFileAnnotations, + savedInlineImages + } = await extractArtifactsFromResponse(response.response_metadata, newNodeData, options) if (extractedArtifacts.length > 0) { artifacts = [...artifacts, ...extractedArtifacts] @@ -1273,6 +1283,11 @@ class Agent_Agentflow implements INode { sseStreamer.streamFileAnnotationsEvent(chatId, fileAnnotations) } } + + // Replace inlineData base64 with file references in the response + if (savedInlineImages && savedInlineImages.length > 0) { + replaceInlineDataWithFileReferences(response, savedInlineImages) + } } // Replace sandbox links with proper download URLs. Example: [Download the script](sandbox:/mnt/data/dummy_bar_graph.py) @@ -1331,9 +1346,15 @@ class Agent_Agentflow implements INode { // Process template variables in state newState = processTemplateVariables(newState, finalResponse) + /** + * Remove the temporarily added image artifact messages before storing + * This is to avoid storing the actual base64 data in the database + */ + const messagesToStore = messages.filter((msg: any) => !msg._isTemporaryImageMessage) + // Replace the actual messages array with one that includes the file references for images instead of base64 data const messagesWithFileReferences = replaceBase64ImagesWithFileReferences( - messages, + messagesToStore, runtimeImageMessagesWithFileRef, pastImageMessagesWithFileRef ) @@ -1500,44 +1521,6 @@ class Agent_Agentflow implements INode { return builtInUsedTools } - /** - * Saves base64 image data to storage and returns file information - */ - private async saveBase64Image( - outputItem: any, - options: ICommonObject - ): Promise<{ filePath: string; fileName: string; totalSize: number } | null> { - try { - if (!outputItem.result) { - return null - } - - // Extract base64 data and create buffer - const base64Data = outputItem.result - const imageBuffer = Buffer.from(base64Data, 'base64') - - // Determine file extension and MIME type - const outputFormat = outputItem.output_format || 'png' - const fileName = `generated_image_${outputItem.id || Date.now()}.${outputFormat}` - const mimeType = outputFormat === 'png' ? 
'image/png' : 'image/jpeg' - - // Save the image using the existing storage utility - const { path, totalSize } = await addSingleFileToStorage( - mimeType, - imageBuffer, - fileName, - options.orgId, - options.chatflowid, - options.chatId - ) - - return { filePath: path, fileName, totalSize } - } catch (error) { - console.error('Error saving base64 image:', error) - return null - } - } - /** * Handles memory management based on the specified memory type */ @@ -2484,190 +2467,6 @@ class Agent_Agentflow implements INode { return { response: newResponse, usedTools, sourceDocuments, artifacts, totalTokens, isWaitingForHumanInput } } - /** - * Extracts artifacts from response metadata (both annotations and built-in tools) - */ - private async extractArtifactsFromResponse( - responseMetadata: any, - modelNodeData: INodeData, - options: ICommonObject - ): Promise<{ artifacts: any[]; fileAnnotations: any[] }> { - const artifacts: any[] = [] - const fileAnnotations: any[] = [] - - if (!responseMetadata?.output || !Array.isArray(responseMetadata.output)) { - return { artifacts, fileAnnotations } - } - - for (const outputItem of responseMetadata.output) { - // Handle container file citations from annotations - if (outputItem.type === 'message' && outputItem.content && Array.isArray(outputItem.content)) { - for (const contentItem of outputItem.content) { - if (contentItem.annotations && Array.isArray(contentItem.annotations)) { - for (const annotation of contentItem.annotations) { - if (annotation.type === 'container_file_citation' && annotation.file_id && annotation.filename) { - try { - // Download and store the file content - const downloadResult = await this.downloadContainerFile( - annotation.container_id, - annotation.file_id, - annotation.filename, - modelNodeData, - options - ) - - if (downloadResult) { - const fileType = this.getArtifactTypeFromFilename(annotation.filename) - - if (fileType === 'png' || fileType === 'jpeg' || fileType === 'jpg') { - const artifact = { - type: fileType, - data: downloadResult.filePath - } - - artifacts.push(artifact) - } else { - fileAnnotations.push({ - filePath: downloadResult.filePath, - fileName: annotation.filename - }) - } - } - } catch (error) { - console.error('Error processing annotation:', error) - } - } - } - } - } - } - - // Handle built-in tool artifacts (like image generation) - if (outputItem.type === 'image_generation_call' && outputItem.result) { - try { - const savedImageResult = await this.saveBase64Image(outputItem, options) - if (savedImageResult) { - // Replace the base64 result with the file path in the response metadata - outputItem.result = savedImageResult.filePath - - // Create artifact in the same format as other image artifacts - const fileType = this.getArtifactTypeFromFilename(savedImageResult.fileName) - artifacts.push({ - type: fileType, - data: savedImageResult.filePath - }) - } - } catch (error) { - console.error('Error processing image generation artifact:', error) - } - } - } - - return { artifacts, fileAnnotations } - } - - /** - * Downloads file content from container file citation - */ - private async downloadContainerFile( - containerId: string, - fileId: string, - filename: string, - modelNodeData: INodeData, - options: ICommonObject - ): Promise<{ filePath: string; totalSize: number } | null> { - try { - const credentialData = await getCredentialData(modelNodeData.credential ?? 
'', options) - const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, modelNodeData) - - if (!openAIApiKey) { - console.warn('No OpenAI API key available for downloading container file') - return null - } - - // Download the file using OpenAI Container API - const response = await fetch(`https://api.openai.com/v1/containers/${containerId}/files/${fileId}/content`, { - method: 'GET', - headers: { - Accept: '*/*', - Authorization: `Bearer ${openAIApiKey}` - } - }) - - if (!response.ok) { - console.warn( - `Failed to download container file ${fileId} from container ${containerId}: ${response.status} ${response.statusText}` - ) - return null - } - - // Extract the binary data from the Response object - const data = await response.arrayBuffer() - const dataBuffer = Buffer.from(data) - const mimeType = this.getMimeTypeFromFilename(filename) - - // Store the file using the same storage utility as OpenAIAssistant - const { path, totalSize } = await addSingleFileToStorage( - mimeType, - dataBuffer, - filename, - options.orgId, - options.chatflowid, - options.chatId - ) - - return { filePath: path, totalSize } - } catch (error) { - console.error('Error downloading container file:', error) - return null - } - } - - /** - * Gets MIME type from filename extension - */ - private getMimeTypeFromFilename(filename: string): string { - const extension = filename.toLowerCase().split('.').pop() - const mimeTypes: { [key: string]: string } = { - png: 'image/png', - jpg: 'image/jpeg', - jpeg: 'image/jpeg', - gif: 'image/gif', - pdf: 'application/pdf', - txt: 'text/plain', - csv: 'text/csv', - json: 'application/json', - html: 'text/html', - xml: 'application/xml' - } - return mimeTypes[extension || ''] || 'application/octet-stream' - } - - /** - * Gets artifact type from filename extension for UI rendering - */ - private getArtifactTypeFromFilename(filename: string): string { - const extension = filename.toLowerCase().split('.').pop() - const artifactTypes: { [key: string]: string } = { - png: 'png', - jpg: 'jpeg', - jpeg: 'jpeg', - html: 'html', - htm: 'html', - md: 'markdown', - markdown: 'markdown', - json: 'json', - js: 'javascript', - javascript: 'javascript', - tex: 'latex', - latex: 'latex', - txt: 'text', - csv: 'text', - pdf: 'text' - } - return artifactTypes[extension || ''] || 'text' - } - /** * Processes sandbox links in the response text and converts them to file annotations */ diff --git a/packages/components/nodes/agentflow/LLM/LLM.ts b/packages/components/nodes/agentflow/LLM/LLM.ts index a5bf4deb7..e6035166b 100644 --- a/packages/components/nodes/agentflow/LLM/LLM.ts +++ b/packages/components/nodes/agentflow/LLM/LLM.ts @@ -5,10 +5,13 @@ import { DEFAULT_SUMMARIZER_TEMPLATE } from '../prompt' import { AnalyticHandler } from '../../../src/handler' import { ILLMMessage } from '../Interface.Agentflow' import { + addImageArtifactsToMessages, + extractArtifactsFromResponse, getPastChatHistoryImageMessages, getUniqueImageMessages, processMessagesWithImages, replaceBase64ImagesWithFileReferences, + replaceInlineDataWithFileReferences, updateFlowState } from '../utils' import { processTemplateVariables, configureStructuredOutput } from '../../../src/utils' @@ -448,6 +451,12 @@ class LLM_Agentflow implements INode { } delete nodeData.inputs?.llmMessages + /** + * Add image artifacts from previous assistant responses as user messages + * Images are converted from FILE-STORAGE:: to base64 image_url format + */ + await addImageArtifactsToMessages(messages, options) + // Configure structured 
output if specified const isStructuredOutput = _llmStructuredOutput && Array.isArray(_llmStructuredOutput) && _llmStructuredOutput.length > 0 if (isStructuredOutput) { @@ -467,9 +476,11 @@ // Track execution time const startTime = Date.now() - const sseStreamer: IServerSideEventStreamer | undefined = options.sseStreamer + /* + * Invoke LLM + */ if (isStreamable) { response = await this.handleStreamingResponse(sseStreamer, llmNodeInstance, messages, chatId, abortController) } else { @@ -494,6 +505,40 @@ const endTime = Date.now() const timeDelta = endTime - startTime + // Extract artifacts and file annotations from response metadata + let artifacts: any[] = [] + let fileAnnotations: any[] = [] + if (response.response_metadata) { + const { + artifacts: extractedArtifacts, + fileAnnotations: extractedFileAnnotations, + savedInlineImages + } = await extractArtifactsFromResponse(response.response_metadata, newNodeData, options) + + if (extractedArtifacts.length > 0) { + artifacts = extractedArtifacts + + // Stream artifacts if this is the last node + if (isLastNode && sseStreamer) { + sseStreamer.streamArtifactsEvent(chatId, artifacts) + } + } + + if (extractedFileAnnotations.length > 0) { + fileAnnotations = extractedFileAnnotations + + // Stream file annotations if this is the last node + if (isLastNode && sseStreamer) { + sseStreamer.streamFileAnnotationsEvent(chatId, fileAnnotations) + } + } + + // Replace inlineData base64 with file references in the response + if (savedInlineImages && savedInlineImages.length > 0) { + replaceInlineDataWithFileReferences(response, savedInlineImages) + } + } + // Update flow state if needed let newState = { ...state } if (_llmUpdateState && Array.isArray(_llmUpdateState) && _llmUpdateState.length > 0) { @@ -513,10 +558,22 @@ finalResponse = response.content.map((item: any) => item.text).join('\n') } else if (response.content && typeof response.content === 'string') { finalResponse = response.content + } else if (response.content === '') { + // Empty response content; this can happen when there is only image data + finalResponse = '' } else { finalResponse = JSON.stringify(response, null, 2) } - const output = this.prepareOutputObject(response, finalResponse, startTime, endTime, timeDelta, isStructuredOutput) + const output = this.prepareOutputObject( + response, + finalResponse, + startTime, + endTime, + timeDelta, + isStructuredOutput, + artifacts, + fileAnnotations + ) // End analytics tracking if (analyticHandlers && llmIds) { @@ -528,12 +585,23 @@ this.sendStreamingEvents(options, chatId, response) } + // Stream file annotations if any were extracted + if (fileAnnotations.length > 0 && isLastNode && sseStreamer) { + sseStreamer.streamFileAnnotationsEvent(chatId, fileAnnotations) + } + // Process template variables in state newState = processTemplateVariables(newState, finalResponse) + /** + * Remove the temporarily added image artifact messages before storing + * This is to avoid storing the actual base64 data in the database + */ + const messagesToStore = messages.filter((msg: any) => !msg._isTemporaryImageMessage) + // Replace the actual messages array with one that includes the file references for images instead of base64 data const messagesWithFileReferences = replaceBase64ImagesWithFileReferences( - messages, + messagesToStore, runtimeImageMessagesWithFileRef, pastImageMessagesWithFileRef ) @@ 
-584,7 +652,13 @@ class LLM_Agentflow implements INode { { role: returnRole, content: finalResponse, - name: nodeData?.label ? nodeData?.label.toLowerCase().replace(/\s/g, '_').trim() : nodeData?.id + name: nodeData?.label ? nodeData?.label.toLowerCase().replace(/\s/g, '_').trim() : nodeData?.id, + ...(((artifacts && artifacts.length > 0) || (fileAnnotations && fileAnnotations.length > 0)) && { + additional_kwargs: { + ...(artifacts && artifacts.length > 0 && { artifacts }), + ...(fileAnnotations && fileAnnotations.length > 0 && { fileAnnotations }) + } + }) } ] } @@ -805,7 +879,9 @@ class LLM_Agentflow implements INode { startTime: number, endTime: number, timeDelta: number, - isStructuredOutput: boolean + isStructuredOutput: boolean, + artifacts: any[] = [], + fileAnnotations: any[] = [] ): any { const output: any = { content: finalResponse, @@ -824,6 +900,10 @@ class LLM_Agentflow implements INode { output.usageMetadata = response.usage_metadata } + if (response.response_metadata) { + output.responseMetadata = response.response_metadata + } + if (isStructuredOutput && typeof response === 'object') { const structuredOutput = response as Record<string, any> for (const key in structuredOutput) { @@ -833,6 +913,14 @@ class LLM_Agentflow implements INode { } } + if (artifacts && artifacts.length > 0) { + output.artifacts = flatten(artifacts) + } + + if (fileAnnotations && fileAnnotations.length > 0) { + output.fileAnnotations = fileAnnotations + } + return output } diff --git a/packages/components/nodes/agentflow/utils.ts b/packages/components/nodes/agentflow/utils.ts index 14d832c8a..56bb2fb26 100644 --- a/packages/components/nodes/agentflow/utils.ts +++ b/packages/components/nodes/agentflow/utils.ts @@ -1,10 +1,11 @@ -import { BaseMessage, MessageContentImageUrl } from '@langchain/core/messages' +import { BaseMessage, MessageContentImageUrl, AIMessageChunk } from '@langchain/core/messages' import { getImageUploads } from '../../src/multiModalUtils' -import { getFileFromStorage } from '../../src/storageUtils' -import { ICommonObject, IFileUpload } from '../../src/Interface' +import { addSingleFileToStorage, getFileFromStorage } from '../../src/storageUtils' +import { ICommonObject, IFileUpload, INodeData } from '../../src/Interface' import { BaseMessageLike } from '@langchain/core/messages' import { IFlowState } from './Interface.Agentflow' -import { handleEscapeCharacters, mapMimeTypeToInputField } from '../../src/utils' +import { getCredentialData, getCredentialParam, handleEscapeCharacters, mapMimeTypeToInputField } from '../../src/utils' +import fetch from 'node-fetch' export const addImagesToMessages = async ( options: ICommonObject, @@ -18,7 +19,8 @@ for (const upload of imageUploads) { let bf = upload.data if (upload.type == 'stored-file') { - const contents = await getFileFromStorage(upload.name, options.orgId, options.chatflowid, options.chatId) + const fileName = upload.name.replace(/^FILE-STORAGE::/, '') + const contents = await getFileFromStorage(fileName, options.orgId, options.chatflowid, options.chatId) // as the image is stored in the server, read the file and convert it to base64 bf = 'data:' + upload.mime + ';base64,' + contents.toString('base64') @@ -89,8 +91,9 @@ if (item.type === 'stored-file' && item.name && item.mime.startsWith('image/')) { hasImageReferences = true try { + const fileName = item.name.replace(/^FILE-STORAGE::/, '') // Get file contents from storage - const contents = await 
getFileFromStorage(item.name, options.orgId, options.chatflowid, options.chatId) + const contents = await getFileFromStorage(fileName, options.orgId, options.chatflowid, options.chatId) // Create base64 data URL const base64Data = 'data:' + item.mime + ';base64,' + contents.toString('base64') @@ -322,7 +325,8 @@ export const getPastChatHistoryImageMessages = async ( const imageContents: MessageContentImageUrl[] = [] for (const upload of uploads) { if (upload.type === 'stored-file' && upload.mime.startsWith('image/')) { - const fileData = await getFileFromStorage(upload.name, options.orgId, options.chatflowid, options.chatId) + const fileName = upload.name.replace(/^FILE-STORAGE::/, '') + const fileData = await getFileFromStorage(fileName, options.orgId, options.chatflowid, options.chatId) // as the image is stored in the server, read the file and convert it to base64 const bf = 'data:' + upload.mime + ';base64,' + fileData.toString('base64') @@ -456,6 +460,437 @@ export const getPastChatHistoryImageMessages = async ( } } +/** + * Gets MIME type from filename extension + */ +export const getMimeTypeFromFilename = (filename: string): string => { + const extension = filename.toLowerCase().split('.').pop() + const mimeTypes: { [key: string]: string } = { + png: 'image/png', + jpg: 'image/jpeg', + jpeg: 'image/jpeg', + gif: 'image/gif', + pdf: 'application/pdf', + txt: 'text/plain', + csv: 'text/csv', + json: 'application/json', + html: 'text/html', + xml: 'application/xml' + } + return mimeTypes[extension || ''] || 'application/octet-stream' +} + +/** + * Gets artifact type from filename extension for UI rendering + */ +export const getArtifactTypeFromFilename = (filename: string): string => { + const extension = filename.toLowerCase().split('.').pop() + const artifactTypes: { [key: string]: string } = { + png: 'png', + jpg: 'jpeg', + jpeg: 'jpeg', + html: 'html', + htm: 'html', + md: 'markdown', + markdown: 'markdown', + json: 'json', + js: 'javascript', + javascript: 'javascript', + tex: 'latex', + latex: 'latex', + txt: 'text', + csv: 'text', + pdf: 'text' + } + return artifactTypes[extension || ''] || 'text' +} + +/** + * Saves base64 image data to storage and returns file information + */ +export const saveBase64Image = async ( + outputItem: any, + options: ICommonObject +): Promise<{ filePath: string; fileName: string; totalSize: number } | null> => { + try { + if (!outputItem.result) { + return null + } + + // Extract base64 data and create buffer + const base64Data = outputItem.result + const imageBuffer = Buffer.from(base64Data, 'base64') + + // Determine file extension and MIME type + const outputFormat = outputItem.output_format || 'png' + const fileName = `generated_image_${outputItem.id || Date.now()}.${outputFormat}` + const mimeType = outputFormat === 'png' ? 
'image/png' : 'image/jpeg' + + // Save the image using the existing storage utility + const { path, totalSize } = await addSingleFileToStorage( + mimeType, + imageBuffer, + fileName, + options.orgId, + options.chatflowid, + options.chatId + ) + + return { filePath: path, fileName, totalSize } + } catch (error) { + console.error('Error saving base64 image:', error) + return null + } +} + +/** + * Saves Gemini inline image data to storage and returns file information + */ +export const saveGeminiInlineImage = async ( + inlineItem: any, + options: ICommonObject +): Promise<{ filePath: string; fileName: string; totalSize: number } | null> => { + try { + if (!inlineItem.data || !inlineItem.mimeType) { + return null + } + + // Extract base64 data and create buffer + const base64Data = inlineItem.data + const imageBuffer = Buffer.from(base64Data, 'base64') + + // Determine file extension from MIME type + const mimeType = inlineItem.mimeType + let extension = 'png' + if (mimeType.includes('jpeg') || mimeType.includes('jpg')) { + extension = 'jpg' + } else if (mimeType.includes('png')) { + extension = 'png' + } else if (mimeType.includes('gif')) { + extension = 'gif' + } else if (mimeType.includes('webp')) { + extension = 'webp' + } + + const fileName = `gemini_generated_image_${Date.now()}.${extension}` + + // Save the image using the existing storage utility + const { path, totalSize } = await addSingleFileToStorage( + mimeType, + imageBuffer, + fileName, + options.orgId, + options.chatflowid, + options.chatId + ) + + return { filePath: path, fileName, totalSize } + } catch (error) { + console.error('Error saving Gemini inline image:', error) + return null + } +} + +/** + * Downloads file content from container file citation + */ +export const downloadContainerFile = async ( + containerId: string, + fileId: string, + filename: string, + modelNodeData: INodeData, + options: ICommonObject +): Promise<{ filePath: string; totalSize: number } | null> => { + try { + const credentialData = await getCredentialData(modelNodeData.credential ?? 
'', options) + const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, modelNodeData) + + if (!openAIApiKey) { + console.warn('No OpenAI API key available for downloading container file') + return null + } + + // Download the file using OpenAI Container API + const response = await fetch(`https://api.openai.com/v1/containers/${containerId}/files/${fileId}/content`, { + method: 'GET', + headers: { + Accept: '*/*', + Authorization: `Bearer ${openAIApiKey}` + } + }) + + if (!response.ok) { + console.warn( + `Failed to download container file ${fileId} from container ${containerId}: ${response.status} ${response.statusText}` + ) + return null + } + + // Extract the binary data from the Response object + const data = await response.arrayBuffer() + const dataBuffer = Buffer.from(data) + const mimeType = getMimeTypeFromFilename(filename) + + // Store the file using the same storage utility as OpenAIAssistant + const { path, totalSize } = await addSingleFileToStorage( + mimeType, + dataBuffer, + filename, + options.orgId, + options.chatflowid, + options.chatId + ) + + return { filePath: path, totalSize } + } catch (error) { + console.error('Error downloading container file:', error) + return null + } +} + +/** + * Replace inlineData base64 with file references in the response content + */ +export const replaceInlineDataWithFileReferences = ( + response: AIMessageChunk, + savedInlineImages: Array<{ filePath: string; fileName: string; mimeType: string }> +): void => { + // Check if content is an array + if (!Array.isArray(response.content)) { + return + } + + // Replace base64 data with file references in response content + let savedImageIndex = 0 + for (let i = 0; i < response.content.length; i++) { + const contentItem = response.content[i] + if ( + typeof contentItem === 'object' && + contentItem.type === 'inlineData' && + contentItem.inlineData && + savedImageIndex < savedInlineImages.length + ) { + const savedImage = savedInlineImages[savedImageIndex] + // Replace with file reference + response.content[i] = { + type: 'stored-file', + name: savedImage.fileName, + mime: savedImage.mimeType, + path: savedImage.filePath + } + savedImageIndex++ + } + } + + // Clear the inlineData from response_metadata to avoid duplication + if (response.response_metadata?.inlineData) { + delete response.response_metadata.inlineData + } +} + +/** + * Extracts artifacts from response metadata (both annotations and built-in tools) + */ +export const extractArtifactsFromResponse = async ( + responseMetadata: any, + modelNodeData: INodeData, + options: ICommonObject +): Promise<{ + artifacts: any[] + fileAnnotations: any[] + savedInlineImages?: Array<{ filePath: string; fileName: string; mimeType: string }> +}> => { + const artifacts: any[] = [] + const fileAnnotations: any[] = [] + const savedInlineImages: Array<{ filePath: string; fileName: string; mimeType: string }> = [] + + // Handle Gemini inline data (image generation) + if (responseMetadata?.inlineData && Array.isArray(responseMetadata.inlineData)) { + for (const inlineItem of responseMetadata.inlineData) { + if (inlineItem.type === 'gemini_inline_data' && inlineItem.data && inlineItem.mimeType) { + try { + const savedImageResult = await saveGeminiInlineImage(inlineItem, options) + if (savedImageResult) { + // Create artifact in the same format as other image artifacts + const fileType = getArtifactTypeFromFilename(savedImageResult.fileName) + artifacts.push({ + type: fileType, + data: savedImageResult.filePath + }) + + // Track saved image for 
replacing base64 data in content + savedInlineImages.push({ + filePath: savedImageResult.filePath, + fileName: savedImageResult.fileName, + mimeType: inlineItem.mimeType + }) + } + } catch (error) { + console.error('Error processing Gemini inline image artifact:', error) + } + } + } + } + + if (!responseMetadata?.output || !Array.isArray(responseMetadata.output)) { + return { artifacts, fileAnnotations, savedInlineImages: savedInlineImages.length > 0 ? savedInlineImages : undefined } + } + + for (const outputItem of responseMetadata.output) { + // Handle container file citations from annotations + if (outputItem.type === 'message' && outputItem.content && Array.isArray(outputItem.content)) { + for (const contentItem of outputItem.content) { + if (contentItem.annotations && Array.isArray(contentItem.annotations)) { + for (const annotation of contentItem.annotations) { + if (annotation.type === 'container_file_citation' && annotation.file_id && annotation.filename) { + try { + // Download and store the file content + const downloadResult = await downloadContainerFile( + annotation.container_id, + annotation.file_id, + annotation.filename, + modelNodeData, + options + ) + + if (downloadResult) { + const fileType = getArtifactTypeFromFilename(annotation.filename) + + if (fileType === 'png' || fileType === 'jpeg' || fileType === 'jpg') { + const artifact = { + type: fileType, + data: downloadResult.filePath + } + + artifacts.push(artifact) + } else { + fileAnnotations.push({ + filePath: downloadResult.filePath, + fileName: annotation.filename + }) + } + } + } catch (error) { + console.error('Error processing annotation:', error) + } + } + } + } + } + } + + // Handle built-in tool artifacts (like image generation) + if (outputItem.type === 'image_generation_call' && outputItem.result) { + try { + const savedImageResult = await saveBase64Image(outputItem, options) + if (savedImageResult) { + // Replace the base64 result with the file path in the response metadata + outputItem.result = savedImageResult.filePath + + // Create artifact in the same format as other image artifacts + const fileType = getArtifactTypeFromFilename(savedImageResult.fileName) + artifacts.push({ + type: fileType, + data: savedImageResult.filePath + }) + } + } catch (error) { + console.error('Error processing image generation artifact:', error) + } + } + } + + return { artifacts, fileAnnotations, savedInlineImages: savedInlineImages.length > 0 ? 
savedInlineImages : undefined } +} + +/** + * Add image artifacts from previous assistant messages as user messages + * This allows the LLM to see and reference the generated images in the conversation + * Messages are marked with a special flag for later removal + */ +export const addImageArtifactsToMessages = async (messages: BaseMessageLike[], options: ICommonObject): Promise<void> => { + const imageExtensions = ['png', 'jpg', 'jpeg', 'gif', 'webp'] + const messagesToInsert: Array<{ index: number; message: any }> = [] + + // Iterate through messages to find assistant messages with image artifacts + for (let i = 0; i < messages.length; i++) { + const message = messages[i] as any + + // Check if this is an assistant message with artifacts + if ( + (message.role === 'assistant' || message.role === 'ai') && + message.additional_kwargs?.artifacts && + Array.isArray(message.additional_kwargs.artifacts) + ) { + const artifacts = message.additional_kwargs.artifacts + const imageArtifacts: Array<{ type: string; name: string; mime: string }> = [] + + // Extract image artifacts + for (const artifact of artifacts) { + if (artifact.type && artifact.data) { + // Check if this is an image artifact by file type + if (imageExtensions.includes(artifact.type.toLowerCase())) { + // Extract filename from the file path + const fileName = artifact.data.split('/').pop() || artifact.data + const mimeType = `image/${artifact.type.toLowerCase()}` + + imageArtifacts.push({ + type: 'stored-file', + name: fileName, + mime: mimeType + }) + } + } + } + + // If we found image artifacts, prepare to insert a user message after this assistant message + if (imageArtifacts.length > 0) { + // Check if the next message already contains these image artifacts to avoid duplicates + const nextMessage = messages[i + 1] as any + const shouldInsert = + !nextMessage || + nextMessage.role !== 'user' || + !Array.isArray(nextMessage.content) || + !nextMessage.content.some( + (item: any) => + (item.type === 'stored-file' || item.type === 'image_url') && + imageArtifacts.some((artifact) => { + // Compare with and without FILE-STORAGE:: prefix + const artifactName = artifact.name.replace('FILE-STORAGE::', '') + const itemName = item.name?.replace('FILE-STORAGE::', '') || '' + return artifactName === itemName + }) + ) + + if (shouldInsert) { + messagesToInsert.push({ + index: i + 1, + message: { + role: 'user', + content: imageArtifacts, + _isTemporaryImageMessage: true // Mark for later removal + } + }) + } + } + } + } + + // Insert messages in reverse order to maintain correct indices + for (let i = messagesToInsert.length - 1; i >= 0; i--) { + const { index, message } = messagesToInsert[i] + messages.splice(index, 0, message) + } + + // Convert stored-file references to base64 image_url format + if (messagesToInsert.length > 0) { + const { updatedMessages } = await processMessagesWithImages(messages, options) + // Replace the messages array content with the updated messages + messages.length = 0 + messages.push(...updatedMessages) + } +} + /** * Updates the flow state with new values */ diff --git a/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/FlowiseChatGoogleGenerativeAI.ts b/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/FlowiseChatGoogleGenerativeAI.ts index cdf3ac118..f4a9ef89a 100644 --- a/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/FlowiseChatGoogleGenerativeAI.ts +++ b/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/FlowiseChatGoogleGenerativeAI.ts @@ -607,7 +607,12 @@ export 
class LangchainChatGoogleGenerativeAI private client: GenerativeModel get _isMultimodalModel() { - return this.model.includes('vision') || this.model.startsWith('gemini-1.5') || this.model.startsWith('gemini-2') + return ( + this.model.includes('vision') || + this.model.startsWith('gemini-1.5') || + this.model.startsWith('gemini-2') || + this.model.startsWith('gemini-3') + ) } constructor(fields: GoogleGenerativeAIChatInput) { diff --git a/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/utils/common.ts b/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/utils/common.ts index 92c5f0b5a..6e0d32d22 100644 --- a/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/utils/common.ts +++ b/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/utils/common.ts @@ -452,6 +452,7 @@ export function mapGenerateContentResultToChatResult( const [candidate] = response.candidates const { content: candidateContent, ...generationInfo } = candidate let content: MessageContent | undefined + const inlineDataItems: any[] = [] if (Array.isArray(candidateContent?.parts) && candidateContent.parts.length === 1 && candidateContent.parts[0].text) { content = candidateContent.parts[0].text @@ -472,6 +473,18 @@ export function mapGenerateContentResultToChatResult( type: 'codeExecutionResult', codeExecutionResult: p.codeExecutionResult } + } else if ('inlineData' in p && p.inlineData) { + // Extract inline image data for processing by Agent + inlineDataItems.push({ + type: 'gemini_inline_data', + mimeType: p.inlineData.mimeType, + data: p.inlineData.data + }) + // Return the inline data as part of the content structure + return { + type: 'inlineData', + inlineData: p.inlineData + } } return p }) @@ -488,6 +501,12 @@ export function mapGenerateContentResultToChatResult( text = block?.text ?? text } + // Build response_metadata with inline data if present + const response_metadata: any = {} + if (inlineDataItems.length > 0) { + response_metadata.inlineData = inlineDataItems + } + const generation: ChatGeneration = { text, message: new AIMessage({ @@ -502,7 +521,8 @@ export function mapGenerateContentResultToChatResult( additional_kwargs: { ...generationInfo }, - usage_metadata: extra?.usageMetadata + usage_metadata: extra?.usageMetadata, + response_metadata: Object.keys(response_metadata).length > 0 ? response_metadata : undefined }), generationInfo } @@ -533,6 +553,8 @@ export function convertResponseContentToChatGenerationChunk( const [candidate] = response.candidates const { content: candidateContent, ...generationInfo } = candidate let content: MessageContent | undefined + const inlineDataItems: any[] = [] + // Checks if some parts do not have text. If false, it means that the content is a string. 
if (Array.isArray(candidateContent?.parts) && candidateContent.parts.every((p) => 'text' in p)) { content = candidateContent.parts.map((p) => p.text).join('') @@ -553,6 +575,18 @@ export function convertResponseContentToChatGenerationChunk( type: 'codeExecutionResult', codeExecutionResult: p.codeExecutionResult } + } else if ('inlineData' in p && p.inlineData) { + // Extract inline image data for processing by Agent + inlineDataItems.push({ + type: 'gemini_inline_data', + mimeType: p.inlineData.mimeType, + data: p.inlineData.data + }) + // Return the inline data as part of the content structure + return { + type: 'inlineData', + inlineData: p.inlineData + } } return p }) @@ -582,6 +616,12 @@ export function convertResponseContentToChatGenerationChunk( ) } + // Build response_metadata with inline data if present + const response_metadata: any = {} + if (inlineDataItems.length > 0) { + response_metadata.inlineData = inlineDataItems + } + return new ChatGenerationChunk({ text, message: new AIMessageChunk({ @@ -591,7 +631,8 @@ export function convertResponseContentToChatGenerationChunk( // Each chunk can have unique "generationInfo", and merging strategy is unclear, // so leave blank for now. additional_kwargs: {}, - usage_metadata: extra.usageMetadata + usage_metadata: extra.usageMetadata, + response_metadata: Object.keys(response_metadata).length > 0 ? response_metadata : undefined }), generationInfo })
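---

Reviewer note, not part of the patch: the sketch below illustrates the round trip this change introduces for Gemini inline images. The three helpers and their signatures come from packages/components/nodes/agentflow/utils.ts above; the `demo` harness, `nodeData`, and `options` objects are hypothetical stand-ins (in a real run, `options` must carry `orgId`, `chatflowid`, and `chatId` for file storage, and `nodeData` must resolve credentials).

import { AIMessageChunk, BaseMessageLike } from '@langchain/core/messages'
import { addImageArtifactsToMessages, extractArtifactsFromResponse, replaceInlineDataWithFileReferences } from '../utils'

const demo = async (nodeData: any, options: any): Promise<void> => {
    // 1) Gemini image generation returns base64 inlineData parts; the mappers in
    //    ChatGoogleGenerativeAI/utils/common.ts mirror them into response_metadata.inlineData.
    const response = new AIMessageChunk({
        content: [{ type: 'inlineData', inlineData: { mimeType: 'image/png', data: '<base64>' } }] as any,
        response_metadata: {
            inlineData: [{ type: 'gemini_inline_data', mimeType: 'image/png', data: '<base64>' }]
        }
    })

    // 2) Persist each inline image to file storage; it comes back as a UI artifact
    //    shaped like { type: 'png', data: '<stored file path>' }.
    const { artifacts, savedInlineImages } = await extractArtifactsFromResponse(response.response_metadata, nodeData, options)

    // 3) Swap the base64 payloads in the response content for stored-file references
    //    so the raw image bytes never reach the database.
    if (savedInlineImages && savedInlineImages.length > 0) {
        replaceInlineDataWithFileReferences(response, savedInlineImages)
    }

    // 4) On a later turn, assistant messages carrying additional_kwargs.artifacts are
    //    re-injected as temporary user messages (flagged _isTemporaryImageMessage and
    //    filtered out before persisting) so the model can see the images it generated.
    const messages: BaseMessageLike[] = [
        { role: 'assistant', content: 'Here is the image', additional_kwargs: { artifacts } } as any
    ]
    await addImageArtifactsToMessages(messages, options)
}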