import {
    EnhancedGenerateContentResponse,
    Content,
    Part,
    type FunctionDeclarationsTool as GoogleGenerativeAIFunctionDeclarationsTool,
    type FunctionDeclaration as GenerativeAIFunctionDeclaration,
    POSSIBLE_ROLES,
    FunctionCallPart,
    TextPart,
    FileDataPart,
    InlineDataPart
} from '@google/generative-ai'
import {
    AIMessage,
    AIMessageChunk,
    BaseMessage,
    ChatMessage,
    ToolMessage,
    ToolMessageChunk,
    MessageContent,
    MessageContentComplex,
    UsageMetadata,
    isAIMessage,
    isBaseMessage,
    isToolMessage,
    StandardContentBlockConverter,
    parseBase64DataUrl,
    convertToProviderContentBlock,
    isDataContentBlock
} from '@langchain/core/messages'
import { ChatGeneration, ChatGenerationChunk, ChatResult } from '@langchain/core/outputs'
import { isLangChainTool } from '@langchain/core/utils/function_calling'
import { isOpenAITool } from '@langchain/core/language_models/base'
import { ToolCallChunk } from '@langchain/core/messages/tool'
import { v4 as uuidv4 } from 'uuid'
import { jsonSchemaToGeminiParameters, schemaToGenerativeAIParameters } from './zod_to_genai_parameters.js'
import { GoogleGenerativeAIToolType } from './types.js'

/**
 * Resolves the author label that is later mapped to a Gemini role.
 *
 * - A `ChatMessage` reports its own `role`.
 * - A message whose type is `'tool'` always reports `'tool'` (its `name` is ignored here).
 * - Any other message reports `message.name` when it is set, otherwise its message type.
 *
 * @param message The message to inspect.
 * @returns The author label for the message.
 */
export function getMessageAuthor(message: BaseMessage) {
    const type = message._getType()
    if (ChatMessage.isInstance(message)) {
        return message.role
    }
    if (type === 'tool') {
        return type
    }
    return message.name ?? type
}

/**
 * !!! IMPORTANT: Must return 'user' as default instead of throwing error
 * https://github.com/FlowiseAI/Flowise/issues/4743
 *
 * Maps an author label (as produced by `getMessageAuthor`) to a Google Generative AI role.
 *
 * @param author The author label to map. Because `getMessageAuthor` may return `message.name`,
 * this can be an arbitrary string and not only a message type.
 * @returns One of `POSSIBLE_ROLES`. Unrecognised authors map to `'user'`.
 */
export function convertAuthorToRole(author: string): (typeof POSSIBLE_ROLES)[number] {
    switch (author) {
        case 'supervisor':
        case 'ai':
        case 'model': // getMessageAuthor returns message.name. code ex.: return message.name ?? type;
            return 'model'
        /**
         *  Note: 'system' is passed through unchanged here. Whether a system message is
         *  rewritten to a 'user' turn and merged with the following message is decided by
         *  convertBaseMessagesToContent.
         * */
        case 'system':
            return 'system'
        case 'human':
            return 'user'
        case 'tool':
        case 'function':
            return 'function'
        default:
            return 'user' // return user as default instead of throwing error
    }
}

/**
 * Converts a `media` content block into a Gemini part.
 *
 * @param content A block carrying `mimeType` plus either `data` (inline) or `fileUri` (file reference).
 * @returns An `inlineData` part when `data` is present, otherwise a `fileData` part.
 * @throws Error('Invalid media content') when neither combination of keys is present.
 */
function messageContentMedia(content: MessageContentComplex): Part {
    if ('mimeType' in content && 'data' in content) {
        return {
            inlineData: {
                mimeType: content.mimeType,
                data: content.data
            }
        }
    }
    if ('mimeType' in content && 'fileUri' in content) {
        return {
            fileData: {
                mimeType: content.mimeType,
                fileUri: content.fileUri
            }
        }
    }

    throw new Error('Invalid media content')
}

/**
 * Looks up the name of the tool a tool message is answering.
 *
 * Collects the `tool_calls` of every AI message in `previousMessages` and returns the name of the
 * first call whose `id` equals `message.tool_call_id`.
 *
 * @param message The tool message whose tool name is unknown.
 * @param previousMessages The messages that precede `message` in the conversation.
 * @returns The matching tool name, or `undefined` when no call matches.
 */
function inferToolNameFromPreviousMessages(message: ToolMessage | ToolMessageChunk, previousMessages: BaseMessage[]): string | undefined {
    return previousMessages
        .flatMap((msg) => (isAIMessage(msg) ? (msg.tool_calls ?? []) : []))
        .find((toolCall) => toolCall.id === message.tool_call_id)?.name
}

/**
 * Builds the converter that turns LangChain standard data content blocks into Gemini parts.
 *
 * For image, audio and file blocks the rules are the same:
 * - `source_type === 'url'`: if the URL parses as a base64 data URL it becomes `inlineData`,
 *   otherwise it becomes `fileData` pointing at the URL.
 * - `source_type === 'base64'`: becomes `inlineData`.
 * - anything else throws `Unsupported source type`.
 * File blocks additionally accept `source_type === 'text'`, which becomes a text part.
 *
 * @param isMultimodalModel When false, image, audio and file blocks throw instead of converting.
 * @returns The converter object.
 */
function _getStandardContentBlockConverter(isMultimodalModel: boolean) {
    const standardContentBlockConverter: StandardContentBlockConverter<{
        text: TextPart
        image: FileDataPart | InlineDataPart
        audio: FileDataPart | InlineDataPart
        file: FileDataPart | InlineDataPart | TextPart
    }> = {
        providerName: 'Google Gemini',

        fromStandardTextBlock(block) {
            return {
                text: block.text
            }
        },

        fromStandardImageBlock(block): FileDataPart | InlineDataPart {
            if (!isMultimodalModel) {
                throw new Error('This model does not support images')
            }
            if (block.source_type === 'url') {
                const data = parseBase64DataUrl({ dataUrl: block.url })
                if (data) {
                    return {
                        inlineData: {
                            mimeType: data.mime_type,
                            data: data.data
                        }
                    }
                } else {
                    return {
                        fileData: {
                            mimeType: block.mime_type ?? '',
                            fileUri: block.url
                        }
                    }
                }
            }

            if (block.source_type === 'base64') {
                return {
                    inlineData: {
                        mimeType: block.mime_type ?? '',
                        data: block.data
                    }
                }
            }

            throw new Error(`Unsupported source type: ${block.source_type}`)
        },

        fromStandardAudioBlock(block): FileDataPart | InlineDataPart {
            if (!isMultimodalModel) {
                throw new Error('This model does not support audio')
            }
            if (block.source_type === 'url') {
                const data = parseBase64DataUrl({ dataUrl: block.url })
                if (data) {
                    return {
                        inlineData: {
                            mimeType: data.mime_type,
                            data: data.data
                        }
                    }
                } else {
                    return {
                        fileData: {
                            mimeType: block.mime_type ?? '',
                            fileUri: block.url
                        }
                    }
                }
            }

            if (block.source_type === 'base64') {
                return {
                    inlineData: {
                        mimeType: block.mime_type ?? '',
                        data: block.data
                    }
                }
            }

            throw new Error(`Unsupported source type: ${block.source_type}`)
        },

        fromStandardFileBlock(block): FileDataPart | InlineDataPart | TextPart {
            if (!isMultimodalModel) {
                throw new Error('This model does not support files')
            }
            if (block.source_type === 'text') {
                return {
                    text: block.text
                }
            }
            if (block.source_type === 'url') {
                const data = parseBase64DataUrl({ dataUrl: block.url })
                if (data) {
                    return {
                        inlineData: {
                            mimeType: data.mime_type,
                            data: data.data
                        }
                    }
                } else {
                    return {
                        fileData: {
                            mimeType: block.mime_type ?? '',
                            fileUri: block.url
                        }
                    }
                }
            }

            if (block.source_type === 'base64') {
                return {
                    inlineData: {
                        mimeType: block.mime_type ?? '',
                        data: block.data
                    }
                }
            }

            throw new Error(`Unsupported source type: ${block.source_type}`)
        }
    }
    return standardContentBlockConverter
}

/**
 * Converts one LangChain content block into a Gemini part.
 *
 * Handled block kinds, in order: standard data content blocks, `text`, `executableCode`,
 * `codeExecutionResult`, `image_url` (base64 data URLs only), `media`, `tool_use`, blocks whose
 * `type` is a `major/minor` MIME string with string `data`, and blocks carrying a `functionCall` key.
 *
 * @param content The block to convert.
 * @param isMultimodalModel When false, image blocks throw instead of converting.
 * @returns The converted part, or `undefined` for `functionCall` blocks (see comment below).
 * @throws Error when the block kind is unknown or an image is not a base64 data URL.
 */
function _convertLangChainContentToPart(content: MessageContentComplex, isMultimodalModel: boolean): Part | undefined {
    if (isDataContentBlock(content)) {
        return convertToProviderContentBlock(content, _getStandardContentBlockConverter(isMultimodalModel))
    }

    if (content.type === 'text') {
        return { text: content.text }
    } else if (content.type === 'executableCode') {
        return { executableCode: content.executableCode }
    } else if (content.type === 'codeExecutionResult') {
        return { codeExecutionResult: content.codeExecutionResult }
    } else if (content.type === 'image_url') {
        if (!isMultimodalModel) {
            throw new Error(`This model does not support images`)
        }
        let source
        if (typeof content.image_url === 'string') {
            source = content.image_url
        } else if (typeof content.image_url === 'object' && 'url' in content.image_url) {
            source = content.image_url.url
        } else {
            throw new Error('Please provide image as base64 encoded data URL')
        }
        // A data URL has the shape `data:<mime>;base64,<payload>`: split the header from the payload.
        const [dm, data] = source.split(',')
        if (!dm.startsWith('data:')) {
            throw new Error('Please provide image as base64 encoded data URL')
        }

        const [mimeType, encoding] = dm.replace(/^data:/, '').split(';')
        if (encoding !== 'base64') {
            throw new Error('Please provide image as base64 encoded data URL')
        }

        return {
            inlineData: {
                data,
                mimeType
            }
        }
    } else if (content.type === 'media') {
        return messageContentMedia(content)
    } else if (content.type === 'tool_use') {
        return {
            functionCall: {
                name: content.name,
                args: content.input
            }
        }
    } else if (
        content.type?.includes('/') &&
        // Ensure it's a single slash.
        content.type.split('/').length === 2 &&
        'data' in content &&
        typeof content.data === 'string'
    ) {
        return {
            inlineData: {
                mimeType: content.type,
                data: content.data
            }
        }
    } else if ('functionCall' in content) {
        // No action needed here — function calls will be added later from message.tool_calls
        return undefined
    } else {
        if ('type' in content) {
            throw new Error(`Unknown content type ${content.type}`)
        } else {
            throw new Error(`Unknown content ${JSON.stringify(content)}`)
        }
    }
}

/**
 * Converts a single message into the list of Gemini parts that represent it.
 *
 * Tool messages become exactly one `functionResponse` part. All other messages become their
 * text / converted content parts followed by one `functionCall` part per entry in
 * `message.tool_calls` (AI messages only).
 *
 * @param message The message to convert.
 * @param isMultimodalModel Forwarded to the content-block conversion; gates image/audio/file blocks.
 * @param previousMessages Messages preceding `message`, used to infer a tool name for tool
 * messages that have no `name`.
 * @returns The parts for this message.
 * @throws Error when a tool message has no `name` and none can be inferred.
 */
export function convertMessageContentToParts(message: BaseMessage, isMultimodalModel: boolean, previousMessages: BaseMessage[]): Part[] {
    if (isToolMessage(message)) {
        const messageName = message.name ?? inferToolNameFromPreviousMessages(message, previousMessages)
        if (messageName === undefined) {
            throw new Error(
                `Google requires a tool name for each tool call response, and we could not infer a called tool name for ToolMessage "${message.id}" from your passed messages. Please populate a "name" field on that ToolMessage explicitly.`
            )
        }

        const result = Array.isArray(message.content)
            ? message.content.map((c) => _convertLangChainContentToPart(c, isMultimodalModel)).filter((p): p is Part => p !== undefined)
            : message.content

        if (message.status === 'error') {
            return [
                {
                    functionResponse: {
                        name: messageName,
                        // The API expects an object with an `error` field if the function call fails.
                        // `error` must be a valid object (not a string or array), so we wrap `message.content` here
                        response: { error: { details: result } }
                    }
                }
            ]
        }

        return [
            {
                functionResponse: {
                    name: messageName,
                    // again, can't have a string or array value for `response`, so we wrap it as an object here
                    response: { result }
                }
            }
        ]
    }

    let functionCalls: FunctionCallPart[] = []
    const messageParts: Part[] = []

    // An empty string contributes no text part.
    if (typeof message.content === 'string' && message.content) {
        messageParts.push({ text: message.content })
    }

    if (Array.isArray(message.content)) {
        messageParts.push(
            ...message.content.map((c) => _convertLangChainContentToPart(c, isMultimodalModel)).filter((p): p is Part => p !== undefined)
        )
    }

    if (isAIMessage(message) && message.tool_calls?.length) {
        functionCalls = message.tool_calls.map((tc) => {
            return {
                functionCall: {
                    name: tc.name,
                    args: tc.args
                }
            }
        })
    }

    return [...messageParts, ...functionCalls]
}

/**
 * Converts a LangChain message list into the `Content[]` array sent to Gemini.
 *
 * Role handling:
 * - `'function'` roles are always emitted as `'user'`.
 * - With `convertSystemMessageToHumanContent` false (the default), a system message is emitted with
 *   role `'user'` and the parts of the next message are appended to it instead of forming a new entry.
 * - With `convertSystemMessageToHumanContent` true, a system message keeps role `'system'` and is
 *   not merged with the following message.
 *
 * @param messages The conversation, oldest first.
 * @param isMultimodalModel Forwarded to `convertMessageContentToParts`.
 * @param convertSystemMessageToHumanContent See role handling above.
 * @returns The converted contents.
 * @throws Error when an element is not a base message, or a system message is not at index 0.
 */
export function convertBaseMessagesToContent(
    messages: BaseMessage[],
    isMultimodalModel: boolean,
    convertSystemMessageToHumanContent: boolean = false
) {
    return messages.reduce<{
        content: Content[]
        mergeWithPreviousContent: boolean
    }>(
        (acc, message, index) => {
            if (!isBaseMessage(message)) {
                throw new Error('Unsupported message input')
            }
            const author = getMessageAuthor(message)
            if (author === 'system' && index !== 0) {
                throw new Error('System message should be the first one')
            }
            const role = convertAuthorToRole(author)

            // NOTE(review): this spot used to read `acc.content[acc.content.length]` and throw
            // 'Google Generative AI requires alternate messages between authors' when that entry had
            // the same role as the current message. The index is one past the end of the array, so the
            // lookup was always `undefined` and the check could never fire. The unreachable check has
            // been removed rather than repaired: enabling it would start rejecting same-role message
            // sequences that are accepted today, which conflicts with the lenient role mapping in
            // convertAuthorToRole. TODO confirm whether strict alternation should be enforced.

            const parts = convertMessageContentToParts(message, isMultimodalModel, messages.slice(0, index))

            if (acc.mergeWithPreviousContent) {
                // The previous entry is the system message emitted as a 'user' turn: fold this message into it.
                const prevContent = acc.content[acc.content.length - 1]
                if (!prevContent) {
                    throw new Error('There was a problem parsing your system message. Please try a prompt without one.')
                }
                prevContent.parts.push(...parts)

                return {
                    mergeWithPreviousContent: false,
                    content: acc.content
                }
            }

            let actualRole = role
            if (actualRole === 'function' || (actualRole === 'system' && !convertSystemMessageToHumanContent)) {
                // GenerativeAI API will throw an error if the role is not "user" or "model."
                actualRole = 'user'
            }
            const content: Content = {
                role: actualRole,
                parts
            }
            return {
                mergeWithPreviousContent: author === 'system' && !convertSystemMessageToHumanContent,
                content: [...acc.content, content]
            }
        },
        { content: [], mergeWithPreviousContent: false }
    ).content
}

/**
 * Converts a non-streaming Gemini response into a LangChain `ChatResult`.
 *
 * Only the first candidate is used. A single text part becomes string content; otherwise the parts
 * are mapped to typed content blocks. Function calls become `tool_calls`, each given a generated
 * UUID when the call carries no string `id`.
 *
 * @param response The Gemini response.
 * @param extra Optional usage metadata to attach to the message and to `llmOutput.tokenUsage`.
 * @returns The chat result. When there are no candidates, `generations` is empty and
 * `llmOutput.filters` carries `response.promptFeedback`.
 */
export function mapGenerateContentResultToChatResult(
    response: EnhancedGenerateContentResponse,
    extra?: {
        usageMetadata: UsageMetadata | undefined
    }
): ChatResult {
    // if rejected or error, return empty generations with reason in filters
    if (!response.candidates || response.candidates.length === 0 || !response.candidates[0]) {
        return {
            generations: [],
            llmOutput: {
                filters: response.promptFeedback
            }
        }
    }

    const functionCalls = response.functionCalls()
    const [candidate] = response.candidates
    const { content: candidateContent, ...generationInfo } = candidate
    let content: MessageContent | undefined

    if (Array.isArray(candidateContent?.parts) && candidateContent.parts.length === 1 && candidateContent.parts[0].text) {
        content = candidateContent.parts[0].text
    } else if (Array.isArray(candidateContent?.parts) && candidateContent.parts.length > 0) {
        content = candidateContent.parts.map((p) => {
            if ('text' in p) {
                return {
                    type: 'text',
                    text: p.text
                }
            } else if ('executableCode' in p) {
                return {
                    type: 'executableCode',
                    executableCode: p.executableCode
                }
            } else if ('codeExecutionResult' in p) {
                return {
                    type: 'codeExecutionResult',
                    codeExecutionResult: p.codeExecutionResult
                }
            }
            return p
        })
    } else {
        // no content returned - likely due to abnormal stop reason, e.g. malformed function call
        content = []
    }

    // `text` is the whole string content, or the text of the first block that has a `text` key.
    let text = ''
    if (typeof content === 'string') {
        text = content
    } else if (Array.isArray(content) && content.length > 0) {
        const block = content.find((b) => 'text' in b) as { text: string } | undefined
        text = block?.text ?? text
    }

    const generation: ChatGeneration = {
        text,
        message: new AIMessage({
            content: content ?? '',
            tool_calls: functionCalls?.map((fc) => {
                return {
                    ...fc,
                    type: 'tool_call',
                    id: 'id' in fc && typeof fc.id === 'string' ? fc.id : uuidv4()
                }
            }),
            additional_kwargs: {
                ...generationInfo
            },
            usage_metadata: extra?.usageMetadata
        }),
        generationInfo
    }

    return {
        generations: [generation],
        llmOutput: {
            tokenUsage: {
                promptTokens: extra?.usageMetadata?.input_tokens,
                completionTokens: extra?.usageMetadata?.output_tokens,
                totalTokens: extra?.usageMetadata?.total_tokens
            }
        }
    }
}

/**
 * Converts one streamed Gemini response into a LangChain `ChatGenerationChunk`.
 *
 * Only the first candidate is used. When every part has a `text` key the texts are joined into
 * string content; otherwise the parts are mapped to typed content blocks. Function calls become
 * tool call chunks with JSON-stringified `args` and `extra.index` as their index.
 *
 * @param response The streamed Gemini response.
 * @param extra `index` for emitted tool call chunks, plus optional usage metadata.
 * @returns The chunk, or `null` when the response has no candidates.
 */
export function convertResponseContentToChatGenerationChunk(
    response: EnhancedGenerateContentResponse,
    extra: {
        usageMetadata?: UsageMetadata | undefined
        index: number
    }
): ChatGenerationChunk | null {
    if (!response.candidates || response.candidates.length === 0) {
        return null
    }
    const functionCalls = response.functionCalls()
    const [candidate] = response.candidates
    const { content: candidateContent, ...generationInfo } = candidate
    let content: MessageContent | undefined
    // Checks if some parts do not have text. If false, it means that the content is a string.
    if (Array.isArray(candidateContent?.parts) && candidateContent.parts.every((p) => 'text' in p)) {
        content = candidateContent.parts.map((p) => p.text).join('')
    } else if (Array.isArray(candidateContent?.parts)) {
        content = candidateContent.parts.map((p) => {
            if ('text' in p) {
                return {
                    type: 'text',
                    text: p.text
                }
            } else if ('executableCode' in p) {
                return {
                    type: 'executableCode',
                    executableCode: p.executableCode
                }
            } else if ('codeExecutionResult' in p) {
                return {
                    type: 'codeExecutionResult',
                    codeExecutionResult: p.codeExecutionResult
                }
            }
            return p
        })
    } else {
        // no content returned - likely due to abnormal stop reason, e.g. malformed function call
        content = []
    }

    // `text` is the whole string content, or the text of the first block that has a `text` key.
    let text = ''
    if (content && typeof content === 'string') {
        text = content
    } else if (Array.isArray(content)) {
        const block = content.find((b) => 'text' in b) as { text: string } | undefined
        text = block?.text ?? ''
    }

    const toolCallChunks: ToolCallChunk[] = []
    if (functionCalls) {
        toolCallChunks.push(
            ...functionCalls.map((fc) => ({
                ...fc,
                args: JSON.stringify(fc.args),
                index: extra.index,
                type: 'tool_call_chunk' as const,
                id: 'id' in fc && typeof fc.id === 'string' ? fc.id : uuidv4()
            }))
        )
    }

    return new ChatGenerationChunk({
        text,
        message: new AIMessageChunk({
            content: content || '',
            name: !candidateContent ? undefined : candidateContent.role,
            tool_call_chunks: toolCallChunks,
            // Each chunk can have unique "generationInfo", and merging strategy is unclear,
            // so leave blank for now.
            additional_kwargs: {},
            usage_metadata: extra.usageMetadata
        }),
        generationInfo
    })
}

/**
 * Normalises a mixed list of tool definitions into Gemini function-declaration tools.
 *
 * If every entry already has a `functionDeclarations` array the list is returned unchanged.
 * Otherwise all entries are folded into a single tool whose `functionDeclarations` contains one
 * declaration per entry:
 * - LangChain tools: name, description and converted schema (`parameters` is omitted when the
 *   converted schema is an object with no properties).
 * - OpenAI-style tools: name, description (with a fallback text) and converted parameters.
 * - Anything else is passed through as an already-formed function declaration.
 *
 * @param tools The tool definitions to convert.
 * @returns The Gemini tools.
 */
export function convertToGenerativeAITools(tools: GoogleGenerativeAIToolType[]): GoogleGenerativeAIFunctionDeclarationsTool[] {
    if (tools.every((tool) => 'functionDeclarations' in tool && Array.isArray(tool.functionDeclarations))) {
        return tools as GoogleGenerativeAIFunctionDeclarationsTool[]
    }
    return [
        {
            functionDeclarations: tools.map((tool): GenerativeAIFunctionDeclaration => {
                if (isLangChainTool(tool)) {
                    const jsonSchema = schemaToGenerativeAIParameters(tool.schema)
                    if (jsonSchema.type === 'object' && 'properties' in jsonSchema && Object.keys(jsonSchema.properties).length === 0) {
                        return {
                            name: tool.name,
                            description: tool.description
                        }
                    }
                    return {
                        name: tool.name,
                        description: tool.description,
                        parameters: jsonSchema
                    }
                }
                if (isOpenAITool(tool)) {
                    return {
                        name: tool.function.name,
                        description: tool.function.description ?? `A function available to call.`,
                        parameters: jsonSchemaToGeminiParameters(tool.function.parameters)
                    }
                }
                return tool as unknown as GenerativeAIFunctionDeclaration
            })
        }
    ]
}