Flowise/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/utils/common.ts

import {
EnhancedGenerateContentResponse,
Content,
Part,
type FunctionDeclarationsTool as GoogleGenerativeAIFunctionDeclarationsTool,
type FunctionDeclaration as GenerativeAIFunctionDeclaration,
POSSIBLE_ROLES,
FunctionCallPart,
TextPart,
FileDataPart,
InlineDataPart
} from '@google/generative-ai'
import {
AIMessage,
AIMessageChunk,
BaseMessage,
ChatMessage,
ToolMessage,
ToolMessageChunk,
MessageContent,
MessageContentComplex,
UsageMetadata,
isAIMessage,
isBaseMessage,
isToolMessage,
StandardContentBlockConverter,
parseBase64DataUrl,
convertToProviderContentBlock,
isDataContentBlock
} from '@langchain/core/messages'
import { ChatGeneration, ChatGenerationChunk, ChatResult } from '@langchain/core/outputs'
import { isLangChainTool } from '@langchain/core/utils/function_calling'
import { isOpenAITool } from '@langchain/core/language_models/base'
import { ToolCallChunk } from '@langchain/core/messages/tool'
import { v4 as uuidv4 } from 'uuid'
import { jsonSchemaToGeminiParameters, schemaToGenerativeAIParameters } from './zod_to_genai_parameters.js'
import { GoogleGenerativeAIToolType } from './types.js'
export function getMessageAuthor(message: BaseMessage) {
const type = message._getType()
if (ChatMessage.isInstance(message)) {
return message.role
}
if (type === 'tool') {
return type
}
return message.name ?? type
}
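
/*
 * Illustrative usage (a hedged sketch, assuming the standard @langchain/core
 * message classes; not part of the module):
 *
 *   getMessageAuthor(new HumanMessage('hi'))              // => 'human'
 *   getMessageAuthor(new ChatMessage('hi', 'supervisor')) // => 'supervisor'
 *   getMessageAuthor(new ToolMessage({ content: 'ok', tool_call_id: '1' })) // => 'tool'
 */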
/**
 * !!! IMPORTANT: Must return 'user' as the default instead of throwing an error
 * https://github.com/FlowiseAI/Flowise/issues/4743
 * Maps a message author to a Google Generative AI chat role.
 * @param author The message author, as returned by getMessageAuthor.
 * @returns The author mapped to a Google Generative AI chat role.
 */
export function convertAuthorToRole(author: string): (typeof POSSIBLE_ROLES)[number] {
switch (author) {
/**
 * Note: Gemini does not currently support system messages;
 * we convert them to human messages and merge them with the following message.
 */
case 'supervisor':
case 'ai':
case 'model': // getMessageAuthor may return message.name (see: return message.name ?? type)
return 'model'
case 'system':
return 'system'
case 'human':
return 'user'
case 'tool':
case 'function':
return 'function'
default:
return 'user' // default to 'user' instead of throwing an error
}
}
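
/*
 * Illustrative mappings (matching the switch above):
 *
 *   convertAuthorToRole('ai')            // => 'model'
 *   convertAuthorToRole('human')         // => 'user'
 *   convertAuthorToRole('tool')          // => 'function'
 *   convertAuthorToRole('my-agent-name') // => 'user' (lenient default, issue #4743)
 */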
function messageContentMedia(content: MessageContentComplex): Part {
if ('mimeType' in content && 'data' in content) {
return {
inlineData: {
mimeType: content.mimeType,
data: content.data
}
}
}
if ('mimeType' in content && 'fileUri' in content) {
return {
fileData: {
mimeType: content.mimeType,
fileUri: content.fileUri
}
}
}
throw new Error('Invalid media content')
}
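
/*
 * Illustrative usage: a LangChain 'media' block with raw base64 bytes becomes
 * an inlineData part, while one carrying a fileUri becomes a fileData part.
 *
 *   messageContentMedia({ type: 'media', mimeType: 'image/png', data: '<base64>' })
 *   // => { inlineData: { mimeType: 'image/png', data: '<base64>' } }
 */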
function inferToolNameFromPreviousMessages(message: ToolMessage | ToolMessageChunk, previousMessages: BaseMessage[]): string | undefined {
return previousMessages
.map((msg) => {
if (isAIMessage(msg)) {
return msg.tool_calls ?? []
}
return []
})
.flat()
.find((toolCall) => {
return toolCall.id === message.tool_call_id
})?.name
}
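
/*
 * Illustrative usage (hypothetical ids): the tool name is recovered by
 * matching tool_call_id against tool calls issued by an earlier AIMessage.
 *
 *   const ai = new AIMessage({ content: '', tool_calls: [{ id: 'call_1', name: 'search', args: {} }] })
 *   const toolMsg = new ToolMessage({ content: 'ok', tool_call_id: 'call_1' })
 *   inferToolNameFromPreviousMessages(toolMsg, [ai]) // => 'search'
 */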
function _getStandardContentBlockConverter(isMultimodalModel: boolean) {
const standardContentBlockConverter: StandardContentBlockConverter<{
text: TextPart
image: FileDataPart | InlineDataPart
audio: FileDataPart | InlineDataPart
file: FileDataPart | InlineDataPart | TextPart
}> = {
providerName: 'Google Gemini',
fromStandardTextBlock(block) {
return {
text: block.text
}
},
fromStandardImageBlock(block): FileDataPart | InlineDataPart {
if (!isMultimodalModel) {
throw new Error('This model does not support images')
}
if (block.source_type === 'url') {
const data = parseBase64DataUrl({ dataUrl: block.url })
if (data) {
return {
inlineData: {
mimeType: data.mime_type,
data: data.data
}
}
} else {
return {
fileData: {
mimeType: block.mime_type ?? '',
fileUri: block.url
}
}
}
}
if (block.source_type === 'base64') {
return {
inlineData: {
mimeType: block.mime_type ?? '',
data: block.data
}
}
}
throw new Error(`Unsupported source type: ${block.source_type}`)
},
fromStandardAudioBlock(block): FileDataPart | InlineDataPart {
if (!isMultimodalModel) {
throw new Error('This model does not support audio')
}
if (block.source_type === 'url') {
const data = parseBase64DataUrl({ dataUrl: block.url })
if (data) {
return {
inlineData: {
mimeType: data.mime_type,
data: data.data
}
}
} else {
return {
fileData: {
mimeType: block.mime_type ?? '',
fileUri: block.url
}
}
}
}
if (block.source_type === 'base64') {
return {
inlineData: {
mimeType: block.mime_type ?? '',
data: block.data
}
}
}
throw new Error(`Unsupported source type: ${block.source_type}`)
},
fromStandardFileBlock(block): FileDataPart | InlineDataPart | TextPart {
if (!isMultimodalModel) {
throw new Error('This model does not support files')
}
if (block.source_type === 'text') {
return {
text: block.text
}
}
if (block.source_type === 'url') {
const data = parseBase64DataUrl({ dataUrl: block.url })
if (data) {
return {
inlineData: {
mimeType: data.mime_type,
data: data.data
}
}
} else {
return {
fileData: {
mimeType: block.mime_type ?? '',
fileUri: block.url
}
}
}
}
if (block.source_type === 'base64') {
return {
inlineData: {
mimeType: block.mime_type ?? '',
data: block.data
}
}
}
throw new Error(`Unsupported source type: ${block.source_type}`)
}
}
return standardContentBlockConverter
}
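
/*
 * Illustrative usage: converting a standard base64 image block with the
 * converter above (multimodal model assumed).
 *
 *   const converter = _getStandardContentBlockConverter(true)
 *   converter.fromStandardImageBlock({ type: 'image', source_type: 'base64', mime_type: 'image/png', data: '<base64>' })
 *   // => { inlineData: { mimeType: 'image/png', data: '<base64>' } }
 */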
function _convertLangChainContentToPart(content: MessageContentComplex, isMultimodalModel: boolean): Part | undefined {
if (isDataContentBlock(content)) {
return convertToProviderContentBlock(content, _getStandardContentBlockConverter(isMultimodalModel))
}
if (content.type === 'text') {
return { text: content.text }
} else if (content.type === 'executableCode') {
return { executableCode: content.executableCode }
} else if (content.type === 'codeExecutionResult') {
return { codeExecutionResult: content.codeExecutionResult }
} else if (content.type === 'image_url') {
if (!isMultimodalModel) {
throw new Error(`This model does not support images`)
}
let source
if (typeof content.image_url === 'string') {
source = content.image_url
} else if (typeof content.image_url === 'object' && 'url' in content.image_url) {
source = content.image_url.url
} else {
throw new Error('Please provide image as base64 encoded data URL')
}
const [dm, data] = source.split(',')
if (!dm.startsWith('data:')) {
throw new Error('Please provide image as base64 encoded data URL')
}
const [mimeType, encoding] = dm.replace(/^data:/, '').split(';')
if (encoding !== 'base64') {
throw new Error('Please provide image as base64 encoded data URL')
}
return {
inlineData: {
data,
mimeType
}
}
} else if (content.type === 'media') {
return messageContentMedia(content)
} else if (content.type === 'tool_use') {
return {
functionCall: {
name: content.name,
args: content.input
}
}
} else if (
content.type?.includes('/') &&
// Ensure it's a single slash.
content.type.split('/').length === 2 &&
'data' in content &&
typeof content.data === 'string'
) {
return {
inlineData: {
mimeType: content.type,
data: content.data
}
}
} else if ('functionCall' in content) {
// No action needed here — function calls will be added later from message.tool_calls
return undefined
} else {
if ('type' in content) {
throw new Error(`Unknown content type ${content.type}`)
} else {
throw new Error(`Unknown content ${JSON.stringify(content)}`)
}
}
}
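
/*
 * Illustrative usage: an OpenAI-style image_url block carrying a base64 data
 * URL is split into its mime type and payload.
 *
 *   _convertLangChainContentToPart({ type: 'image_url', image_url: { url: 'data:image/jpeg;base64,<base64>' } }, true)
 *   // => { inlineData: { data: '<base64>', mimeType: 'image/jpeg' } }
 */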
export function convertMessageContentToParts(message: BaseMessage, isMultimodalModel: boolean, previousMessages: BaseMessage[]): Part[] {
if (isToolMessage(message)) {
const messageName = message.name ?? inferToolNameFromPreviousMessages(message, previousMessages)
if (messageName === undefined) {
throw new Error(
`Google requires a tool name for each tool call response, and we could not infer a called tool name for ToolMessage "${message.id}" from your passed messages. Please populate a "name" field on that ToolMessage explicitly.`
)
}
const result = Array.isArray(message.content)
? (message.content.map((c) => _convertLangChainContentToPart(c, isMultimodalModel)).filter((p) => p !== undefined) as Part[])
: message.content
if (message.status === 'error') {
return [
{
functionResponse: {
name: messageName,
// The API expects an object with an `error` field if the function call fails.
// `error` must be a valid object (not a string or array), so we wrap `message.content` here
response: { error: { details: result } }
}
}
]
}
return [
{
functionResponse: {
name: messageName,
// again, can't have a string or array value for `response`, so we wrap it as an object here
response: { result }
}
}
]
}
let functionCalls: FunctionCallPart[] = []
const messageParts: Part[] = []
if (typeof message.content === 'string' && message.content) {
messageParts.push({ text: message.content })
}
if (Array.isArray(message.content)) {
messageParts.push(
...(message.content.map((c) => _convertLangChainContentToPart(c, isMultimodalModel)).filter((p) => p !== undefined) as Part[])
)
}
if (isAIMessage(message) && message.tool_calls?.length) {
functionCalls = message.tool_calls.map((tc) => {
return {
functionCall: {
name: tc.name,
args: tc.args
}
}
})
}
return [...messageParts, ...functionCalls]
}
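
/*
 * Illustrative usage (hypothetical tool call): an AIMessage with pending tool
 * calls yields its text part followed by one functionCall part per tool call.
 *
 *   convertMessageContentToParts(
 *     new AIMessage({ content: 'Searching.', tool_calls: [{ id: '1', name: 'search', args: { q: 'x' } }] }),
 *     false,
 *     []
 *   )
 *   // => [{ text: 'Searching.' }, { functionCall: { name: 'search', args: { q: 'x' } } }]
 */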
export function convertBaseMessagesToContent(
messages: BaseMessage[],
isMultimodalModel: boolean,
convertSystemMessageToHumanContent: boolean = false
) {
return messages.reduce<{
content: Content[]
mergeWithPreviousContent: boolean
}>(
(acc, message, index) => {
if (!isBaseMessage(message)) {
throw new Error('Unsupported message input')
}
const author = getMessageAuthor(message)
if (author === 'system' && index !== 0) {
throw new Error('System message should be the first one')
}
const role = convertAuthorToRole(author)
const prevContent = acc.content[acc.content.length - 1]
if (!acc.mergeWithPreviousContent && prevContent && prevContent.role === role) {
throw new Error('Google Generative AI requires alternate messages between authors')
}
const parts = convertMessageContentToParts(message, isMultimodalModel, messages.slice(0, index))
if (acc.mergeWithPreviousContent) {
const prevContent = acc.content[acc.content.length - 1]
if (!prevContent) {
throw new Error('There was a problem parsing your system message. Please try a prompt without one.')
}
prevContent.parts.push(...parts)
return {
mergeWithPreviousContent: false,
content: acc.content
}
}
let actualRole = role
if (actualRole === 'function' || (actualRole === 'system' && !convertSystemMessageToHumanContent)) {
// GenerativeAI API will throw an error if the role is not "user" or "model."
actualRole = 'user'
}
const content: Content = {
role: actualRole,
parts
}
return {
mergeWithPreviousContent: author === 'system' && !convertSystemMessageToHumanContent,
content: [...acc.content, content]
}
},
{ content: [], mergeWithPreviousContent: false }
).content
}
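
/*
 * Illustrative usage (SystemMessage/HumanMessage from @langchain/core/messages):
 * with convertSystemMessageToHumanContent left false, a leading system message
 * is emitted with role 'user' and merged with the following human message.
 *
 *   convertBaseMessagesToContent([new SystemMessage('Be terse.'), new HumanMessage('Hi')], false)
 *   // => [{ role: 'user', parts: [{ text: 'Be terse.' }, { text: 'Hi' }] }]
 */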
export function mapGenerateContentResultToChatResult(
response: EnhancedGenerateContentResponse,
extra?: {
usageMetadata: UsageMetadata | undefined
}
): ChatResult {
// If the prompt was rejected or errored, return empty generations with the reason in `filters`
if (!response.candidates || response.candidates.length === 0 || !response.candidates[0]) {
return {
generations: [],
llmOutput: {
filters: response.promptFeedback
}
}
}
const functionCalls = response.functionCalls()
const [candidate] = response.candidates
const { content: candidateContent, ...generationInfo } = candidate
let content: MessageContent | undefined
const inlineDataItems: any[] = []
if (Array.isArray(candidateContent?.parts) && candidateContent.parts.length === 1 && candidateContent.parts[0].text) {
content = candidateContent.parts[0].text
} else if (Array.isArray(candidateContent?.parts) && candidateContent.parts.length > 0) {
content = candidateContent.parts.map((p) => {
if ('text' in p) {
return {
type: 'text',
text: p.text
}
} else if ('executableCode' in p) {
return {
type: 'executableCode',
executableCode: p.executableCode
}
} else if ('codeExecutionResult' in p) {
return {
type: 'codeExecutionResult',
codeExecutionResult: p.codeExecutionResult
}
} else if ('inlineData' in p && p.inlineData) {
// Extract inline image data for processing by Agent
inlineDataItems.push({
type: 'gemini_inline_data',
mimeType: p.inlineData.mimeType,
data: p.inlineData.data
})
// Return the inline data as part of the content structure
return {
type: 'inlineData',
inlineData: p.inlineData
}
}
return p
})
} else {
// no content returned - likely due to abnormal stop reason, e.g. malformed function call
content = []
}
let text = ''
if (typeof content === 'string') {
text = content
} else if (Array.isArray(content) && content.length > 0) {
const block = content.find((b) => 'text' in b) as { text: string } | undefined
text = block?.text ?? text
}
// Build response_metadata with inline data if present
const response_metadata: any = {}
if (inlineDataItems.length > 0) {
response_metadata.inlineData = inlineDataItems
}
const generation: ChatGeneration = {
text,
message: new AIMessage({
content: content ?? '',
tool_calls: functionCalls?.map((fc) => {
return {
...fc,
type: 'tool_call',
id: 'id' in fc && typeof fc.id === 'string' ? fc.id : uuidv4()
}
}),
additional_kwargs: {
...generationInfo
},
usage_metadata: extra?.usageMetadata,
response_metadata: Object.keys(response_metadata).length > 0 ? response_metadata : undefined
}),
generationInfo
}
return {
generations: [generation],
llmOutput: {
tokenUsage: {
promptTokens: extra?.usageMetadata?.input_tokens,
completionTokens: extra?.usageMetadata?.output_tokens,
totalTokens: extra?.usageMetadata?.total_tokens
}
}
}
}
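
/*
 * Illustrative shape (not a live API call): a single text candidate maps to
 * one ChatGeneration whose message content is the plain string.
 *
 *   mapGenerateContentResultToChatResult(response)
 *   // => {
 *   //   generations: [{ text: 'Hello!', message: AIMessage { content: 'Hello!', ... }, generationInfo: { ... } }],
 *   //   llmOutput: { tokenUsage: { promptTokens: ..., completionTokens: ..., totalTokens: ... } }
 *   // }
 */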
export function convertResponseContentToChatGenerationChunk(
response: EnhancedGenerateContentResponse,
extra: {
usageMetadata?: UsageMetadata | undefined
index: number
}
): ChatGenerationChunk | null {
if (!response.candidates || response.candidates.length === 0) {
return null
}
const functionCalls = response.functionCalls()
const [candidate] = response.candidates
const { content: candidateContent, ...generationInfo } = candidate
let content: MessageContent | undefined
const inlineDataItems: any[] = []
// If every part is a text part, the parts can be joined into a single string
if (Array.isArray(candidateContent?.parts) && candidateContent.parts.every((p) => 'text' in p)) {
content = candidateContent.parts.map((p) => p.text).join('')
} else if (Array.isArray(candidateContent?.parts)) {
content = candidateContent.parts.map((p) => {
if ('text' in p) {
return {
type: 'text',
text: p.text
}
} else if ('executableCode' in p) {
return {
type: 'executableCode',
executableCode: p.executableCode
}
} else if ('codeExecutionResult' in p) {
return {
type: 'codeExecutionResult',
codeExecutionResult: p.codeExecutionResult
}
} else if ('inlineData' in p && p.inlineData) {
// Extract inline image data for processing by Agent
inlineDataItems.push({
type: 'gemini_inline_data',
mimeType: p.inlineData.mimeType,
data: p.inlineData.data
})
// Return the inline data as part of the content structure
return {
type: 'inlineData',
inlineData: p.inlineData
}
}
return p
})
} else {
// no content returned - likely due to abnormal stop reason, e.g. malformed function call
content = []
}
let text = ''
if (content && typeof content === 'string') {
text = content
} else if (Array.isArray(content)) {
const block = content.find((b) => 'text' in b) as { text: string } | undefined
text = block?.text ?? ''
}
const toolCallChunks: ToolCallChunk[] = []
if (functionCalls) {
toolCallChunks.push(
...functionCalls.map((fc) => ({
...fc,
args: JSON.stringify(fc.args),
index: extra.index,
type: 'tool_call_chunk' as const,
id: 'id' in fc && typeof fc.id === 'string' ? fc.id : uuidv4()
}))
)
}
// Build response_metadata with inline data if present
const response_metadata: any = {}
if (inlineDataItems.length > 0) {
response_metadata.inlineData = inlineDataItems
}
return new ChatGenerationChunk({
text,
message: new AIMessageChunk({
content: content || '',
name: !candidateContent ? undefined : candidateContent.role,
tool_call_chunks: toolCallChunks,
// Each chunk can have unique "generationInfo", and merging strategy is unclear,
// so leave blank for now.
additional_kwargs: {},
usage_metadata: extra.usageMetadata,
response_metadata: Object.keys(response_metadata).length > 0 ? response_metadata : undefined
}),
generationInfo
})
}
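
/*
 * Illustrative shape (hypothetical streamed response): each Gemini function
 * call surfaces as a tool_call_chunk with stringified args so downstream
 * chunk merging can concatenate them.
 *
 *   const chunk = convertResponseContentToChatGenerationChunk(response, { index: 0 })
 *   // chunk?.message.tool_call_chunks
 *   // => [{ name: 'search', args: '{"q":"x"}', index: 0, type: 'tool_call_chunk', id: '...' }]
 */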
export function convertToGenerativeAITools(tools: GoogleGenerativeAIToolType[]): GoogleGenerativeAIFunctionDeclarationsTool[] {
if (tools.every((tool) => 'functionDeclarations' in tool && Array.isArray(tool.functionDeclarations))) {
return tools as GoogleGenerativeAIFunctionDeclarationsTool[]
}
return [
{
functionDeclarations: tools.map((tool): GenerativeAIFunctionDeclaration => {
if (isLangChainTool(tool)) {
const jsonSchema = schemaToGenerativeAIParameters(tool.schema)
if (jsonSchema.type === 'object' && 'properties' in jsonSchema && Object.keys(jsonSchema.properties).length === 0) {
return {
name: tool.name,
description: tool.description
}
}
return {
name: tool.name,
description: tool.description,
parameters: jsonSchema
}
}
if (isOpenAITool(tool)) {
return {
name: tool.function.name,
description: tool.function.description ?? `A function available to call.`,
parameters: jsonSchemaToGeminiParameters(tool.function.parameters)
}
}
return tool as unknown as GenerativeAIFunctionDeclaration
})
}
]
}
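
/*
 * Illustrative usage (hypothetical tool definition): an OpenAI-format tool is
 * mapped to a single Gemini functionDeclarations tool with its JSON-Schema
 * parameters translated for Gemini.
 *
 *   convertToGenerativeAITools([{
 *     type: 'function',
 *     function: { name: 'search', description: 'Web search', parameters: { type: 'object', properties: { q: { type: 'string' } } } }
 *   }])
 *   // => [{ functionDeclarations: [{ name: 'search', description: 'Web search', parameters: { ... } }] }]
 */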