// Flowise/packages/components/nodes/chatmodels/ChatGoogleGenerativeAI/FlowiseChatGoogleGenerative...

import { BaseMessage, AIMessage, AIMessageChunk, isBaseMessage, ChatMessage, MessageContent } from '@langchain/core/messages'
import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager'
import { BaseChatModel, type BaseChatModelParams } from '@langchain/core/language_models/chat_models'
import { ChatGeneration, ChatGenerationChunk, ChatResult } from '@langchain/core/outputs'
import { ToolCall } from '@langchain/core/messages/tool'
import { NewTokenIndices } from '@langchain/core/callbacks/base'
import {
EnhancedGenerateContentResponse,
Content,
Part,
Tool,
GenerativeModel,
GoogleGenerativeAI as GenerativeAI
} from '@google/generative-ai'
import type { SafetySetting } from '@google/generative-ai'
import { ICommonObject, IMultiModalOption, IVisionChatModal } from '../../../src'
import { StructuredToolInterface } from '@langchain/core/tools'
import { isStructuredTool } from '@langchain/core/utils/function_calling'
import { zodToJsonSchema } from 'zod-to-json-schema'
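/**
* Token usage estimate accumulated while streaming.
*/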
interface TokenUsage {
completionTokens?: number
promptTokens?: number
totalTokens?: number
}
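/**
* Constructor options for the Google Generative AI (Gemini) chat model.
* Mirrors the generation parameters accepted by the @google/generative-ai SDK.
*/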
export interface GoogleGenerativeAIChatInput extends BaseChatModelParams {
modelName?: string
model?: string
temperature?: number
maxOutputTokens?: number
topP?: number
topK?: number
stopSequences?: string[]
safetySettings?: SafetySetting[]
apiKey?: string
streaming?: boolean
}
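/**
* LangChain-compatible chat model wrapper around the Google Generative AI SDK.
* Handles prompt conversion, tool (function) calling, and streaming.
*/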
class LangchainChatGoogleGenerativeAI extends BaseChatModel implements GoogleGenerativeAIChatInput {
modelName = 'gemini-pro'
temperature?: number
maxOutputTokens?: number
topP?: number
topK?: number
stopSequences: string[] = []
safetySettings?: SafetySetting[]
apiKey?: string
streaming = false
private client: GenerativeModel
get _isMultimodalModel() {
return this.modelName.includes('vision') || this.modelName.startsWith('gemini-1.5')
}
constructor(fields?: GoogleGenerativeAIChatInput) {
super(fields ?? {})
this.modelName = fields?.model?.replace(/^models\//, '') ?? fields?.modelName?.replace(/^models\//, '') ?? 'gemini-pro'
this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens
if (this.maxOutputTokens && this.maxOutputTokens < 0) {
throw new Error('`maxOutputTokens` must be a positive integer')
}
this.temperature = fields?.temperature ?? this.temperature
if (this.temperature && (this.temperature < 0 || this.temperature > 1)) {
throw new Error('`temperature` must be in the range of [0.0,1.0]')
}
this.topP = fields?.topP ?? this.topP
if (this.topP && this.topP < 0) {
throw new Error('`topP` must be a positive number')
}
if (this.topP && this.topP > 1) {
throw new Error('`topP` must be at most 1.0')
}
this.topK = fields?.topK ?? this.topK
if (this.topK && this.topK < 0) {
throw new Error('`topK` must be a positive integer')
}
this.stopSequences = fields?.stopSequences ?? this.stopSequences
this.apiKey = fields?.apiKey ?? process.env['GOOGLE_API_KEY']
if (!this.apiKey) {
throw new Error(
'Please set an API key for Google GenerativeAI ' +
'in the environment variable GOOGLE_API_KEY ' +
'or in the `apiKey` field of the ' +
'ChatGoogleGenerativeAI constructor'
)
}
this.safetySettings = fields?.safetySettings ?? this.safetySettings
if (this.safetySettings && this.safetySettings.length > 0) {
const safetySettingsSet = new Set(this.safetySettings.map((s) => s.category))
if (safetySettingsSet.size !== this.safetySettings.length) {
throw new Error('The categories in `safetySettings` array must be unique')
}
}
this.streaming = fields?.streaming ?? this.streaming
this.getClient()
}
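/**
* (Re)creates the underlying GenerativeModel client, optionally with tools,
* so tool declarations can be swapped per call.
*/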
getClient(tools?: Tool[]) {
this.client = new GenerativeAI(this.apiKey ?? '').getGenerativeModel({
model: this.modelName,
tools,
safetySettings: this.safetySettings as SafetySetting[],
generationConfig: {
candidateCount: 1,
stopSequences: this.stopSequences,
maxOutputTokens: this.maxOutputTokens,
temperature: this.temperature,
topP: this.topP,
topK: this.topK
}
})
}
_combineLLMOutput() {
return []
}
_llmType() {
return 'googlegenerativeai'
}
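/**
* Binds LangChain structured tools to the model by converting them into Gemini
* function declarations.
*/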
override bindTools(tools: (StructuredToolInterface | Record<string, unknown>)[], kwargs?: Partial<ICommonObject>) {
//@ts-ignore
return this.bind({ tools: convertToGeminiTools(tools), ...kwargs })
}
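/**
* Rewrites 'function' role contents into Gemini functionResponse parts, taking the
* function name from the preceding model message's functionCall.
*/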
convertFunctionResponse(prompts: Content[]) {
for (let i = 0; i < prompts.length; i += 1) {
if (prompts[i].role === 'function') {
if (prompts[i - 1].role === 'model') {
const toolName = prompts[i - 1].parts[0].functionCall?.name ?? ''
prompts[i].parts = [
{
functionResponse: {
name: toolName,
response: {
name: toolName,
content: prompts[i].parts[0].text
}
}
}
]
}
}
}
}
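/**
* Single-shot (non-streaming) generation: rebuilds the client with any bound tools,
* calls generateContent, and maps the SDK response to a LangChain ChatResult.
*/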
async _generateNonStreaming(
prompt: Content[],
options: this['ParsedCallOptions'],
_runManager?: CallbackManagerForLLMRun
): Promise<ChatResult> {
//@ts-ignore
const tools = options.tools ?? []
this.convertFunctionResponse(prompt)
if (tools.length > 0) {
this.getClient(tools)
} else {
this.getClient()
}
const res = await this.caller.callWithOptions({ signal: options?.signal }, async () => {
let output
try {
output = await this.client.generateContent({
contents: prompt
})
} catch (e: any) {
if (e.message?.includes('400 Bad Request')) {
e.status = 400
}
throw e
}
return output
})
const generationResult = mapGenerateContentResultToChatResult(res.response)
await _runManager?.handleLLMNewToken(generationResult.generations?.length ? generationResult.generations[0].text : '')
return generationResult
}
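/**
* Main entry point. Converts LangChain messages to Gemini Content and either
* streams (aggregating chunks per completion index) or delegates to
* _generateNonStreaming.
*/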
async _generate(
messages: BaseMessage[],
options: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): Promise<ChatResult> {
let prompt = convertBaseMessagesToContent(messages, this._isMultimodalModel)
prompt = checkIfEmptyContentAndSameRole(prompt)
// Handle streaming
if (this.streaming) {
const tokenUsage: TokenUsage = {}
const stream = this._streamResponseChunks(messages, options, runManager)
const finalChunks: Record<number, ChatGenerationChunk> = {}
for await (const chunk of stream) {
const index = (chunk.generationInfo as NewTokenIndices)?.completion ?? 0
if (finalChunks[index] === undefined) {
finalChunks[index] = chunk
} else {
finalChunks[index] = finalChunks[index].concat(chunk)
}
}
const generations = Object.entries(finalChunks)
.sort(([aKey], [bKey]) => parseInt(aKey, 10) - parseInt(bKey, 10))
.map(([_, value]) => value)
return { generations, llmOutput: { estimatedTokenUsage: tokenUsage } }
}
return this._generateNonStreaming(prompt, options, runManager)
}
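/**
* Streams generation chunks. When tools are bound, falls back to a single
* non-streaming call and yields it as one chunk with tool_call_chunks.
*/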
async *_streamResponseChunks(
messages: BaseMessage[],
options: this['ParsedCallOptions'],
runManager?: CallbackManagerForLLMRun
): AsyncGenerator<ChatGenerationChunk> {
let prompt = convertBaseMessagesToContent(messages, this._isMultimodalModel)
prompt = checkIfEmptyContentAndSameRole(prompt)
//@ts-ignore
if (options.tools !== undefined && options.tools.length > 0) {
const result = await this._generateNonStreaming(prompt, options, runManager)
const generationMessage = result.generations[0].message as AIMessage
if (generationMessage === undefined) {
throw new Error('Could not parse Google Generative AI output.')
}
const toolCallChunks = generationMessage.tool_calls?.map((toolCall, i) => ({
name: toolCall.name,
args: JSON.stringify(toolCall.args),
id: toolCall.id,
index: i
}))
yield new ChatGenerationChunk({
message: new AIMessageChunk({
content: generationMessage.content,
additional_kwargs: generationMessage.additional_kwargs,
tool_call_chunks: toolCallChunks
}),
text: generationMessage.tool_calls?.length ? '' : (generationMessage.content as string)
})
} else {
const stream = await this.caller.callWithOptions({ signal: options?.signal }, async () => {
this.getClient()
const { stream } = await this.client.generateContentStream({
contents: prompt
})
return stream
})
for await (const response of stream) {
const chunk = convertResponseContentToChatGenerationChunk(response)
if (!chunk) {
continue
}
yield chunk
await runManager?.handleLLMNewToken(chunk.text ?? '')
}
}
}
}
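/**
* Flowise wrapper around the LangChain Gemini chat model that implements IVisionChatModal:
* it remembers the originally configured model and max tokens so they can be restored after
* a temporary switch to a vision-capable model.
*
* Minimal usage sketch (the id and option values below are illustrative assumptions,
* not taken from this file):
* @example
* const model = new ChatGoogleGenerativeAI('chatGoogleGenerativeAI_0', {
*     modelName: 'gemini-pro',
*     apiKey: process.env.GOOGLE_API_KEY,
*     temperature: 0.7
* })
* const res = await model.invoke('Hello, Gemini!')
*/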
export class ChatGoogleGenerativeAI extends LangchainChatGoogleGenerativeAI implements IVisionChatModal {
configuredModel: string
configuredMaxToken?: number
multiModalOption: IMultiModalOption
id: string
constructor(id: string, fields?: GoogleGenerativeAIChatInput) {
super(fields)
this.id = id
this.configuredModel = fields?.modelName ?? ''
this.configuredMaxToken = fields?.maxOutputTokens
}
revertToOriginalModel(): void {
super.modelName = this.configuredModel
super.maxOutputTokens = this.configuredMaxToken
}
setMultiModalOption(multiModalOption: IMultiModalOption): void {
this.multiModalOption = multiModalOption
}
setVisionModel(): void {
if (this.modelName !== 'gemini-pro-vision' && this.modelName !== 'gemini-1.5-pro-latest') {
super.modelName = 'gemini-1.5-pro-latest'
super.maxOutputTokens = this.configuredMaxToken ? this.configuredMaxToken : 8192
}
}
}
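/**
* Returns the author of a message: the ChatMessage role, the message name, or its type.
*/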
function getMessageAuthor(message: BaseMessage) {
const type = message._getType()
if (ChatMessage.isInstance(message)) {
return message.role
}
return message.name ?? type
}
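/**
* Maps a LangChain author/type to a Gemini role ('model' | 'user' | 'function').
* Unknown authors default to 'model' instead of throwing.
*/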
function convertAuthorToRole(author: string) {
switch (author) {
/**
* Note: Gemini does not currently support system messages;
* we convert them to human messages and merge them with the following message.
*/
case 'ai':
case 'model': // getMessageAuthor may return message.name here (e.g. return message.name ?? type)
return 'model'
case 'system':
case 'human':
return 'user'
case 'function':
case 'tool':
return 'function'
default:
// Instead of throwing, we return model
// throw new Error(`Unknown / unsupported author: ${author}`)
return 'model'
}
}
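/**
* Converts LangChain MessageContent (text, tool_use, image_url) into Gemini Parts.
* Images must be base64 data URLs and are only allowed on multimodal models.
*/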
function convertMessageContentToParts(content: MessageContent, isMultimodalModel: boolean): Part[] {
if (typeof content === 'string') {
return [{ text: content }]
}
return content.map((c) => {
if (c.type === 'text') {
return {
text: c.text
}
}
if (c.type === 'tool_use') {
return {
functionCall: c.functionCall
}
}
/*if (c.type === "tool_use" || c.type === "tool_result") {
// TODO: Fix when SDK types are fixed
return {
...contentPart,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} as any;
}*/
if (c.type === 'image_url') {
if (!isMultimodalModel) {
throw new Error(`This model does not support images`)
}
let source
if (typeof c.image_url === 'string') {
source = c.image_url
} else if (typeof c.image_url === 'object' && 'url' in c.image_url) {
source = c.image_url.url
} else {
throw new Error('Please provide the image as a base64-encoded data URL')
}
const [dm, data] = source.split(',')
if (!dm.startsWith('data:')) {
throw new Error('Please provide the image as a base64-encoded data URL')
}
const [mimeType, encoding] = dm.replace(/^data:/, '').split(';')
if (encoding !== 'base64') {
throw new Error('Please provide the image as a base64-encoded data URL')
}
return {
inlineData: {
data,
mimeType
}
}
}
throw new Error(`Unknown content type ${(c as { type: string }).type}`)
})
}
/*
* Dedicated logic for the Multi-Agent Supervisor: removes contents whose text is empty
* and whose role repeats the previous content's role.
*/
function checkIfEmptyContentAndSameRole(contents: Content[]) {
let prevRole = ''
const removedContents: Content[] = []
for (const content of contents) {
const role = content.role
if (content.parts.length && content.parts[0].text === '' && role === prevRole) {
removedContents.push(content)
}
prevRole = role
}
return contents.filter((content) => !removedContents.includes(content))
}
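/**
* Converts a list of BaseMessages into Gemini Content, merging a leading system
* message into the following user message (Gemini has no system role here).
*/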
function convertBaseMessagesToContent(messages: BaseMessage[], isMultimodalModel: boolean) {
return messages.reduce<{
content: Content[]
mergeWithPreviousContent: boolean
}>(
(acc, message, index) => {
if (!isBaseMessage(message)) {
throw new Error('Unsupported message input')
}
const author = getMessageAuthor(message)
if (author === 'system' && index !== 0) {
throw new Error('System message should be the first one')
}
const role = convertAuthorToRole(author)
const prevContent = acc.content[acc.content.length]
if (!acc.mergeWithPreviousContent && prevContent && prevContent.role === role) {
throw new Error('Google Generative AI requires alternate messages between authors')
}
const parts = convertMessageContentToParts(message.content, isMultimodalModel)
if (acc.mergeWithPreviousContent) {
const prevContent = acc.content[acc.content.length - 1]
if (!prevContent) {
throw new Error('There was a problem parsing your system message. Please try a prompt without one.')
}
prevContent.parts.push(...parts)
return {
mergeWithPreviousContent: false,
content: acc.content
}
}
const content: Content = {
role,
parts
}
return {
mergeWithPreviousContent: author === 'system',
content: [...acc.content, content]
}
},
{ content: [], mergeWithPreviousContent: false }
).content
}
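/**
* Maps an SDK generateContent response to a LangChain ChatResult, emitting
* tool_calls when the candidate contains functionCall parts.
*/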
function mapGenerateContentResultToChatResult(response: EnhancedGenerateContentResponse): ChatResult {
// if rejected or error, return empty generations with reason in filters
if (!response.candidates || response.candidates.length === 0 || !response.candidates[0]) {
return {
generations: [],
llmOutput: {
filters: response?.promptFeedback
}
}
}
const [candidate] = response.candidates
const { content, ...generationInfo } = candidate
const text = content.parts.map(({ text }) => text ?? '').join('')
if (content.parts.some((part) => part.functionCall)) {
const toolCalls: ToolCall[] = []
for (const fcPart of content.parts) {
const fc = fcPart.functionCall
if (fc) {
const { name, args } = fc
toolCalls.push({ name, args })
}
}
const functionCalls = toolCalls.map((tool) => {
return { functionCall: { name: tool.name, args: tool.args }, type: 'tool_use' }
})
const generation: ChatGeneration = {
text,
message: new AIMessage({
content: functionCalls,
name: !content ? undefined : content.role,
additional_kwargs: generationInfo,
tool_calls: toolCalls
}),
generationInfo
}
return {
generations: [generation]
}
} else {
const generation: ChatGeneration = {
text,
message: new AIMessage({
content: text,
name: !content ? undefined : content.role,
additional_kwargs: generationInfo
}),
generationInfo
}
return {
generations: [generation]
}
}
}
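/**
* Converts one streamed SDK response into a ChatGenerationChunk, or null when the
* response has no candidates.
*/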
function convertResponseContentToChatGenerationChunk(response: EnhancedGenerateContentResponse): ChatGenerationChunk | null {
if (!response.candidates || response.candidates.length === 0) {
return null
}
const [candidate] = response.candidates
const { content, ...generationInfo } = candidate
const text = content?.parts[0]?.text ?? ''
return new ChatGenerationChunk({
text,
message: new AIMessageChunk({
content: text,
name: !content ? undefined : content.role,
// Each chunk can have unique "generationInfo", and merging strategy is unclear,
// so leave blank for now.
additional_kwargs: {}
}),
generationInfo
})
}
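/**
* Converts a Zod schema into the parameter schema expected by Gemini function
* declarations, normalising enum properties along the way.
*
* Illustrative sketch (the schema below is an assumption, not from this file):
* @example
* import { z } from 'zod'
* const params = zodToGeminiParameters(z.object({ unit: z.enum(['celsius', 'fahrenheit']) }))
* // params.properties.unit -> { type: 'string', format: 'enum', enum: ['celsius', 'fahrenheit'] }
*/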
function zodToGeminiParameters(zodObj: any) {
// Gemini doesn't accept either the $schema or additionalProperties
// attributes, so we need to explicitly remove them.
const jsonSchema: any = zodToJsonSchema(zodObj)
// eslint-disable-next-line unused-imports/no-unused-vars
const { $schema, additionalProperties, ...rest } = jsonSchema
if (rest.properties) {
Object.keys(rest.properties).forEach((key) => {
if (rest.properties[key].enum?.length) {
rest.properties[key] = { type: 'string', format: 'enum', enum: rest.properties[key].enum }
}
})
}
return rest
}
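/**
* Wraps LangChain structured tools (or raw tool objects) into a single Gemini Tool
* containing functionDeclarations.
*/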
function convertToGeminiTools(structuredTools: (StructuredToolInterface | Record<string, unknown>)[]) {
return [
{
functionDeclarations: structuredTools.map((structuredTool) => {
if (isStructuredTool(structuredTool)) {
const jsonSchema = zodToGeminiParameters(structuredTool.schema)
return {
name: structuredTool.name,
description: structuredTool.description,
parameters: jsonSchema
}
}
return structuredTool
})
}
]
}