diff --git a/packages/components/nodes/chains/VisionChain/OpenAIVisionChain.ts b/packages/components/nodes/chains/VisionChain/OpenAIVisionChain.ts
index 7745f05d7..6d19235c7 100644
--- a/packages/components/nodes/chains/VisionChain/OpenAIVisionChain.ts
+++ b/packages/components/nodes/chains/VisionChain/OpenAIVisionChain.ts
@@ -1,10 +1,8 @@
 import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
-import { getBaseClasses, handleEscapeCharacters } from '../../../src/utils'
-import { VLLMChain } from './VLLMChain'
-import { BaseLanguageModel } from 'langchain/base_language'
+import { getBaseClasses, getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
+import { OpenAIVisionChainInput, VLLMChain } from './VLLMChain'
 import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
 import { formatResponse } from '../../outputparsers/OutputParserHelpers'
-import { ChatOpenAI } from 'langchain/chat_models/openai'
 
 class OpenAIVisionChain_Chains implements INode {
     label: string
@@ -18,6 +16,7 @@ class OpenAIVisionChain_Chains implements INode {
     description: string
     inputs: INodeParams[]
     outputs: INodeOutputsValue[]
+    credential: INodeParams
 
     constructor() {
         this.label = 'Open AI Vision Chain'
@@ -26,14 +25,28 @@ class OpenAIVisionChain_Chains implements INode {
         this.type = 'OpenAIVisionChain'
         this.icon = 'chain.svg'
         this.category = 'Chains'
-        this.badge = 'EXPERIMENTAL'
+        this.badge = 'BETA'
         this.description = 'Chain to run queries against OpenAI (GPT-4) Vision .'
         this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
+        this.credential = {
+            label: 'Connect Credential',
+            name: 'credential',
+            type: 'credential',
+            credentialNames: ['openAIApi']
+        }
         this.inputs = [
             {
-                label: 'Language Model (Works only with Open AI [gpt-4-vision-preview])',
-                name: 'model',
-                type: 'BaseLanguageModel'
+                label: 'Model Name',
+                name: 'modelName',
+                type: 'options',
+                options: [
+                    {
+                        label: 'gpt-4-vision-preview',
+                        name: 'gpt-4-vision-preview'
+                    }
+                ],
+                default: 'gpt-4-vision-preview',
+                optional: true
             },
             {
                 label: 'Prompt',
@@ -57,7 +70,33 @@ class OpenAIVisionChain_Chains implements INode {
                     }
                 ],
                 default: 'low',
-                optional: false
+                optional: false,
+                additionalParams: true
+            },
+            {
+                label: 'Temperature',
+                name: 'temperature',
+                type: 'number',
+                step: 0.1,
+                default: 0.9,
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Top Probability',
+                name: 'topP',
+                type: 'number',
+                step: 0.1,
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Max Tokens',
+                name: 'maxTokens',
+                type: 'number',
+                step: 1,
+                optional: true,
+                additionalParams: true
             },
             {
                 label: 'Chain Name',
@@ -96,22 +135,26 @@ class OpenAIVisionChain_Chains implements INode {
         }
     })
     async init(nodeData: INodeData, input: string, options: ICommonObject): Promise<any> {
-        const model = nodeData.inputs?.model as BaseLanguageModel
         const prompt = nodeData.inputs?.prompt
         const output = nodeData.outputs?.output as string
         const imageResolution = nodeData.inputs?.imageResolution
         const promptValues = prompt.promptValues as ICommonObject
-        if (!(model as any).openAIApiKey || (model as any).modelName !== 'gpt-4-vision-preview') {
-            throw new Error('Chain works with OpenAI Vision model only')
-        }
-        const openAIModel = model as ChatOpenAI
-        const fields = {
-            openAIApiKey: openAIModel.openAIApiKey,
+        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+        const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)
+        const temperature = nodeData.inputs?.temperature as string
+        const modelName = nodeData.inputs?.modelName as string
+        const maxTokens = nodeData.inputs?.maxTokens as string
+        const topP = nodeData.inputs?.topP as string
+        const fields: OpenAIVisionChainInput = {
+            openAIApiKey: openAIApiKey,
             imageResolution: imageResolution,
             verbose: process.env.DEBUG === 'true',
             imageUrls: options.uploads,
-            openAIModel: openAIModel
+            modelName: modelName
        }
+        if (temperature) fields.temperature = parseFloat(temperature)
+        if (maxTokens) fields.maxTokens = parseInt(maxTokens, 10)
+        if (topP) fields.topP = parseFloat(topP)
         if (output === this.name) {
             const chain = new VLLMChain({
                 ...fields,
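The node now resolves the OpenAI key from an attached credential and forwards plain scalar options, instead of borrowing them from a connected ChatOpenAI instance. A minimal usage sketch of the new constructor surface, assuming the exports shown in this diff (the literal values are illustrative placeholders, not part of the change):

    import { VLLMChain } from './VLLMChain'

    const chain = new VLLMChain({
        openAIApiKey: process.env.OPENAI_API_KEY ?? '', // required; the constructor throws without it
        modelName: 'gpt-4-vision-preview',
        imageResolution: 'low', // forwarded as the request's image detail setting
        temperature: 0.9,
        maxTokens: 1024
    })

Note that the numeric inputs arrive from the UI as strings, which is why init() guards each one with parseFloat/parseInt before setting it on fields.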
diff --git a/packages/components/nodes/chains/VisionChain/VLLMChain.ts b/packages/components/nodes/chains/VisionChain/VLLMChain.ts
index f9b92e535..2849cf631 100644
--- a/packages/components/nodes/chains/VisionChain/VLLMChain.ts
+++ b/packages/components/nodes/chains/VisionChain/VLLMChain.ts
@@ -2,7 +2,6 @@ import { OpenAI as OpenAIClient, ClientOptions } from 'openai'
 import { BaseChain, ChainInputs } from 'langchain/chains'
 import { ChainValues } from 'langchain/schema'
 import { BasePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
-import { ChatOpenAI } from 'langchain/chat_models/openai'
 import path from 'path'
 import { getUserHome } from '../../../src/utils'
 import fs from 'fs'
@@ -18,7 +17,10 @@ export interface OpenAIVisionChainInput extends ChainInputs {
     configuration?: ClientOptions
     imageUrls?: []
     imageResolution?: string
-    openAIModel: ChatOpenAI
+    temperature?: number
+    modelName?: string
+    maxTokens?: number
+    topP?: number
 }
 
 /**
@@ -30,12 +32,6 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
     static lc_name() {
         return 'VLLMChain'
     }
-
-    get lc_secrets(): { [key: string]: string } | undefined {
-        return {
-            openAIApiKey: 'OPENAI_API_KEY'
-        }
-    }
 
     prompt: BasePromptTemplate | undefined
     inputKey = 'input'
@@ -44,10 +40,13 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
     imageResolution: string = 'low'
     openAIApiKey?: string
     openAIOrganization?: string
-    openAIModel: ChatOpenAI
     clientConfig: ClientOptions
     client: OpenAIClient
     throwError: boolean
+    temperature?: number
+    modelName?: string
+    maxTokens?: number
+    topP?: number
 
     constructor(fields: OpenAIVisionChainInput) {
         super(fields)
@@ -55,13 +54,16 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
         this.imageResolution = fields?.imageResolution ?? 'low'
         this.openAIApiKey = fields?.openAIApiKey
         this.prompt = fields?.prompt
+        this.temperature = fields?.temperature
+        this.modelName = fields?.modelName
+        this.maxTokens = fields?.maxTokens
+        this.topP = fields?.topP
         this.imageUrls = fields?.imageUrls ?? []
         if (!this.openAIApiKey) {
             throw new Error('OpenAI API key not found')
         }
 
         this.openAIOrganization = fields?.openAIOrganization
-        this.openAIModel = fields.openAIModel
 
         this.clientConfig = {
             ...fields?.configuration,
@@ -76,12 +78,12 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
         const userInput = values[this.inputKey]
 
         const vRequest: any = {
-            model: 'gpt-4-vision-preview',
-            temperature: this.openAIModel.temperature,
-            top_p: this.openAIModel.topP,
+            model: this.modelName,
+            temperature: this.temperature,
+            top_p: this.topP,
             messages: []
         }
-        if (this.openAIModel.maxTokens) vRequest.max_tokens = this.openAIModel.maxTokens
+        if (this.maxTokens) vRequest.max_tokens = this.maxTokens
         else vRequest.max_tokens = 1024
 
         const userRole: any = { role: 'user' }
@@ -94,7 +96,7 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
         this.imageUrls.forEach((imageUrl: any) => {
             let bf = imageUrl?.data
             if (imageUrl.type == 'stored-file') {
-                const filePath = path.join(getUserHome(), '.flowise', 'gptvision', imageUrl.data)
+                const filePath = path.join(getUserHome(), '.flowise', 'gptvision', imageUrl.data, imageUrl.name)
 
                 // as the image is stored in the server, read the file and convert it to base64
                 const contents = fs.readFileSync(filePath)
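With the wrapped ChatOpenAI gone, _call() assembles the completion request from the chain's own fields. For reference, roughly the payload shape it produces, assuming the standard OpenAI vision chat message format (the values and base64 payload are illustrative):

    const vRequest = {
        model: 'gpt-4-vision-preview',
        temperature: 0.9,
        top_p: 0.95,
        max_tokens: 1024, // default applied when maxTokens is not set
        messages: [
            {
                role: 'user',
                content: [
                    { type: 'text', text: 'What is in this image?' },
                    {
                        type: 'image_url',
                        image_url: { url: 'data:image/png;base64,...', detail: 'low' }
                    }
                ]
            }
        ]
    }

Stored files are re-read from disk and inlined as base64 data URLs, so the new two-segment path (chatId directory plus file name) must match what the server writes on upload below.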
diff --git a/packages/server/src/index.ts b/packages/server/src/index.ts
index 83b018d5d..84e76c6ee 100644
--- a/packages/server/src/index.ts
+++ b/packages/server/src/index.ts
@@ -972,12 +972,29 @@
             }
         })
 
+        function streamFileToUser(res: Response, filePath: string) {
+            const fileStream = fs.createReadStream(filePath)
+            fileStream.pipe(res)
+        }
+
         // Download file from assistant
         this.app.post('/api/v1/openai-assistants-file', async (req: Request, res: Response) => {
             const filePath = path.join(getUserHome(), '.flowise', 'openai-assistant', req.body.fileName)
             res.setHeader('Content-Disposition', 'attachment; filename=' + path.basename(filePath))
-            const fileStream = fs.createReadStream(filePath)
-            fileStream.pipe(res)
+            streamFileToUser(res, filePath)
+        })
+
+        // stream uploaded image
+        this.app.get('/api/v1/get-upload-file/:id', async (req: Request, res: Response) => {
+            if (!req.params.id || !req.query.chatId) {
+                return res.status(500).send(`Invalid file path`)
+            }
+            const filePath = path.join(getUserHome(), '.flowise', 'gptvision', req.query.chatId as string, req.params.id)
+            console.log(filePath)
+            if (!path.isAbsolute(filePath) || !fs.existsSync(filePath)) {
+                return res.status(500).send(`Invalid file path`)
+            }
+            streamFileToUser(res, filePath)
         })
 
         // ----------------------------------------
@@ -1352,16 +1369,17 @@
                 ;(incomingInput.uploads as any[]).forEach((upload: any) => {
                     if (upload.type === 'file') {
                         const filename = upload.name
-                        const filePath = path.join(getUserHome(), '.flowise', 'gptvision', filename)
-                        if (!fs.existsSync(path.join(getUserHome(), '.flowise', 'gptvision'))) {
-                            fs.mkdirSync(path.dirname(filePath), { recursive: true })
+                        const dir = path.join(getUserHome(), '.flowise', 'gptvision', chatId)
+                        if (!fs.existsSync(dir)) {
+                            fs.mkdirSync(dir, { recursive: true })
                         }
+                        const filePath = path.join(dir, filename)
                         const splitDataURI = upload.data.split(',')
-                        //const fname = splitDataURI.pop()?.split(':')[1] ?? ''
                         const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
-                        if (!fs.existsSync(filePath)) fs.writeFileSync(filePath, bf)
-                        // don't need to store the file contents in chatmessage, just the filename
-                        upload.data = filename //bf.toString('base64')
+                        //TODO: check if file exists, what should we do if it exists?
+                        fs.writeFileSync(filePath, bf)
+                        // don't need to store the file contents in chatmessage, just the filename and chatId
+                        upload.data = chatId
                         upload.type = 'stored-file'
                     }
                 })
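The new GET route lets the UI fetch a stored upload back by chat ID and file name. An illustrative client call (baseURL and the file name are placeholders):

    const response = await fetch(`${baseURL}/api/v1/get-upload-file/image.png?chatId=${chatId}`)
    const blob = await response.blob() // raw image bytes streamed from disk

Two review notes: the console.log(filePath) looks like leftover debugging, and since filePath is joined from request input, the isAbsolute/existsSync guard will not reject `..` segments in chatId (the joined path is always absolute), so some sanitisation may be worth adding before merge.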
diff --git a/packages/ui/src/views/chatmessage/ChatMessage.js b/packages/ui/src/views/chatmessage/ChatMessage.js
index 92c736991..d2ff51d81 100644
--- a/packages/ui/src/views/chatmessage/ChatMessage.js
+++ b/packages/ui/src/views/chatmessage/ChatMessage.js
@@ -418,7 +418,14 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
                 if (message.sourceDocuments) obj.sourceDocuments = JSON.parse(message.sourceDocuments)
                 if (message.usedTools) obj.usedTools = JSON.parse(message.usedTools)
                 if (message.fileAnnotations) obj.fileAnnotations = JSON.parse(message.fileAnnotations)
-                if (message.fileUploads) obj.fileUploads = JSON.parse(message.fileUploads)
+                if (message.fileUploads) {
+                    obj.fileUploads = JSON.parse(message.fileUploads)
+                    obj.fileUploads.forEach((file) => {
+                        if (file.type === 'stored-file') {
+                            file.data = `${baseURL}/api/v1/get-upload-file/${file.name}?chatId=${chatId}`
+                        }
+                    })
+                }
                 return obj
             })
             setMessages((prevMessages) => [...prevMessages, ...loadedMessages])
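On history load, each stored-file upload is rewritten from a bare filename into a URL against the new endpoint, since the message record now stores only the chatId rather than the file contents. Illustrative JSX for how such an entry could then be rendered (not part of this diff; the actual rendering depends on the surrounding component):

    {obj.fileUploads?.map((file) => (
        <img key={file.name} src={file.data} alt={file.name} />
    ))}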