GPT Vision: update the Vision Chain node, and save uploaded files under a per-chatId folder on the server side when saving messages.

vinodkiran 2023-12-07 22:32:07 +05:30
parent b492153f8a
commit 68fbe0ea12
4 changed files with 112 additions and 42 deletions
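In short, uploaded images now land in a per-conversation subfolder instead of the flat gptvision directory, and the chat message records only the chatId and filename rather than the base64 payload. A minimal sketch of the resulting path construction, reusing the helpers visible in this diff (storedFilePath itself is a hypothetical name):

import path from 'path'
import { getUserHome } from '../../../src/utils'

// New layout: ~/.flowise/gptvision/<chatId>/<fileName>
// (previously files sat directly under ~/.flowise/gptvision/)
const storedFilePath = (chatId: string, fileName: string): string =>
    path.join(getUserHome(), '.flowise', 'gptvision', chatId, fileName)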

View File

@@ -1,10 +1,8 @@
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { getBaseClasses, handleEscapeCharacters } from '../../../src/utils'
import { VLLMChain } from './VLLMChain'
import { BaseLanguageModel } from 'langchain/base_language'
import { getBaseClasses, getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
import { OpenAIVisionChainInput, VLLMChain } from './VLLMChain'
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
import { formatResponse } from '../../outputparsers/OutputParserHelpers'
import { ChatOpenAI } from 'langchain/chat_models/openai'
class OpenAIVisionChain_Chains implements INode {
label: string
@@ -18,6 +16,7 @@ class OpenAIVisionChain_Chains implements INode {
description: string
inputs: INodeParams[]
outputs: INodeOutputsValue[]
credential: INodeParams
constructor() {
this.label = 'Open AI Vision Chain'
@@ -26,14 +25,28 @@ class OpenAIVisionChain_Chains implements INode {
this.type = 'OpenAIVisionChain'
this.icon = 'chain.svg'
this.category = 'Chains'
this.badge = 'EXPERIMENTAL'
this.badge = 'BETA'
this.description = 'Chain to run queries against OpenAI (GPT-4) Vision.'
this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
this.credential = {
label: 'Connect Credential',
name: 'credential',
type: 'credential',
credentialNames: ['openAIApi']
}
this.inputs = [
{
label: 'Language Model (Works only with Open AI [gpt-4-vision-preview])',
name: 'model',
type: 'BaseLanguageModel'
label: 'Model Name',
name: 'modelName',
type: 'options',
options: [
{
label: 'gpt-4-vision-preview',
name: 'gpt-4-vision-preview'
}
],
default: 'gpt-4-vision-preview',
optional: true
},
{
label: 'Prompt',
@@ -57,7 +70,33 @@ class OpenAIVisionChain_Chains implements INode {
}
],
default: 'low',
optional: false
optional: false,
additionalParams: true
},
{
label: 'Temperature',
name: 'temperature',
type: 'number',
step: 0.1,
default: 0.9,
optional: true,
additionalParams: true
},
{
label: 'Top Probability',
name: 'topP',
type: 'number',
step: 0.1,
optional: true,
additionalParams: true
},
{
label: 'Max Tokens',
name: 'maxTokens',
type: 'number',
step: 1,
optional: true,
additionalParams: true
},
{
label: 'Chain Name',
@@ -96,22 +135,26 @@ class OpenAIVisionChain_Chains implements INode {
}
async init(nodeData: INodeData, input: string, options: ICommonObject): Promise<any> {
const model = nodeData.inputs?.model as BaseLanguageModel
const prompt = nodeData.inputs?.prompt
const output = nodeData.outputs?.output as string
const imageResolution = nodeData.inputs?.imageResolution
const promptValues = prompt.promptValues as ICommonObject
if (!(model as any).openAIApiKey || (model as any).modelName !== 'gpt-4-vision-preview') {
throw new Error('Chain works with OpenAI Vision model only')
}
const openAIModel = model as ChatOpenAI
const fields = {
openAIApiKey: openAIModel.openAIApiKey,
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)
const temperature = nodeData.inputs?.temperature as string
const modelName = nodeData.inputs?.modelName as string
const maxTokens = nodeData.inputs?.maxTokens as string
const topP = nodeData.inputs?.topP as string
const fields: OpenAIVisionChainInput = {
openAIApiKey: openAIApiKey,
imageResolution: imageResolution,
verbose: process.env.DEBUG === 'true',
imageUrls: options.uploads,
openAIModel: openAIModel
modelName: modelName
}
if (temperature) fields.temperature = parseFloat(temperature)
if (maxTokens) fields.maxTokens = parseInt(maxTokens, 10)
if (topP) fields.topP = parseFloat(topP)
if (output === this.name) {
const chain = new VLLMChain({
...fields,

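For orientation, the node now assembles its OpenAIVisionChainInput from the connected credential plus the optional inline parameters, instead of borrowing them from an attached ChatOpenAI model. A minimal standalone sketch with placeholder values (in the node itself the key really comes from getCredentialData/getCredentialParam):

// Hypothetical direct construction mirroring init() above.
const fields: OpenAIVisionChainInput = {
    openAIApiKey: 'sk-...', // placeholder credential
    modelName: 'gpt-4-vision-preview',
    imageResolution: 'low',
    temperature: 0.9, // the node parses its string inputs via parseFloat/parseInt
    maxTokens: 1024,
    imageUrls: []
}
const chain = new VLLMChain({ ...fields })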
View File

@@ -2,7 +2,6 @@ import { OpenAI as OpenAIClient, ClientOptions } from 'openai'
import { BaseChain, ChainInputs } from 'langchain/chains'
import { ChainValues } from 'langchain/schema'
import { BasePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
import { ChatOpenAI } from 'langchain/chat_models/openai'
import path from 'path'
import { getUserHome } from '../../../src/utils'
import fs from 'fs'
@@ -18,7 +17,10 @@ export interface OpenAIVisionChainInput extends ChainInputs {
configuration?: ClientOptions
imageUrls?: []
imageResolution?: string
openAIModel: ChatOpenAI
temperature?: number
modelName?: string
maxTokens?: number
topP?: number
}
/**
@@ -30,12 +32,6 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
static lc_name() {
return 'VLLMChain'
}
get lc_secrets(): { [key: string]: string } | undefined {
return {
openAIApiKey: 'OPENAI_API_KEY'
}
}
prompt: BasePromptTemplate | undefined
inputKey = 'input'
@@ -44,10 +40,13 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
imageResolution: string = 'low'
openAIApiKey?: string
openAIOrganization?: string
openAIModel: ChatOpenAI
clientConfig: ClientOptions
client: OpenAIClient
throwError: boolean
temperature?: number
modelName?: string
maxTokens?: number
topP?: number
constructor(fields: OpenAIVisionChainInput) {
super(fields)
@@ -55,13 +54,16 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
this.imageResolution = fields?.imageResolution ?? 'low'
this.openAIApiKey = fields?.openAIApiKey
this.prompt = fields?.prompt
this.temperature = fields?.temperature
this.modelName = fields?.modelName
this.maxTokens = fields?.maxTokens
this.topP = fields?.topP
this.imageUrls = fields?.imageUrls ?? []
if (!this.openAIApiKey) {
throw new Error('OpenAI API key not found')
}
this.openAIOrganization = fields?.openAIOrganization
this.openAIModel = fields.openAIModel
this.clientConfig = {
...fields?.configuration,
@@ -76,12 +78,12 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
const userInput = values[this.inputKey]
const vRequest: any = {
model: 'gpt-4-vision-preview',
temperature: this.openAIModel.temperature,
top_p: this.openAIModel.topP,
model: this.modelName,
temperature: this.temperature,
top_p: this.topP,
messages: []
}
if (this.openAIModel.maxTokens) vRequest.max_tokens = this.openAIModel.maxTokens
if (this.maxTokens) vRequest.max_tokens = this.maxTokens
else vRequest.max_tokens = 1024
const userRole: any = { role: 'user' }
@@ -94,7 +96,7 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
this.imageUrls.forEach((imageUrl: any) => {
let bf = imageUrl?.data
if (imageUrl.type == 'stored-file') {
const filePath = path.join(getUserHome(), '.flowise', 'gptvision', imageUrl.data)
const filePath = path.join(getUserHome(), '.flowise', 'gptvision', imageUrl.data, imageUrl.name)
// the image is stored on the server, so read the file and convert it to base64
const contents = fs.readFileSync(filePath)

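For reference, the hunks above populate a standard OpenAI chat-completions request for vision models, with stored-file uploads read from disk and inlined as base64 data URLs. A rough sketch of vRequest once assembled (values are placeholders; the content layout follows OpenAI's documented gpt-4-vision-preview format, not code shown in this diff):

const vRequest = {
    model: 'gpt-4-vision-preview',
    temperature: 0.9,
    top_p: 1,
    max_tokens: 1024, // default applied when maxTokens is unset
    messages: [
        {
            role: 'user',
            content: [
                { type: 'text', text: 'What is in this image?' },
                {
                    type: 'image_url',
                    image_url: { url: 'data:image/png;base64,<...>', detail: 'low' } // detail = imageResolution
                }
            ]
        }
    ]
}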
View File

@@ -972,12 +972,29 @@ export class App {
}
})
function streamFileToUser(res: Response, filePath: string) {
const fileStream = fs.createReadStream(filePath)
fileStream.pipe(res)
}
// Download file from assistant
this.app.post('/api/v1/openai-assistants-file', async (req: Request, res: Response) => {
const filePath = path.join(getUserHome(), '.flowise', 'openai-assistant', req.body.fileName)
res.setHeader('Content-Disposition', 'attachment; filename=' + path.basename(filePath))
const fileStream = fs.createReadStream(filePath)
fileStream.pipe(res)
streamFileToUser(res, filePath)
})
// stream uploaded image
this.app.get('/api/v1/get-upload-file/:id', async (req: Request, res: Response) => {
if (!req.params.id || !req.query.chatId) {
return res.status(500).send(`Invalid file path`)
}
const filePath = path.join(getUserHome(), '.flowise', 'gptvision', req.query.chatId as string, req.params.id)
console.log(filePath)
if (!path.isAbsolute(filePath) || !fs.existsSync(filePath)) {
return res.status(500).send(`Invalid file path`)
}
streamFileToUser(res, filePath)
})
// ----------------------------------------
@@ -1352,16 +1369,17 @@ export class App {
;(incomingInput.uploads as any[]).forEach((upload: any) => {
if (upload.type === 'file') {
const filename = upload.name
const filePath = path.join(getUserHome(), '.flowise', 'gptvision', filename)
if (!fs.existsSync(path.join(getUserHome(), '.flowise', 'gptvision'))) {
fs.mkdirSync(path.dirname(filePath), { recursive: true })
const dir = path.join(getUserHome(), '.flowise', 'gptvision', chatId)
if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true })
}
const filePath = path.join(dir, filename)
const splitDataURI = upload.data.split(',')
//const fname = splitDataURI.pop()?.split(':')[1] ?? ''
const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
if (!fs.existsSync(filePath)) fs.writeFileSync(filePath, bf)
// don't need to store the file contents in chatmessage, just the filename
upload.data = filename //bf.toString('base64')
// TODO: if the file already exists, decide whether to overwrite or skip it
fs.writeFileSync(filePath, bf)
// don't need to store the file contents in chatmessage, just the filename and chatId
upload.data = chatId
upload.type = 'stored-file'
}
})

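The UI change in the next file consumes this new endpoint; a request looks roughly like the following (baseURL, file name, and chatId are placeholders), with the response body being the raw file streamed by streamFileToUser:

// GET /api/v1/get-upload-file/<fileName>?chatId=<chatId>
const url = `${baseURL}/api/v1/get-upload-file/example.png?chatId=abc123`
const response = await fetch(url) // assumes an async context
const blob = await response.blob() // raw image bytes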
View File

@@ -418,7 +418,14 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
if (message.sourceDocuments) obj.sourceDocuments = JSON.parse(message.sourceDocuments)
if (message.usedTools) obj.usedTools = JSON.parse(message.usedTools)
if (message.fileAnnotations) obj.fileAnnotations = JSON.parse(message.fileAnnotations)
if (message.fileUploads) obj.fileUploads = JSON.parse(message.fileUploads)
if (message.fileUploads) {
obj.fileUploads = JSON.parse(message.fileUploads)
obj.fileUploads.forEach((file) => {
if (file.type === 'stored-file') {
file.data = `${baseURL}/api/v1/get-upload-file/${file.name}?chatId=${chatId}`
}
})
}
return obj
})
setMessages((prevMessages) => [...prevMessages, ...loadedMessages])