GPT Vision: update the Vision Chain node, and save uploaded messages under a per-chatId folder on the server side.
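
Uploaded images are now written to a per-chat folder, ~/.flowise/gptvision/<chatId>/<filename>, instead of one shared directory, and the saved chat message keeps only the filename and chatId rather than the base64 payload. A sketch of the resulting flow (hypothetical filename and chatId):

    // Upload 'cat.png' in chat 'abc-123' is written to:
    //   ~/.flowise/gptvision/abc-123/cat.png
    // The chat message stores only the reference:
    //   { type: 'stored-file', name: 'cat.png', data: 'abc-123' }
    // The UI later rebuilds a URL to stream it back:
    //   GET {baseURL}/api/v1/get-upload-file/cat.png?chatId=abc-123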

This commit is contained in:
vinodkiran 2023-12-07 22:32:07 +05:30
parent b492153f8a
commit 68fbe0ea12
4 changed files with 112 additions and 42 deletions

View File

@@ -1,10 +1,8 @@
 import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
-import { getBaseClasses, handleEscapeCharacters } from '../../../src/utils'
-import { VLLMChain } from './VLLMChain'
-import { BaseLanguageModel } from 'langchain/base_language'
+import { getBaseClasses, getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
+import { OpenAIVisionChainInput, VLLMChain } from './VLLMChain'
 import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
 import { formatResponse } from '../../outputparsers/OutputParserHelpers'
-import { ChatOpenAI } from 'langchain/chat_models/openai'
 
 class OpenAIVisionChain_Chains implements INode {
     label: string
@@ -18,6 +16,7 @@ class OpenAIVisionChain_Chains implements INode {
     description: string
     inputs: INodeParams[]
     outputs: INodeOutputsValue[]
+    credential: INodeParams
 
     constructor() {
         this.label = 'Open AI Vision Chain'
@@ -26,14 +25,28 @@ class OpenAIVisionChain_Chains implements INode {
         this.type = 'OpenAIVisionChain'
         this.icon = 'chain.svg'
         this.category = 'Chains'
-        this.badge = 'EXPERIMENTAL'
+        this.badge = 'BETA'
         this.description = 'Chain to run queries against OpenAI (GPT-4) Vision .'
         this.baseClasses = [this.type, ...getBaseClasses(VLLMChain)]
+        this.credential = {
+            label: 'Connect Credential',
+            name: 'credential',
+            type: 'credential',
+            credentialNames: ['openAIApi']
+        }
         this.inputs = [
             {
-                label: 'Language Model (Works only with Open AI [gpt-4-vision-preview])',
-                name: 'model',
-                type: 'BaseLanguageModel'
+                label: 'Model Name',
+                name: 'modelName',
+                type: 'options',
+                options: [
+                    {
+                        label: 'gpt-4-vision-preview',
+                        name: 'gpt-4-vision-preview'
+                    }
+                ],
+                default: 'gpt-4-vision-preview',
+                optional: true
             },
             {
                 label: 'Prompt',
@@ -57,7 +70,33 @@
                     }
                 ],
                 default: 'low',
-                optional: false
+                optional: false,
+                additionalParams: true
+            },
+            {
+                label: 'Temperature',
+                name: 'temperature',
+                type: 'number',
+                step: 0.1,
+                default: 0.9,
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Top Probability',
+                name: 'topP',
+                type: 'number',
+                step: 0.1,
+                optional: true,
+                additionalParams: true
+            },
+            {
+                label: 'Max Tokens',
+                name: 'maxTokens',
+                type: 'number',
+                step: 1,
+                optional: true,
+                additionalParams: true
             },
             {
                 label: 'Chain Name',
@@ -96,22 +135,26 @@ class OpenAIVisionChain_Chains implements INode {
     }
 
     async init(nodeData: INodeData, input: string, options: ICommonObject): Promise<any> {
-        const model = nodeData.inputs?.model as BaseLanguageModel
         const prompt = nodeData.inputs?.prompt
        const output = nodeData.outputs?.output as string
         const imageResolution = nodeData.inputs?.imageResolution
         const promptValues = prompt.promptValues as ICommonObject
-        if (!(model as any).openAIApiKey || (model as any).modelName !== 'gpt-4-vision-preview') {
-            throw new Error('Chain works with OpenAI Vision model only')
-        }
-        const openAIModel = model as ChatOpenAI
-        const fields = {
-            openAIApiKey: openAIModel.openAIApiKey,
+        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+        const openAIApiKey = getCredentialParam('openAIApiKey', credentialData, nodeData)
+        const temperature = nodeData.inputs?.temperature as string
+        const modelName = nodeData.inputs?.modelName as string
+        const maxTokens = nodeData.inputs?.maxTokens as string
+        const topP = nodeData.inputs?.topP as string
+        const fields: OpenAIVisionChainInput = {
+            openAIApiKey: openAIApiKey,
             imageResolution: imageResolution,
             verbose: process.env.DEBUG === 'true',
             imageUrls: options.uploads,
-            openAIModel: openAIModel
+            modelName: modelName
         }
+        if (temperature) fields.temperature = parseFloat(temperature)
+        if (maxTokens) fields.maxTokens = parseInt(maxTokens, 10)
+        if (topP) fields.topP = parseFloat(topP)
 
         if (output === this.name) {
            const chain = new VLLMChain({
                 ...fields,

View File

@@ -2,7 +2,6 @@ import { OpenAI as OpenAIClient, ClientOptions } from 'openai'
 import { BaseChain, ChainInputs } from 'langchain/chains'
 import { ChainValues } from 'langchain/schema'
 import { BasePromptTemplate, ChatPromptTemplate, SystemMessagePromptTemplate } from 'langchain/prompts'
-import { ChatOpenAI } from 'langchain/chat_models/openai'
 import path from 'path'
 import { getUserHome } from '../../../src/utils'
 import fs from 'fs'
@@ -18,7 +17,10 @@ export interface OpenAIVisionChainInput extends ChainInputs {
     configuration?: ClientOptions
     imageUrls?: []
     imageResolution?: string
-    openAIModel: ChatOpenAI
+    temperature?: number
+    modelName?: string
+    maxTokens?: number
+    topP?: number
 }
 
 /**
@@ -30,12 +32,6 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
     static lc_name() {
         return 'VLLMChain'
     }
-
-    get lc_secrets(): { [key: string]: string } | undefined {
-        return {
-            openAIApiKey: 'OPENAI_API_KEY'
-        }
-    }
 
     prompt: BasePromptTemplate | undefined
     inputKey = 'input'
@@ -44,10 +40,13 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
     imageResolution: string = 'low'
     openAIApiKey?: string
     openAIOrganization?: string
-    openAIModel: ChatOpenAI
     clientConfig: ClientOptions
     client: OpenAIClient
     throwError: boolean
+    temperature?: number
+    modelName?: string
+    maxTokens?: number
+    topP?: number
 
     constructor(fields: OpenAIVisionChainInput) {
         super(fields)
@@ -55,13 +54,16 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
         this.imageResolution = fields?.imageResolution ?? 'low'
         this.openAIApiKey = fields?.openAIApiKey
         this.prompt = fields?.prompt
+        this.temperature = fields?.temperature
+        this.modelName = fields?.modelName
+        this.maxTokens = fields?.maxTokens
+        this.topP = fields?.topP
         this.imageUrls = fields?.imageUrls ?? []
 
         if (!this.openAIApiKey) {
             throw new Error('OpenAI API key not found')
         }
         this.openAIOrganization = fields?.openAIOrganization
-        this.openAIModel = fields.openAIModel
 
         this.clientConfig = {
             ...fields?.configuration,
@@ -76,12 +78,12 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
         const userInput = values[this.inputKey]
 
         const vRequest: any = {
-            model: 'gpt-4-vision-preview',
-            temperature: this.openAIModel.temperature,
-            top_p: this.openAIModel.topP,
+            model: this.modelName,
+            temperature: this.temperature,
+            top_p: this.topP,
             messages: []
         }
-        if (this.openAIModel.maxTokens) vRequest.max_tokens = this.openAIModel.maxTokens
+        if (this.maxTokens) vRequest.max_tokens = this.maxTokens
         else vRequest.max_tokens = 1024
 
         const userRole: any = { role: 'user' }
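
The request object built above is sent to the OpenAI Chat Completions endpoint. A sketch of its final shape, assuming the standard gpt-4-vision-preview payload format (values are illustrative, not from this commit):

    // Illustrative request body:
    {
        model: 'gpt-4-vision-preview',
        temperature: 0.9,
        top_p: 1,
        max_tokens: 1024, // fallback used when maxTokens is not set
        messages: [
            {
                role: 'user',
                content: [
                    { type: 'text', text: 'What is in this image?' },
                    { type: 'image_url', image_url: { url: 'data:image/png;base64,<...>', detail: 'low' } }
                ]
            }
        ]
    }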
@@ -94,7 +96,7 @@ export class VLLMChain extends BaseChain implements OpenAIVisionChainInput {
         this.imageUrls.forEach((imageUrl: any) => {
             let bf = imageUrl?.data
             if (imageUrl.type == 'stored-file') {
-                const filePath = path.join(getUserHome(), '.flowise', 'gptvision', imageUrl.data)
+                const filePath = path.join(getUserHome(), '.flowise', 'gptvision', imageUrl.data, imageUrl.name)
                 // as the image is stored in the server, read the file and convert it to base64
                 const contents = fs.readFileSync(filePath)
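
For a stored-file upload, the branch above reads the image back from disk and inlines it as base64. A minimal sketch of where that conversion leads, assuming a data-URL wrapper (the exact MIME handling is an assumption, not verbatim from this commit):

    // Assumed continuation (illustrative):
    const contents = fs.readFileSync(filePath)
    bf = `data:image/png;base64,${contents.toString('base64')}`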

View File

@@ -972,12 +972,29 @@ export class App {
             }
         })
 
+        function streamFileToUser(res: Response, filePath: string) {
+            const fileStream = fs.createReadStream(filePath)
+            fileStream.pipe(res)
+        }
+
         // Download file from assistant
         this.app.post('/api/v1/openai-assistants-file', async (req: Request, res: Response) => {
             const filePath = path.join(getUserHome(), '.flowise', 'openai-assistant', req.body.fileName)
             res.setHeader('Content-Disposition', 'attachment; filename=' + path.basename(filePath))
-            const fileStream = fs.createReadStream(filePath)
-            fileStream.pipe(res)
+            streamFileToUser(res, filePath)
+        })
+
+        // stream uploaded image
+        this.app.get('/api/v1/get-upload-file/:id', async (req: Request, res: Response) => {
+            if (!req.params.id || !req.query.chatId) {
+                return res.status(500).send(`Invalid file path`)
+            }
+            const filePath = path.join(getUserHome(), '.flowise', 'gptvision', req.query.chatId as string, req.params.id)
+            console.log(filePath)
+            if (!path.isAbsolute(filePath) || !fs.existsSync(filePath)) {
+                return res.status(500).send(`Invalid file path`)
+            }
+            streamFileToUser(res, filePath)
         })
 
         // ----------------------------------------
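
A quick way to exercise the new route from a client (hypothetical filename and chatId; note the handler streams raw bytes and answers 500 for both missing parameters and missing files):

    // Illustrative client call:
    const url = `${baseURL}/api/v1/get-upload-file/cat.png?chatId=abc-123`
    const response = await fetch(url) // response body is the stored image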
@@ -1352,16 +1369,17 @@ export class App {
                 ;(incomingInput.uploads as any[]).forEach((upload: any) => {
                     if (upload.type === 'file') {
                         const filename = upload.name
-                        const filePath = path.join(getUserHome(), '.flowise', 'gptvision', filename)
-                        if (!fs.existsSync(path.join(getUserHome(), '.flowise', 'gptvision'))) {
-                            fs.mkdirSync(path.dirname(filePath), { recursive: true })
+                        const dir = path.join(getUserHome(), '.flowise', 'gptvision', chatId)
+                        if (!fs.existsSync(dir)) {
+                            fs.mkdirSync(dir, { recursive: true })
                         }
+                        const filePath = path.join(dir, filename)
                         const splitDataURI = upload.data.split(',')
-                        //const fname = splitDataURI.pop()?.split(':')[1] ?? ''
                         const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
-                        if (!fs.existsSync(filePath)) fs.writeFileSync(filePath, bf)
-                        // don't need to store the file contents in chatmessage, just the filename
-                        upload.data = filename //bf.toString('base64')
+                        //TODO: check if file exists, what should we do if it exists?
+                        fs.writeFileSync(filePath, bf)
+                        // don't need to store the file contents in chatmessage, just the filename and chatId
+                        upload.data = chatId
                         upload.type = 'stored-file'
                     }
                 })
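
Before and after this block runs, an upload entry changes shape roughly as follows (illustrative values):

    // Incoming from the client:
    //   { type: 'file', name: 'cat.png', data: 'data:image/png;base64,iVBOR...' }
    // After the file is written to ~/.flowise/gptvision/<chatId>/cat.png:
    //   { type: 'stored-file', name: 'cat.png', data: '<chatId>' }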

View File

@@ -418,7 +418,14 @@ export const ChatMessage = ({ open, chatflowid, isDialog }) => {
                 if (message.sourceDocuments) obj.sourceDocuments = JSON.parse(message.sourceDocuments)
                 if (message.usedTools) obj.usedTools = JSON.parse(message.usedTools)
                 if (message.fileAnnotations) obj.fileAnnotations = JSON.parse(message.fileAnnotations)
-                if (message.fileUploads) obj.fileUploads = JSON.parse(message.fileUploads)
+                if (message.fileUploads) {
+                    obj.fileUploads = JSON.parse(message.fileUploads)
+                    obj.fileUploads.forEach((file) => {
+                        if (file.type === 'stored-file') {
+                            file.data = `${baseURL}/api/v1/get-upload-file/${file.name}?chatId=${chatId}`
+                        }
+                    })
+                }
                 return obj
             })
             setMessages((prevMessages) => [...prevMessages, ...loadedMessages])