Merge pull request #913 from vectara/vectara-upload-files

Add Vectara upload file component
This commit is contained in:
Henry Heng 2023-09-15 16:00:41 +01:00 committed by GitHub
commit b0e6d4ad77
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 335 additions and 222 deletions

View File

@@ -1,7 +1,7 @@
import { INode, INodeData, INodeParams } from '../../../src/Interface'
import { BabyAGI } from './core'
import { BaseChatModel } from 'langchain/chat_models/base'
import { VectorStore } from 'langchain/vectorstores'
import { VectorStore } from 'langchain/vectorstores/base'
class BabyAGI_Agents implements INode {
label: string

View File

@@ -2,7 +2,7 @@ import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Inter
import { getBaseClasses } from '../../../src/utils'
import { VectorDBQAChain } from 'langchain/chains'
import { BaseLanguageModel } from 'langchain/base_language'
import { VectorStore } from 'langchain/vectorstores'
import { VectorStore } from 'langchain/vectorstores/base'
import { ConsoleCallbackHandler, CustomChainHandler, additionalCallbacks } from '../../../src/handler'
class VectorDBQAChain_Chains implements INode {

View File

@@ -92,7 +92,7 @@ class VectaraExisting_VectorStores implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
const customerId = getCredentialParam('customerID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData).split(',')
const vectaraMetadataFilter = nodeData.inputs?.filter as string
const sentencesBefore = nodeData.inputs?.sentencesBefore as number

View File

@@ -0,0 +1,176 @@
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig, VectaraFile } from 'langchain/vectorstores/vectara'
/**
 * Flowise node that uploads one or more files straight to Vectara's
 * file-upload indexing API and exposes the resulting corpus either as a
 * LangChain retriever or as the raw Vectara vector store.
 */
class VectaraUpload_VectorStores implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    credential: INodeParams
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'Vectara Upload File'
        this.name = 'vectaraUpload'
        this.version = 1.0
        this.type = 'Vectara'
        this.icon = 'vectara.png'
        this.category = 'Vector Stores'
        this.description = 'Upload files to Vectara'
        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['vectaraApi']
        }
        this.inputs = [
            {
                label: 'File',
                name: 'file',
                description:
                    'File to upload to Vectara. Supported file types: https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes',
                type: 'file'
            },
            {
                label: 'Vectara Metadata Filter',
                name: 'filter',
                description:
                    'Filter to apply to Vectara metadata. Refer to the <a target="_blank" href="https://docs.flowiseai.com/vector-stores/vectara">documentation</a> on how to use Vectara filters with Flowise.',
                type: 'string',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Sentences Before',
                name: 'sentencesBefore',
                description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
                type: 'number',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Sentences After',
                name: 'sentencesAfter',
                description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
                type: 'number',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Lambda',
                name: 'lambda',
                description:
                    'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.',
                type: 'number',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Top K',
                name: 'topK',
                description: 'Number of top results to fetch. Defaults to 4',
                placeholder: '4',
                type: 'number',
                additionalParams: true,
                optional: true
            }
        ]
        this.outputs = [
            {
                label: 'Vectara Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Vectara Vector Store',
                name: 'vectorStore',
                baseClasses: [this.type, ...getBaseClasses(VectaraStore)]
            }
        ]
    }

    /**
     * Uploads the configured file(s) to Vectara and returns either a
     * retriever or the vector store, depending on the selected output anchor.
     *
     * @param nodeData node configuration (credential ref, inputs, outputs)
     * @param _        unused (reserved positional argument in the INode contract)
     * @param options  runtime context passed through to credential resolution
     * @throws Error when no file input was provided
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
        const customerId = getCredentialParam('customerID', credentialData, nodeData)
        // corpusID may be a comma-separated list; Vectara accepts multiple corpus ids
        const corpusId = getCredentialParam('corpusID', credentialData, nodeData).split(',')

        const fileBase64 = nodeData.inputs?.file as string
        // Fail fast with a clear message instead of a TypeError on .startsWith below
        if (!fileBase64) throw new Error('File input is missing')

        const vectaraMetadataFilter = nodeData.inputs?.filter as string
        const sentencesBefore = nodeData.inputs?.sentencesBefore as number
        const sentencesAfter = nodeData.inputs?.sentencesAfter as number
        const lambda = nodeData.inputs?.lambda as number
        const output = nodeData.outputs?.output as string
        const topK = nodeData.inputs?.topK as string
        const k = topK ? parseInt(topK, 10) : 4

        const vectaraArgs: VectaraLibArgs = {
            apiKey: apiKey,
            customerId: customerId,
            corpusId: corpusId
        }

        const vectaraFilter: VectaraFilter = {}
        if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
        if (lambda) vectaraFilter.lambda = lambda

        const vectaraContextConfig: VectaraContextConfig = {}
        if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore
        if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter
        vectaraFilter.contextConfig = vectaraContextConfig

        // The UI sends either a single data URI or a JSON-stringified array of them
        let files: string[] = []
        if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
            files = JSON.parse(fileBase64)
        } else {
            files = [fileBase64]
        }

        const vectaraFiles: VectaraFile[] = []
        for (const file of files) {
            // Data URI layout: `data:<mime>;base64,<payload>,filename:<name>` —
            // drop the trailing filename segment, then decode the base64 payload
            const splitDataURI = file.split(',')
            splitDataURI.pop()
            const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
            const blob = new Blob([bf])
            vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
        }

        const vectorStore = new VectaraStore(vectaraArgs)
        await vectorStore.addFiles(vectaraFiles)

        if (output === 'retriever') {
            const retriever = vectorStore.asRetriever(k, vectaraFilter)
            return retriever
        } else if (output === 'vectorStore') {
            // VectaraStore does not expose `k` publicly; downstream nodes read it
            ;(vectorStore as any).k = k
            return vectorStore
        }
        return vectorStore
    }
}
/**
 * Extracts the original file name(s) from a Flowise upload string.
 *
 * Each upload is a data URI of the form
 * `data:<mime>;base64,<payload>,filename:<name>`; the input is either a single
 * such string or a JSON-stringified array of them. For an array the names are
 * joined with ', '.
 *
 * @param fileBase64 single data URI or JSON array of data URIs
 * @returns the file name, or comma-separated names for multiple files
 */
const getFileName = (fileBase64: string): string => {
    // Last comma-separated segment is `filename:<name>`
    const extractName = (dataURI: string): string => {
        const segments = dataURI.split(',')
        return segments[segments.length - 1].split(':')[1]
    }
    if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
        const files: string[] = JSON.parse(fileBase64)
        return files.map(extractName).join(', ')
    }
    return extractName(fileBase64)
}
// Flowise node registration: the loader discovers nodes via the `nodeClass` export
module.exports = { nodeClass: VectaraUpload_VectorStores }

View File

@@ -101,7 +101,7 @@ class VectaraUpsert_VectorStores implements INode {
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
const customerId = getCredentialParam('customerID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData)
const corpusId = getCredentialParam('corpusID', credentialData, nodeData).split(',')
const docs = nodeData.inputs?.document as Document[]
const embeddings = {} as Embeddings

View File

Before

Width:  |  Height:  |  Size: 66 KiB

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 66 KiB

View File

@@ -42,7 +42,7 @@
"google-auth-library": "^9.0.0",
"graphql": "^16.6.0",
"html-to-text": "^9.0.5",
"langchain": "^0.0.145",
"langchain": "^0.0.147",
"langfuse-langchain": "^1.0.14-alpha.0",
"langsmith": "^0.0.32",
"linkifyjs": "^4.1.1",

View File

@@ -1,11 +1,125 @@
{
"description": "A simple LLM chain that uses Vectara to enable conversations with uploaded documents",
"description": "A simple LLM chain that uses Vectara to enable conversations with uploaded files",
"nodes": [
{
"width": 300,
"height": 524,
"id": "vectaraUpload_0",
"position": { "x": 219.0098475967174, "y": 189.74396248534583 },
"type": "customNode",
"data": {
"id": "vectaraUpload_0",
"label": "Vectara Upload File",
"version": 1,
"name": "vectaraUpload",
"type": "Vectara",
"baseClasses": ["Vectara", "VectorStoreRetriever", "BaseRetriever"],
"category": "Vector Stores",
"description": "Upload files to Vectara",
"inputParams": [
{
"label": "Connect Credential",
"name": "credential",
"type": "credential",
"credentialNames": ["vectaraApi"],
"id": "vectaraUpload_0-input-credential-credential"
},
{
"label": "File",
"name": "file",
"description": "File to upload to Vectara. Supported file types: https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes",
"type": "file",
"id": "vectaraUpload_0-input-file-file"
},
{
"label": "Vectara Metadata Filter",
"name": "filter",
"description": "Filter to apply to Vectara metadata. Refer to the <a target=\"_blank\" href=\"https://docs.flowiseai.com/vector-stores/vectara\">documentation</a> on how to use Vectara filters with Flowise.",
"type": "string",
"additionalParams": true,
"optional": true,
"id": "vectaraUpload_0-input-filter-string"
},
{
"label": "Sentences Before",
"name": "sentencesBefore",
"description": "Number of sentences to fetch before the matched sentence. Defaults to 2.",
"type": "number",
"additionalParams": true,
"optional": true,
"id": "vectaraUpload_0-input-sentencesBefore-number"
},
{
"label": "Sentences After",
"name": "sentencesAfter",
"description": "Number of sentences to fetch after the matched sentence. Defaults to 2.",
"type": "number",
"additionalParams": true,
"optional": true,
"id": "vectaraUpload_0-input-sentencesAfter-number"
},
{
"label": "Lambda",
"name": "lambda",
"description": "Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.",
"type": "number",
"additionalParams": true,
"optional": true,
"id": "vectaraUpload_0-input-lambda-number"
},
{
"label": "Top K",
"name": "topK",
"description": "Number of top results to fetch. Defaults to 4",
"placeholder": "4",
"type": "number",
"additionalParams": true,
"optional": true,
"id": "vectaraUpload_0-input-topK-number"
}
],
"inputAnchors": [],
"inputs": {
"filter": "",
"sentencesBefore": "",
"sentencesAfter": "",
"lambda": "",
"topK": ""
},
"outputAnchors": [
{
"name": "output",
"label": "Output",
"type": "options",
"options": [
{
"id": "vectaraUpload_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever",
"name": "retriever",
"label": "Vectara Retriever",
"type": "Vectara | VectorStoreRetriever | BaseRetriever"
},
{
"id": "vectaraUpload_0-output-vectorStore-Vectara|VectorStore",
"name": "vectorStore",
"label": "Vectara Vector Store",
"type": "Vectara | VectorStore"
}
],
"default": "retriever"
}
],
"outputs": { "output": "retriever" },
"selected": false
},
"selected": false,
"positionAbsolute": { "x": 219.0098475967174, "y": 189.74396248534583 },
"dragging": false
},
{
"width": 300,
"height": 525,
"id": "chatOpenAI_0",
"position": { "x": 514.1088940275924, "y": 199.574479681537 },
"position": { "x": 669.6533996522251, "y": 177.86181519287192 },
"type": "customNode",
"data": {
"id": "chatOpenAI_0",
@@ -13,7 +127,7 @@
"version": 1,
"name": "chatOpenAI",
"type": "ChatOpenAI",
"baseClasses": ["ChatOpenAI", "BaseChatModel", "BaseLanguageModel"],
"baseClasses": ["ChatOpenAI", "BaseChatModel", "BaseLanguageModel", "Runnable"],
"category": "Chat Models",
"description": "Wrapper around OpenAI large language models that use the Chat endpoint",
"inputParams": [
@@ -36,7 +150,10 @@
{ "label": "gpt-3.5-turbo", "name": "gpt-3.5-turbo" },
{ "label": "gpt-3.5-turbo-0613", "name": "gpt-3.5-turbo-0613" },
{ "label": "gpt-3.5-turbo-16k", "name": "gpt-3.5-turbo-16k" },
{ "label": "gpt-3.5-turbo-16k-0613", "name": "gpt-3.5-turbo-16k-0613" }
{
"label": "gpt-3.5-turbo-16k-0613",
"name": "gpt-3.5-turbo-16k-0613"
}
],
"default": "gpt-3.5-turbo",
"optional": true,
@@ -103,6 +220,14 @@
"optional": true,
"additionalParams": true,
"id": "chatOpenAI_0-input-basepath-string"
},
{
"label": "BaseOptions",
"name": "baseOptions",
"type": "json",
"optional": true,
"additionalParams": true,
"id": "chatOpenAI_0-input-baseOptions-json"
}
],
"inputAnchors": [],
@@ -114,28 +239,29 @@
"frequencyPenalty": "",
"presencePenalty": "",
"timeout": "",
"basepath": ""
"basepath": "",
"baseOptions": ""
},
"outputAnchors": [
{
"id": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel",
"id": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel|Runnable",
"name": "chatOpenAI",
"label": "ChatOpenAI",
"type": "ChatOpenAI | BaseChatModel | BaseLanguageModel"
"type": "ChatOpenAI | BaseChatModel | BaseLanguageModel | Runnable"
}
],
"outputs": {},
"selected": false
},
"selected": false,
"positionAbsolute": { "x": 514.1088940275924, "y": 199.574479681537 },
"positionAbsolute": { "x": 669.6533996522251, "y": 177.86181519287192 },
"dragging": false
},
{
"width": 300,
"height": 481,
"id": "conversationalRetrievalQAChain_0",
"position": { "x": 900.4793407261002, "y": 205.9476004518217 },
"position": { "x": 1135.5490908971935, "y": 201.62146241822506 },
"type": "customNode",
"data": {
"id": "conversationalRetrievalQAChain_0",
@@ -143,7 +269,7 @@
"version": 1,
"name": "conversationalRetrievalQAChain",
"type": "ConversationalRetrievalQAChain",
"baseClasses": ["ConversationalRetrievalQAChain", "BaseChain"],
"baseClasses": ["ConversationalRetrievalQAChain", "BaseChain", "Runnable"],
"category": "Chains",
"description": "Document QA - built on RetrievalQAChain to provide a chat history component",
"inputParams": [
@@ -214,234 +340,45 @@
],
"inputs": {
"model": "{{chatOpenAI_0.data.instance}}",
"vectorStoreRetriever": "{{vectaraUpsert_0.data.instance}}",
"vectorStoreRetriever": "{{vectaraUpload_0.data.instance}}",
"memory": "",
"returnSourceDocuments": "",
"returnSourceDocuments": true,
"systemMessagePrompt": "",
"chainOption": ""
},
"outputAnchors": [
{
"id": "conversationalRetrievalQAChain_0-output-conversationalRetrievalQAChain-ConversationalRetrievalQAChain|BaseChain",
"id": "conversationalRetrievalQAChain_0-output-conversationalRetrievalQAChain-ConversationalRetrievalQAChain|BaseChain|Runnable",
"name": "conversationalRetrievalQAChain",
"label": "ConversationalRetrievalQAChain",
"type": "ConversationalRetrievalQAChain | BaseChain"
"type": "ConversationalRetrievalQAChain | BaseChain | Runnable"
}
],
"outputs": {},
"selected": false
},
"selected": false,
"positionAbsolute": { "x": 900.4793407261002, "y": 205.9476004518217 },
"dragging": false
},
{
"width": 300,
"height": 509,
"id": "pdfFile_0",
"position": { "x": -210.44158723479913, "y": 236.6627524951051 },
"type": "customNode",
"data": {
"id": "pdfFile_0",
"label": "Pdf File",
"version": 1,
"name": "pdfFile",
"type": "Document",
"baseClasses": ["Document"],
"category": "Document Loaders",
"description": "Load data from PDF files",
"inputParams": [
{ "label": "Pdf File", "name": "pdfFile", "type": "file", "fileType": ".pdf", "id": "pdfFile_0-input-pdfFile-file" },
{
"label": "Usage",
"name": "usage",
"type": "options",
"options": [
{ "label": "One document per page", "name": "perPage" },
{ "label": "One document per file", "name": "perFile" }
],
"default": "perPage",
"id": "pdfFile_0-input-usage-options"
},
{
"label": "Use Legacy Build",
"name": "legacyBuild",
"type": "boolean",
"optional": true,
"additionalParams": true,
"id": "pdfFile_0-input-legacyBuild-boolean"
},
{
"label": "Metadata",
"name": "metadata",
"type": "json",
"optional": true,
"additionalParams": true,
"id": "pdfFile_0-input-metadata-json"
}
],
"inputAnchors": [
{
"label": "Text Splitter",
"name": "textSplitter",
"type": "TextSplitter",
"optional": true,
"id": "pdfFile_0-input-textSplitter-TextSplitter"
}
],
"inputs": { "textSplitter": "", "usage": "perPage", "legacyBuild": "", "metadata": "" },
"outputAnchors": [
{ "id": "pdfFile_0-output-pdfFile-Document", "name": "pdfFile", "label": "Document", "type": "Document" }
],
"outputs": {},
"selected": false
},
"selected": false,
"positionAbsolute": { "x": -210.44158723479913, "y": 236.6627524951051 },
"dragging": false
},
{
"width": 300,
"height": 408,
"id": "vectaraUpsert_0",
"position": { "x": 172.06946164914868, "y": 373.11406233089934 },
"type": "customNode",
"data": {
"id": "vectaraUpsert_0",
"label": "Vectara Upsert Document",
"version": 1,
"name": "vectaraUpsert",
"type": "Vectara",
"baseClasses": ["Vectara", "VectorStoreRetriever", "BaseRetriever"],
"category": "Vector Stores",
"description": "Upsert documents to Vectara",
"inputParams": [
{
"label": "Connect Credential",
"name": "credential",
"type": "credential",
"credentialNames": ["vectaraApi"],
"id": "vectaraUpsert_0-input-credential-credential"
},
{
"label": "Vectara Metadata Filter",
"name": "filter",
"description": "Filter to apply to Vectara metadata. Refer to the <a target=\"_blank\" href=\"https://docs.flowiseai.com/vector-stores/vectara\">documentation</a> on how to use Vectara filters with Flowise.",
"type": "string",
"additionalParams": true,
"optional": true,
"id": "vectaraUpsert_0-input-filter-string"
},
{
"label": "Sentences Before",
"name": "sentencesBefore",
"description": "Number of sentences to fetch before the matched sentence. Defaults to 2.",
"type": "number",
"additionalParams": true,
"optional": true,
"id": "vectaraUpsert_0-input-sentencesBefore-number"
},
{
"label": "Sentences After",
"name": "sentencesAfter",
"description": "Number of sentences to fetch after the matched sentence. Defaults to 2.",
"type": "number",
"additionalParams": true,
"optional": true,
"id": "vectaraUpsert_0-input-sentencesAfter-number"
},
{
"label": "Lambda",
"name": "lambda",
"description": "Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.",
"type": "number",
"additionalParams": true,
"optional": true,
"id": "vectaraUpsert_0-input-lambda-number"
},
{
"label": "Top K",
"name": "topK",
"description": "Number of top results to fetch. Defaults to 4",
"placeholder": "4",
"type": "number",
"additionalParams": true,
"optional": true,
"id": "vectaraUpsert_0-input-topK-number"
}
],
"inputAnchors": [
{
"label": "Document",
"name": "document",
"type": "Document",
"list": true,
"id": "vectaraUpsert_0-input-document-Document"
}
],
"inputs": {
"document": ["{{pdfFile_0.data.instance}}"],
"filter": "",
"sentencesBefore": "",
"sentencesAfter": "",
"lambda": "",
"topK": ""
},
"outputAnchors": [
{
"name": "output",
"label": "Output",
"type": "options",
"options": [
{
"id": "vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever",
"name": "retriever",
"label": "Vectara Retriever",
"type": "Vectara | VectorStoreRetriever | BaseRetriever"
},
{
"id": "vectaraUpsert_0-output-vectorStore-Vectara|VectorStore",
"name": "vectorStore",
"label": "Vectara Vector Store",
"type": "Vectara | VectorStore"
}
],
"default": "retriever"
}
],
"outputs": { "output": "retriever" },
"selected": false
},
"positionAbsolute": { "x": 172.06946164914868, "y": 373.11406233089934 },
"selected": false
"dragging": false,
"positionAbsolute": { "x": 1135.5490908971935, "y": 201.62146241822506 }
}
],
"edges": [
{
"source": "chatOpenAI_0",
"sourceHandle": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel",
"target": "conversationalRetrievalQAChain_0",
"targetHandle": "conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
"type": "buttonedge",
"id": "chatOpenAI_0-chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
"data": { "label": "" }
},
{
"source": "pdfFile_0",
"sourceHandle": "pdfFile_0-output-pdfFile-Document",
"target": "vectaraUpsert_0",
"targetHandle": "vectaraUpsert_0-input-document-Document",
"type": "buttonedge",
"id": "pdfFile_0-pdfFile_0-output-pdfFile-Document-vectaraUpsert_0-vectaraUpsert_0-input-document-Document",
"data": { "label": "" }
},
{
"source": "vectaraUpsert_0",
"sourceHandle": "vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever",
"source": "vectaraUpload_0",
"sourceHandle": "vectaraUpload_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever",
"target": "conversationalRetrievalQAChain_0",
"targetHandle": "conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever",
"type": "buttonedge",
"id": "vectaraUpsert_0-vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever",
"id": "vectaraUpload_0-vectaraUpload_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever",
"data": { "label": "" }
},
{
"source": "chatOpenAI_0",
"sourceHandle": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel|Runnable",
"target": "conversationalRetrievalQAChain_0",
"targetHandle": "conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
"type": "buttonedge",
"id": "chatOpenAI_0-chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel|Runnable-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
"data": { "label": "" }
}
]