import { flatten } from 'lodash'
import {
VectaraStore,
VectaraLibArgs,
VectaraFilter,
VectaraContextConfig,
VectaraFile,
MMRConfig
} from '@langchain/community/vectorstores/vectara'
import { Document } from '@langchain/core/documents'
import { Embeddings } from '@langchain/core/embeddings'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { getFileFromStorage } from '../../../src'
/**
 * Flowise node that connects a flow to Vectara through LangChain's `VectaraStore`.
 *
 * The node can upsert LangChain documents and/or uploaded files, and at run
 * time returns either a retriever or the vector store itself depending on the
 * selected output.
 */
class Vectara_VectorStores implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    badge: string
    baseClasses: string[]
    inputs: INodeParams[]
    credential: INodeParams
    outputs: INodeOutputsValue[]

    /** Declares the node metadata, its credential, its inputs and its two outputs. */
    constructor() {
        this.label = 'Vectara'
        this.name = 'vectara'
        this.version = 2.0
        this.type = 'Vectara'
        this.icon = 'vectara.png'
        this.category = 'Vector Stores'
        this.description = 'Upsert embedded data and perform similarity search upon query using Vectara, a LLM-powered search-as-a-service'
        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
        this.badge = 'NEW'
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['vectaraApi']
        }
        this.inputs = [
            {
                label: 'Document',
                name: 'document',
                type: 'Document',
                list: true,
                optional: true
            },
            {
                label: 'File',
                name: 'file',
                description:
                    'File to upload to Vectara. Supported file types: https://docs.vectara.com/docs/api-reference/indexing-apis/file-upload/file-upload-filetypes',
                type: 'file',
                optional: true
            },
            {
                label: 'Metadata Filter',
                name: 'filter',
                description:
                    'Filter to apply to Vectara metadata. Refer to the documentation on how to use Vectara filters with Flowise.',
                type: 'string',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Sentences Before',
                name: 'sentencesBefore',
                description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
                type: 'number',
                default: 2,
                additionalParams: true,
                optional: true
            },
            {
                label: 'Sentences After',
                name: 'sentencesAfter',
                description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
                type: 'number',
                default: 2,
                additionalParams: true,
                optional: true
            },
            {
                label: 'Lambda',
                name: 'lambda',
                // The fragments are concatenated, so each one must end with its own separator.
                description:
                    'Enable hybrid search to improve retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors. ' +
                    'A value of 0.0 means that only neural search is used, while a value of 1.0 means that only keyword-based search is used. Defaults to 0.0 (neural only).',
                default: 0.0,
                type: 'number',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Top K',
                name: 'topK',
                description: 'Number of top results to fetch. Defaults to 5',
                placeholder: '5',
                type: 'number',
                additionalParams: true,
                optional: true
            },
            {
                label: 'MMR K',
                name: 'mmrK',
                description: 'Number of top results to fetch for MMR. Defaults to 50',
                placeholder: '50',
                type: 'number',
                additionalParams: true,
                optional: true
            },
            {
                label: 'MMR diversity bias',
                name: 'mmrDiversityBias',
                step: 0.1,
                // The fragments are concatenated, so each one must end with its own separator.
                description:
                    'The diversity bias to use for MMR. This is a value between 0.0 and 1.0. ' +
                    'Values closer to 1.0 optimize for the most diverse results. ' +
                    'Defaults to 0 (MMR disabled)',
                placeholder: '0.0',
                type: 'number',
                additionalParams: true,
                optional: true
            }
        ]
        this.outputs = [
            {
                label: 'Vectara Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Vectara Vector Store',
                name: 'vectorStore',
                baseClasses: [this.type, ...getBaseClasses(VectaraStore)]
            }
        ]
    }

    //@ts-ignore
    vectorStoreMethods = {
        /**
         * Indexes the connected documents and/or the uploaded file(s) into Vectara.
         *
         * Both the `document` and the `file` input are optional; whichever is
         * present gets indexed.
         *
         * @param nodeData - Node configuration; reads `credential`, `inputs.document` and `inputs.file`.
         * @param options - Flow options; used to resolve the credential and, for stored files, `chatflowid`.
         * @returns `numAdded` / `addedDocs` describing the indexed documents only (uploaded files are not counted).
         * @throws Error when reading a stored file, indexing documents or uploading files fails.
         */
        async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
            const credentialData = await getCredentialData(nodeData.credential ?? '', options)
            const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
            const customerId = getCredentialParam('customerID', credentialData, nodeData)
            // The credential stores the corpus id(s) as a comma-separated string.
            const corpusId = getCredentialParam('corpusID', credentialData, nodeData).split(',')

            const docs = nodeData.inputs?.document as Document[]
            // An empty placeholder is handed to `fromDocuments` as the embeddings argument.
            // NOTE(review): presumably Vectara computes embeddings itself - confirm against the VectaraStore docs.
            const embeddings = {} as Embeddings
            const fileBase64 = nodeData.inputs?.file

            const vectaraArgs: VectaraLibArgs = {
                apiKey: apiKey,
                customerId: customerId,
                corpusId: corpusId,
                source: 'flowise'
            }

            // Keep only entries that actually carry page content.
            const flattenDocs = docs && docs.length ? flatten(docs) : []
            const finalDocs = []
            for (let i = 0; i < flattenDocs.length; i += 1) {
                if (flattenDocs[i] && flattenDocs[i].pageContent) {
                    finalDocs.push(new Document(flattenDocs[i]))
                }
            }

            const vectaraFiles: VectaraFile[] = []
            // The file input is optional: only inspect it when a value was supplied,
            // otherwise a documents-only upsert would fail on `undefined.startsWith`.
            if (fileBase64) {
                let files: string[] = []
                if (fileBase64.startsWith('FILE-STORAGE::')) {
                    // Reference(s) to files held in storage: a single name or a JSON array of names.
                    const fileName = fileBase64.replace('FILE-STORAGE::', '')
                    if (fileName.startsWith('[') && fileName.endsWith(']')) {
                        files = JSON.parse(fileName)
                    } else {
                        files = [fileName]
                    }
                    const chatflowid = options.chatflowid
                    for (const file of files) {
                        const fileData = await getFileFromStorage(file, chatflowid)
                        const blob = new Blob([fileData])
                        vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
                    }
                } else {
                    // Inline payload(s): a single comma-separated descriptor or a JSON array of them.
                    if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
                        files = JSON.parse(fileBase64)
                    } else {
                        files = [fileBase64]
                    }
                    for (const file of files) {
                        const splitDataURI = file.split(',')
                        // Drop the trailing segment (the one getFileName reads the name from);
                        // the segment before it is decoded as the base64 content.
                        splitDataURI.pop()
                        const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
                        const blob = new Blob([bf])
                        vectaraFiles.push({ blob: blob, fileName: getFileName(file) })
                    }
                }
            }

            try {
                if (finalDocs.length) await VectaraStore.fromDocuments(finalDocs, embeddings, vectaraArgs)
                if (vectaraFiles.length) {
                    const vectorStore = new VectaraStore(vectaraArgs)
                    await vectorStore.addFiles(vectaraFiles)
                }
                return { numAdded: finalDocs.length, addedDocs: finalDocs }
            } catch (e) {
                // Rethrow the original error so its message and stack are preserved;
                // only non-Error throwables are wrapped.
                throw e instanceof Error ? e : new Error(String(e))
            }
        }
    }

    /**
     * Builds the Vectara vector store and returns it in the shape selected by the node output.
     *
     * @param nodeData - Node configuration; reads the credential, the search-tuning inputs and `outputs.output`.
     * @param _ - Unused.
     * @param options - Flow options used to resolve the credential.
     * @returns A retriever when the output is `retriever`; otherwise the `VectaraStore` instance
     *          (with `k` and the metadata filter attached when the output is `vectorStore`).
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const apiKey = getCredentialParam('apiKey', credentialData, nodeData)
        const customerId = getCredentialParam('customerID', credentialData, nodeData)
        // The credential stores the corpus id(s) as a comma-separated string.
        const corpusId = getCredentialParam('corpusID', credentialData, nodeData).split(',')

        const vectaraMetadataFilter = nodeData.inputs?.filter as string
        const sentencesBefore = nodeData.inputs?.sentencesBefore as number
        const sentencesAfter = nodeData.inputs?.sentencesAfter as number
        const lambda = nodeData.inputs?.lambda as number
        const output = nodeData.outputs?.output as string
        const topK = nodeData.inputs?.topK as string
        const k = topK ? parseFloat(topK) : 5
        const mmrK = nodeData.inputs?.mmrK as number
        const mmrDiversityBias = nodeData.inputs?.mmrDiversityBias as number

        // A numeric 0 is a legitimate setting (e.g. no surrounding sentences), so test for
        // "a value was provided" rather than truthiness.
        const isProvided = (value: unknown): boolean => value !== undefined && value !== null && value !== ''

        const vectaraArgs: VectaraLibArgs = {
            apiKey: apiKey,
            customerId: customerId,
            corpusId: corpusId,
            source: 'flowise'
        }

        const vectaraFilter: VectaraFilter = {}
        if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
        if (isProvided(lambda)) vectaraFilter.lambda = lambda

        const vectaraContextConfig: VectaraContextConfig = {}
        if (isProvided(sentencesBefore)) vectaraContextConfig.sentencesBefore = sentencesBefore
        if (isProvided(sentencesAfter)) vectaraContextConfig.sentencesAfter = sentencesAfter
        vectaraFilter.contextConfig = vectaraContextConfig

        // MMR is switched on only when a positive diversity bias was supplied.
        const mmrConfig: MMRConfig = {}
        mmrConfig.enabled = mmrDiversityBias > 0
        mmrConfig.mmrTopK = mmrK
        mmrConfig.diversityBias = mmrDiversityBias
        vectaraFilter.mmrConfig = mmrConfig

        const vectorStore = new VectaraStore(vectaraArgs)

        if (output === 'retriever') {
            const retriever = vectorStore.asRetriever(k, vectaraFilter)
            return retriever
        } else if (output === 'vectorStore') {
            // `k` and `filter` are attached through an `any` cast; only the metadata
            // filter string is attached here, not the lambda/context/MMR settings.
            ;(vectorStore as any).k = k
            if (vectaraMetadataFilter) {
                ;(vectorStore as any).filter = vectaraFilter.filter
            }
            return vectorStore
        }
        return vectorStore
    }
}
/**
 * Derives the display file name(s) from an uploaded-file descriptor.
 *
 * A descriptor is a comma-separated string whose last segment has the form
 * `filename:<name>`; a plain file name with no such marker is also accepted.
 * The input may be a single descriptor or a JSON array of descriptors.
 *
 * @param fileBase64 - A single descriptor / file name, or a JSON array (`[...]`) of them.
 * @returns The file name, or the names joined with `', '` when a JSON array was given.
 * @throws SyntaxError when the input looks like a JSON array but cannot be parsed.
 */
const getFileName = (fileBase64: string): string => {
    // Extracts the name from one descriptor.
    const nameOf = (entry: string): string => {
        const lastSegment = entry.slice(entry.lastIndexOf(',') + 1)
        const separatorAt = lastSegment.indexOf(':')
        // Without a `key:value` marker the segment already is the name (e.g. a stored file name).
        // With one, keep everything after the first colon so names containing ':' are not truncated.
        return separatorAt === -1 ? lastSegment : lastSegment.slice(separatorAt + 1)
    }

    if (fileBase64.startsWith('[') && fileBase64.endsWith(']')) {
        const entries: unknown = JSON.parse(fileBase64)
        if (Array.isArray(entries)) {
            return entries.map((entry) => nameOf(String(entry))).join(', ')
        }
    }
    return nameOf(fileBase64)
}
// CommonJS export: exposes the node class under the `nodeClass` key.
// NOTE(review): presumably the key the Flowise node loader looks up - confirm against the loader.
module.exports = { nodeClass: Vectara_VectorStores }