Flowise/packages/components/nodes/vectorstores/Kendra/Kendra.ts

295 lines
11 KiB
TypeScript

import { flatten } from 'lodash'
import { AmazonKendraRetriever } from '@langchain/aws'
import { KendraClient, BatchPutDocumentCommand, BatchDeleteDocumentCommand } from '@aws-sdk/client-kendra'
import { Document } from '@langchain/core/documents'
import { ICommonObject, INode, INodeData, INodeOptionsValue, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { FLOWISE_CHATID, getCredentialData, getCredentialParam } from '../../../src/utils'
import { howToUseFileUpload } from '../VectorStoreUtils'
import { MODEL_TYPE, getRegions } from '../../../src/modelLoader'
/** Static AWS credentials in the shape handed to the AWS SDK / LangChain Kendra clients. */
type KendraStaticCredentials = {
    accessKeyId: string
    secretAccessKey: string
    sessionToken?: string
}

/** Result count used when the "Top K" input is empty or not a valid integer. */
const KENDRA_DEFAULT_TOP_K = 10

/** Kendra accepts at most 10 documents per BatchPutDocument / BatchDeleteDocument request. */
const KENDRA_BATCH_SIZE = 10

/**
 * Converts the raw "Top K" node input into an integer.
 *
 * @param rawTopK value of the `topK` input (normally a string, possibly empty/undefined)
 * @returns the parsed base-10 integer, or {@link KENDRA_DEFAULT_TOP_K} when the input is empty or not numeric
 */
function parseKendraTopK(rawTopK: unknown): number {
    if (!rawTopK) return KENDRA_DEFAULT_TOP_K
    const parsed = Number.parseInt(String(rawTopK), 10)
    // A non-numeric value must not reach the retriever as NaN.
    return Number.isNaN(parsed) ? KENDRA_DEFAULT_TOP_K : parsed
}

/**
 * Returns a copy of `filter` whose `OrAllFilters` list additionally contains an
 * `EqualsTo` clause matching `attributeKey` against `chatId`.
 * The input object is never modified, so the node's stored inputs stay untouched across calls.
 *
 * @param filter user supplied attribute filter, or undefined when none was configured
 * @param attributeKey name of the document attribute holding the chat id
 * @param chatId chat id to match
 * @returns a new filter object
 */
function appendChatFilter(filter: Record<string, any> | undefined, attributeKey: string, chatId: string): Record<string, any> {
    const base = filter ?? {}
    const existingClauses = Array.isArray(base.OrAllFilters) ? base.OrAllFilters : []
    return {
        ...base,
        OrAllFilters: [
            ...existingClauses,
            {
                EqualsTo: {
                    Key: attributeKey,
                    Value: {
                        StringValue: chatId
                    }
                }
            }
        ]
    }
}

/**
 * Splits `items` into consecutive slices of at most `size` elements.
 *
 * @param items elements to split
 * @param size maximum slice length (values below 1 are treated as 1)
 * @returns the slices in original order; an empty array for empty input
 */
function toKendraBatches<T>(items: T[], size: number): T[][] {
    const step = Math.max(1, Math.floor(size))
    const batches: T[][] = []
    for (let start = 0; start < items.length; start += step) {
        batches.push(items.slice(start, start + step))
    }
    return batches
}

/**
 * Reads the optional AWS credential attached to the node.
 *
 * @returns static credentials when both access key and secret are present; otherwise undefined,
 *          in which case no explicit credentials are passed to the client
 */
async function resolveKendraCredentials(nodeData: INodeData, options: ICommonObject): Promise<KendraStaticCredentials | undefined> {
    const credentialData = await getCredentialData(nodeData.credential ?? '', options)
    if (!credentialData || Object.keys(credentialData).length === 0) return undefined

    const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
    const secretAccessKey = getCredentialParam('awsSecret', credentialData, nodeData)
    const sessionToken = getCredentialParam('awsSession', credentialData, nodeData)
    if (!accessKeyId || !secretAccessKey) return undefined

    return {
        accessKeyId,
        secretAccessKey,
        ...(sessionToken ? { sessionToken } : {})
    }
}

/** Builds a KendraClient for the node's configured region and (optional) credential. */
async function createKendraClient(nodeData: INodeData, options: ICommonObject): Promise<KendraClient> {
    const region = nodeData.inputs?.region as string
    const credentials = await resolveKendraCredentials(nodeData, options)
    return new KendraClient(credentials ? { region, credentials } : { region })
}

/**
 * Flowise node that indexes documents into an AWS Kendra index and exposes an
 * AmazonKendraRetriever over that index.
 */
class Kendra_VectorStores implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    badge: string
    baseClasses: string[]
    inputs: INodeParams[]
    credential: INodeParams
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'AWS Kendra'
        this.name = 'kendra'
        this.version = 1.0
        this.type = 'Kendra'
        this.icon = 'kendra.svg'
        this.category = 'Vector Stores'
        this.description = `Use AWS Kendra's intelligent search service for document retrieval and semantic search`
        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
        this.credential = {
            label: 'AWS Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['awsApi'],
            optional: true
        }
        this.inputs = [
            {
                label: 'Document',
                name: 'document',
                type: 'Document',
                list: true,
                optional: true
            },
            {
                label: 'Region',
                name: 'region',
                type: 'asyncOptions',
                loadMethod: 'listRegions',
                default: 'us-east-1'
            },
            {
                label: 'Kendra Index ID',
                name: 'indexId',
                type: 'string',
                placeholder: 'xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx',
                description: 'The ID of your AWS Kendra index'
            },
            {
                label: 'File Upload',
                name: 'fileUpload',
                description: 'Allow file upload on the chat',
                hint: {
                    label: 'How to use',
                    value: howToUseFileUpload
                },
                type: 'boolean',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Top K',
                name: 'topK',
                description: 'Number of top results to fetch. Default to 10',
                placeholder: '10',
                type: 'number',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Attribute Filter',
                name: 'attributeFilter',
                description: 'Optional filter to apply when retrieving documents',
                type: 'json',
                optional: true,
                additionalParams: true,
                acceptVariable: true
            }
        ]
        // Note: Kendra doesn't support MMR search, but keeping the structure consistent
        this.outputs = [
            {
                label: 'Kendra Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Kendra Vector Store',
                name: 'vectorStore',
                baseClasses: [this.type, 'BaseRetriever']
            }
        ]
    }

    loadMethods = {
        /**
         * Supplies the options for the "Region" dropdown.
         * The list is the one the model loader keeps under the 'awsChatBedrock' chat entry.
         */
        async listRegions(): Promise<INodeOptionsValue[]> {
            return await getRegions(MODEL_TYPE.CHAT, 'awsChatBedrock')
        }
    }

    //@ts-ignore
    vectorStoreMethods = {
        /**
         * Uploads every non-empty input document to the Kendra index as plain text.
         *
         * @returns the number of documents submitted and the documents themselves
         * @throws Error when a request fails or Kendra reports failed documents
         */
        async upsert(nodeData: INodeData, options: ICommonObject): Promise<Partial<IndexingResult>> {
            const indexId = nodeData.inputs?.indexId as string
            const docs = nodeData.inputs?.document as Document[]
            const isFileUploadEnabled = nodeData.inputs?.fileUpload as boolean

            const client = await createKendraClient(nodeData, options)

            const flattenDocs = docs && docs.length ? flatten(docs) : []
            const finalDocs: Document[] = []
            const kendraDocuments = []
            for (let i = 0; i < flattenDocs.length; i += 1) {
                if (flattenDocs[i] && flattenDocs[i].pageContent) {
                    if (isFileUploadEnabled && options.chatId) {
                        flattenDocs[i].metadata = { ...flattenDocs[i].metadata, [FLOWISE_CHATID]: options.chatId }
                    }
                    finalDocs.push(new Document(flattenDocs[i]))

                    // Prepare document for Kendra
                    // NOTE(review): the generated id is not attached to the returned documents, and the
                    // chat id is only stored in the LangChain metadata (no Kendra `Attributes` are sent).
                    // Confirm that delete() and the chat filter built in init() can work with that.
                    const docId = `doc_${Date.now()}_${i}`
                    const docTitle = flattenDocs[i].metadata?.title || flattenDocs[i].metadata?.source || `Document ${i + 1}`
                    kendraDocuments.push({
                        Id: docId,
                        Title: docTitle,
                        Blob: new Uint8Array(Buffer.from(flattenDocs[i].pageContent, 'utf-8')),
                        ContentType: 'PLAIN_TEXT' as any
                    })
                }
            }

            try {
                // Batches are sent one after another; a failed batch aborts the remaining ones.
                for (const batch of toKendraBatches(kendraDocuments, KENDRA_BATCH_SIZE)) {
                    const command = new BatchPutDocumentCommand({
                        IndexId: indexId,
                        Documents: batch
                    })
                    const response = await client.send(command)
                    if (response.FailedDocuments && response.FailedDocuments.length > 0) {
                        console.error('Failed documents:', response.FailedDocuments)
                        throw new Error(`Failed to index some documents: ${JSON.stringify(response.FailedDocuments)}`)
                    }
                }
                return { numAdded: finalDocs.length, addedDocs: finalDocs }
            } catch (error) {
                throw new Error(`Failed to index documents to Kendra: ${error}`)
            }
        },

        /**
         * Removes the documents with the given ids from the Kendra index.
         *
         * @param ids Kendra document ids to delete
         * @throws Error when a request fails or Kendra reports documents it could not delete
         */
        async delete(nodeData: INodeData, ids: string[], options: ICommonObject): Promise<void> {
            const indexId = nodeData.inputs?.indexId as string
            const client = await createKendraClient(nodeData, options)

            try {
                for (const batch of toKendraBatches(ids, KENDRA_BATCH_SIZE)) {
                    const command = new BatchDeleteDocumentCommand({
                        IndexId: indexId,
                        DocumentIdList: batch
                    })
                    const response = await client.send(command)
                    // Mirror upsert(): a partially failed batch must not be reported as success.
                    if (response.FailedDocuments && response.FailedDocuments.length > 0) {
                        console.error('Failed documents:', response.FailedDocuments)
                        throw new Error(`Failed to delete some documents: ${JSON.stringify(response.FailedDocuments)}`)
                    }
                }
            } catch (error) {
                throw new Error(`Failed to delete documents from Kendra: ${error}`)
            }
        }
    }

    /**
     * Creates the AmazonKendraRetriever for the configured index.
     *
     * @returns the retriever for the 'retriever' and 'vectorStore' outputs (the latter with extra
     *          `k` / `filter` properties); undefined for any other output name
     * @throws SyntaxError when the attribute filter is a string that is not valid JSON
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const indexId = nodeData.inputs?.indexId as string
        const region = nodeData.inputs?.region as string
        const topK = parseKendraTopK(nodeData.inputs?.topK)
        const attributeFilter = nodeData.inputs?.attributeFilter
        const isFileUploadEnabled = nodeData.inputs?.fileUpload as boolean

        // Same rule as upsert/delete: only pass credentials when key and secret are both present.
        const credentials = await resolveKendraCredentials(nodeData, options)
        const clientOptions = credentials ? { credentials } : {}

        // Typed loosely: the filter is user supplied JSON forwarded to Kendra unchanged.
        let filter: any = undefined
        if (attributeFilter) {
            filter = typeof attributeFilter === 'object' ? attributeFilter : JSON.parse(attributeFilter)
        }

        // Add chat-specific filtering if file upload is enabled
        // NOTE(review): the clause is appended to OrAllFilters, i.e. OR-ed with any user clauses in
        // that list - confirm this is intended rather than an AND restriction to the chat.
        if (isFileUploadEnabled && options.chatId) {
            filter = appendChatFilter(filter, FLOWISE_CHATID, options.chatId)
        }

        const retriever = new AmazonKendraRetriever({
            topK,
            indexId,
            region,
            attributeFilter: filter,
            clientOptions
        })

        const output = nodeData.outputs?.output as string
        if (output === 'retriever') {
            return retriever
        } else if (output === 'vectorStore') {
            // Kendra doesn't have a traditional vector store interface,
            // but we can return the retriever with additional properties
            ;(retriever as any).k = topK
            ;(retriever as any).filter = filter
            return retriever
        }
        return undefined
    }
}
// CommonJS export: exposes the node class under the `nodeClass` key
// (presumably the key Flowise's node loader looks up - confirm against the loader).
module.exports = { nodeClass: Kendra_VectorStores }