diff --git a/packages/components/nodes/chains/VectaraChain/VectaraChain.ts b/packages/components/nodes/chains/VectaraChain/VectaraChain.ts
index 143c6d5b7..3799d062f 100644
--- a/packages/components/nodes/chains/VectaraChain/VectaraChain.ts
+++ b/packages/components/nodes/chains/VectaraChain/VectaraChain.ts
@@ -5,6 +5,59 @@ import { Document } from 'langchain/document'
 import { VectaraStore } from 'langchain/vectorstores/vectara'
 import fetch from 'node-fetch'
 
+// functionality based on https://github.com/vectara/vectara-answer
+
+/**
+ * Renumbers the `[n]` citation markers in a summary so they count up from 1 in order of first appearance,
+ * e.g. `[3] ... [1] ... [3]` becomes `[1] ... [2] ... [1]`.
+ *
+ * @param unorderedSummary summary text containing `[n]` citation markers
+ * @param resultCount number of search results the markers may refer to; markers outside `1..resultCount`
+ *        are left untouched because applyCitationOrder() has no result to pair them with
+ * @returns the summary with its valid citation markers renumbered
+ */
+const reorderCitations = (unorderedSummary: string, resultCount = Number.POSITIVE_INFINITY): string => {
+    // keyed by the cited number (not the raw text) so `[01]` and `[1]` share one replacement
+    const citationToReplacement = new Map<number, string>()
+
+    return unorderedSummary.replace(/\[(\d+)\]/g, (match: string, digits: string) => {
+        const citedNumber = Number(digits)
+        if (citedNumber < 1 || citedNumber > resultCount) return match
+
+        let replacement = citationToReplacement.get(citedNumber)
+        if (replacement === undefined) {
+            replacement = `[${citationToReplacement.size + 1}]`
+            citationToReplacement.set(citedNumber, replacement)
+        }
+        return replacement
+    })
+}
+
+/**
+ * Collects the search results cited by a summary, ordered by first citation, so that the result at
+ * position `i` is the one referred to as `[i + 1]` by reorderCitations(summary, searchResults.length).
+ *
+ * @param searchResults search results in their original order; marker `[n]` refers to `searchResults[n - 1]`
+ * @param unorderedSummary summary text with the original (not yet renumbered) citation markers
+ * @returns each cited result once; results that are never cited are omitted
+ */
+const applyCitationOrder = <T>(searchResults: T[], unorderedSummary: string): T[] => {
+    const orderedSearchResults: T[] = []
+    const allCitations = unorderedSummary.match(/\[\d+\]/g) || []
+
+    const addedIndices = new Set<number>()
+    for (const citation of allCitations) {
+        const index = Number(citation.slice(1, -1)) - 1
+
+        // skip markers that point outside the result list rather than emitting `undefined` entries
+        if (index < 0 || index >= searchResults.length || addedIndices.has(index)) continue
+        orderedSearchResults.push(searchResults[index])
+        addedIndices.add(index)
+    }
+
+    return orderedSearchResults
+}
+
 class VectaraChain_Chains implements INode {
     label: string
     name: string
@@ -254,7 +307,7 @@ class VectaraChain_Chains implements INode {
             const result = await response.json()
             const responses = result.responseSet[0].response
             const documents = result.responseSet[0].document
-            let summarizedText = ''
+            let rawSummarizedText = ''
 
             for (let i = 0; i < responses.length; i += 1) {
                 const responseMetadata = responses[i].metadata
@@ -287,9 +340,14 @@ class VectaraChain_Chains implements INode {
                 throw new Error(`BAD REQUEST: summarizer ${summarizerPromptName} is invalid for this account.`)
             }
 
-            summarizedText = result.responseSet[0].summary[0]?.text
+            rawSummarizedText = result.responseSet[0].summary[0]?.text ?? ''
 
-            const sourceDocuments: Document[] = responses.map(
+            // renumber the citations and reorder the cited results together so that `[n]` in the
+            // summary always refers to the n-th source document
+            const summarizedText = reorderCitations(rawSummarizedText, responses.length)
+            const summaryResponses = applyCitationOrder(responses, rawSummarizedText)
+
+            const sourceDocuments: Document[] = summaryResponses.map(
                 (response: { text: string; metadata: Record<string, unknown>; score: number }) =>
                     new Document({
                         pageContent: response.text,
diff --git a/packages/components/package.json b/packages/components/package.json
index c7a29a9ff..1d4cea573 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -47,6 +47,7 @@
         "google-auth-library": "^9.0.0",
         "graphql": "^16.6.0",
         "html-to-text": "^9.0.5",
+        "husky": "^8.0.3",
         "ioredis": "^5.3.2",
         "langchain": "^0.0.165",
         "langfuse-langchain": "^1.0.31",
@@ -82,6 +83,9 @@
         "@types/object-hash": "^3.0.2",
         "@types/pg": "^8.10.2",
         "@types/ws": "^8.5.3",
+        "eslint-plugin-markdown": "^3.0.1",
+        "eslint-plugin-react": "^7.33.2",
+        "eslint-plugin-react-hooks": "^4.6.0",
         "gulp": "^4.0.2",
         "typescript": "^4.8.4"
     }