Merge pull request #1261 from vectara/vectara_renumber_citations

reorder citations in Vectara response
This commit is contained in:
Henry Heng 2023-11-21 17:51:27 +00:00 committed by GitHub
commit e4f2b78e9a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 39 additions and 3 deletions

View File

@ -5,6 +5,35 @@ import { Document } from 'langchain/document'
import { VectaraStore } from 'langchain/vectorstores/vectara' import { VectaraStore } from 'langchain/vectorstores/vectara'
import fetch from 'node-fetch' import fetch from 'node-fetch'
// Citation reordering helpers, adapted from https://github.com/vectara/vectara-answer
/**
 * Renumbers the bracketed numeric citations in a summary (e.g. `[3]`) so that
 * they read `[1]`, `[2]`, ... in order of first appearance. Every repetition of
 * the same citation text receives the same new number.
 *
 * Citations are compared by their exact text, so `[1]` and `[01]` are treated
 * as two different citations.
 *
 * @param unorderedSummary Summary text containing `[n]` citation markers. May
 *   be `null`/`undefined`/empty when no summary text is available.
 * @returns The summary with citations renumbered, or `''` when no summary text
 *   was supplied.
 */
const reorderCitations = (unorderedSummary: string | null | undefined): string => {
    // Without this guard a missing summary would throw on `.match`.
    if (!unorderedSummary) return ''

    // Map each distinct citation to its 1-based rank of first appearance.
    const citationToReplacement = new Map<string, string>()
    for (const citation of unorderedSummary.match(/\[\d+\]/g) ?? []) {
        if (!citationToReplacement.has(citation)) {
            citationToReplacement.set(citation, `[${citationToReplacement.size + 1}]`)
        }
    }

    // Every match was registered above; the fallback only keeps the callback total.
    return unorderedSummary.replace(/\[\d+\]/g, (match) => citationToReplacement.get(match) ?? match)
}
/**
 * Returns the search results that a summary cites, ordered by the first
 * appearance of each `[n]` citation in the summary text. Citation numbers are
 * 1-based: `[1]` selects `searchResults[0]`. A result cited several times is
 * included once; results that are never cited are omitted.
 *
 * A citation whose number has no matching entry in `searchResults` (for
 * example `[0]` or a number larger than the list) contributes an `undefined`
 * entry at its position rather than being dropped, so callers should be
 * prepared for holes.
 *
 * @param searchResults Results addressed by the citation numbers.
 * @param unorderedSummary Summary text containing `[n]` citation markers. May
 *   be `null`/`undefined`/empty when no summary text is available.
 * @returns A new array of cited results in citation order; empty when the
 *   summary is missing or contains no citations.
 */
const applyCitationOrder = (searchResults: any[], unorderedSummary: string | null | undefined) => {
    const orderedSearchResults: any[] = []

    // Without this guard a missing summary would throw on `.match`.
    if (!unorderedSummary) return orderedSearchResults

    const addedIndices = new Set<number>()
    for (const citation of unorderedSummary.match(/\[\d+\]/g) ?? []) {
        // Strip the surrounding brackets and convert the 1-based number to a 0-based index.
        const index = Number(citation.slice(1, -1)) - 1
        if (addedIndices.has(index)) continue
        addedIndices.add(index)
        orderedSearchResults.push(searchResults[index])
    }
    return orderedSearchResults
}
class VectaraChain_Chains implements INode { class VectaraChain_Chains implements INode {
label: string label: string
name: string name: string
@ -254,7 +283,7 @@ class VectaraChain_Chains implements INode {
const result = await response.json() const result = await response.json()
const responses = result.responseSet[0].response const responses = result.responseSet[0].response
const documents = result.responseSet[0].document const documents = result.responseSet[0].document
let summarizedText = '' let rawSummarizedText = ''
for (let i = 0; i < responses.length; i += 1) { for (let i = 0; i < responses.length; i += 1) {
const responseMetadata = responses[i].metadata const responseMetadata = responses[i].metadata
@ -287,9 +316,12 @@ class VectaraChain_Chains implements INode {
throw new Error(`BAD REQUEST: summarizer ${summarizerPromptName} is invalid for this account.`) throw new Error(`BAD REQUEST: summarizer ${summarizerPromptName} is invalid for this account.`)
} }
summarizedText = result.responseSet[0].summary[0]?.text rawSummarizedText = result.responseSet[0].summary[0]?.text
const sourceDocuments: Document[] = responses.map( let summarizedText = reorderCitations(rawSummarizedText)
let summaryResponses = applyCitationOrder(responses, rawSummarizedText)
const sourceDocuments: Document[] = summaryResponses.map(
(response: { text: string; metadata: Record<string, unknown>; score: number }) => (response: { text: string; metadata: Record<string, unknown>; score: number }) =>
new Document({ new Document({
pageContent: response.text, pageContent: response.text,

View File

@ -47,6 +47,7 @@
"google-auth-library": "^9.0.0", "google-auth-library": "^9.0.0",
"graphql": "^16.6.0", "graphql": "^16.6.0",
"html-to-text": "^9.0.5", "html-to-text": "^9.0.5",
"husky": "^8.0.3",
"ioredis": "^5.3.2", "ioredis": "^5.3.2",
"langchain": "^0.0.165", "langchain": "^0.0.165",
"langfuse-langchain": "^1.0.31", "langfuse-langchain": "^1.0.31",
@ -82,6 +83,9 @@
"@types/object-hash": "^3.0.2", "@types/object-hash": "^3.0.2",
"@types/pg": "^8.10.2", "@types/pg": "^8.10.2",
"@types/ws": "^8.5.3", "@types/ws": "^8.5.3",
"eslint-plugin-markdown": "^3.0.1",
"eslint-plugin-react": "^7.33.2",
"eslint-plugin-react-hooks": "^4.6.0",
"gulp": "^4.0.2", "gulp": "^4.0.2",
"typescript": "^4.8.4" "typescript": "^4.8.4"
} }