Merge pull request #774 from vectara/sentence-config

Add sentence context config options (Sentences Before / Sentences After) to the Vectara vector store nodes
2023-08-17 10:20:09 +01:00 · 2023-08-17 10:20:09 +01:00 · 5a8db9a534
parent e998b6a984 9ab23889b6
commit 5a8db9a534
4 changed files with 248 additions and 196 deletions
--- a/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts
+++ b/packages/components/nodes/vectorstores/Vectara_Existing/Vectara_Existing.ts
@ -1,6 +1,6 @@
 import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
 import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
-import { VectaraStore, VectaraLibArgs, VectaraFilter } from 'langchain/vectorstores/vectara'
+import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig } from 'langchain/vectorstores/vectara'

 class VectaraExisting_VectorStores implements INode {
    label: string
@ -40,9 +40,27 @@ class VectaraExisting_VectorStores implements INode {
                additionalParams: true,
                optional: true
            },
+            {
+                label: 'Sentences Before',
+                name: 'sentencesBefore',
+                description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
+            {
+                label: 'Sentences After',
+                name: 'sentencesAfter',
+                description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
            {
                label: 'Lambda',
                name: 'lambda',
+                description:
+                    'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.',
                type: 'number',
                additionalParams: true,
                optional: true
@ -77,6 +95,8 @@ class VectaraExisting_VectorStores implements INode {
        const corpusId = getCredentialParam('corpusID', credentialData, nodeData)

        const vectaraMetadataFilter = nodeData.inputs?.filter as string
+        const sentencesBefore = nodeData.inputs?.sentencesBefore as number
+        const sentencesAfter = nodeData.inputs?.sentencesAfter as number
        const lambda = nodeData.inputs?.lambda as number
        const output = nodeData.outputs?.output as string
        const topK = nodeData.inputs?.topK as string
@ -92,6 +112,11 @@ class VectaraExisting_VectorStores implements INode {
        if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
        if (lambda) vectaraFilter.lambda = lambda

+        const vectaraContextConfig: VectaraContextConfig = {}
+        if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore
+        if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter
+        vectaraFilter.contextConfig = vectaraContextConfig
+
        const vectorStore = new VectaraStore(vectaraArgs)

        if (output === 'retriever') {
--- a/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts
+++ b/packages/components/nodes/vectorstores/Vectara_Upsert/Vectara_Upsert.ts
@ -1,7 +1,7 @@
 import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
 import { Embeddings } from 'langchain/embeddings/base'
 import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
-import { VectaraStore, VectaraLibArgs, VectaraFilter } from 'langchain/vectorstores/vectara'
+import { VectaraStore, VectaraLibArgs, VectaraFilter, VectaraContextConfig } from 'langchain/vectorstores/vectara'
 import { Document } from 'langchain/document'
 import { flatten } from 'lodash'

@ -49,9 +49,27 @@ class VectaraUpsert_VectorStores implements INode {
                additionalParams: true,
                optional: true
            },
+            {
+                label: 'Sentences Before',
+                name: 'sentencesBefore',
+                description: 'Number of sentences to fetch before the matched sentence. Defaults to 2.',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
+            {
+                label: 'Sentences After',
+                name: 'sentencesAfter',
+                description: 'Number of sentences to fetch after the matched sentence. Defaults to 2.',
+                type: 'number',
+                additionalParams: true,
+                optional: true
+            },
            {
                label: 'Lambda',
                name: 'lambda',
+                description:
+                    'Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.',
                type: 'number',
                additionalParams: true,
                optional: true
@ -88,6 +106,8 @@ class VectaraUpsert_VectorStores implements INode {
        const docs = nodeData.inputs?.document as Document[]
        const embeddings = {} as Embeddings
        const vectaraMetadataFilter = nodeData.inputs?.filter as string
+        const sentencesBefore = nodeData.inputs?.sentencesBefore as number
+        const sentencesAfter = nodeData.inputs?.sentencesAfter as number
        const lambda = nodeData.inputs?.lambda as number
        const output = nodeData.outputs?.output as string
        const topK = nodeData.inputs?.topK as string
@ -103,6 +123,11 @@ class VectaraUpsert_VectorStores implements INode {
        if (vectaraMetadataFilter) vectaraFilter.filter = vectaraMetadataFilter
        if (lambda) vectaraFilter.lambda = lambda

+        const vectaraContextConfig: VectaraContextConfig = {}
+        if (sentencesBefore) vectaraContextConfig.sentencesBefore = sentencesBefore
+        if (sentencesAfter) vectaraContextConfig.sentencesAfter = sentencesAfter
+        vectaraFilter.contextConfig = vectaraContextConfig
+
        const flattenDocs = docs && docs.length ? flatten(docs) : []
        const finalDocs = []
        for (let i = 0; i < flattenDocs.length; i += 1) {
--- a/packages/components/package.json
+++ b/packages/components/package.json
@ -40,7 +40,7 @@
        "google-auth-library": "^9.0.0",
        "graphql": "^16.6.0",
        "html-to-text": "^9.0.5",
-        "langchain": "^0.0.122",
+        "langchain": "^0.0.126",
        "linkifyjs": "^4.1.1",
        "mammoth": "^1.5.1",
        "moment": "^2.29.3",
--- a/packages/server/marketplaces/chatflows/Vectara
+++ b/packages/server/marketplaces/chatflows/Vectara
@ -1,186 +1,11 @@
 {
    "description": "A simple LLM chain that uses Vectara to enable conversations with uploaded documents",
    "nodes": [
-        {
-            "width": 300,
-            "height": 408,
-            "id": "vectaraUpsert_0",
-            "position": { "x": 438, "y": 214 },
-            "type": "customNode",
-            "data": {
-                "id": "vectaraUpsert_0",
-                "label": "Vectara Upsert Document",
-                "version": 1,
-                "name": "vectaraUpsert",
-                "type": "Vectara",
-                "baseClasses": ["Vectara", "VectorStoreRetriever", "BaseRetriever"],
-                "category": "Vector Stores",
-                "description": "Upsert documents to Vectara",
-                "inputParams": [
-                    {
-                        "label": "Connect Credential",
-                        "name": "credential",
-                        "type": "credential",
-                        "credentialNames": ["vectaraApi"],
-                        "id": "vectaraUpsert_0-input-credential-credential"
-                    },
-                    {
-                        "label": "Filter",
-                        "name": "filter",
-                        "type": "json",
-                        "additionalParams": true,
-                        "optional": true,
-                        "id": "vectaraUpsert_0-input-filter-json"
-                    },
-                    {
-                        "label": "Lambda",
-                        "name": "lambda",
-                        "type": "number",
-                        "additionalParams": true,
-                        "optional": true,
-                        "id": "vectaraUpsert_0-input-lambda-number"
-                    },
-                    {
-                        "label": "Top K",
-                        "name": "topK",
-                        "description": "Number of top results to fetch. Defaults to 4",
-                        "placeholder": "4",
-                        "type": "number",
-                        "additionalParams": true,
-                        "optional": true,
-                        "id": "vectaraUpsert_0-input-topK-number"
-                    }
-                ],
-                "inputAnchors": [
-                    {
-                        "label": "Document",
-                        "name": "document",
-                        "type": "Document",
-                        "list": true,
-                        "id": "vectaraUpsert_0-input-document-Document"
-                    }
-                ],
-                "inputs": {
-                    "document": ["{{pdfFile_0.data.instance}}"],
-                    "filter": "",
-                    "lambda": "",
-                    "topK": ""
-                },
-                "outputAnchors": [
-                    {
-                        "name": "output",
-                        "label": "Output",
-                        "type": "options",
-                        "options": [
-                            {
-                                "id": "vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever",
-                                "name": "retriever",
-                                "label": "Vectara Retriever",
-                                "type": "Vectara | VectorStoreRetriever | BaseRetriever"
-                            },
-                            {
-                                "id": "vectaraUpsert_0-output-vectorStore-Vectara|VectorStore",
-                                "name": "vectorStore",
-                                "label": "Vectara Vector Store",
-                                "type": "Vectara | VectorStore"
-                            }
-                        ],
-                        "default": "retriever"
-                    }
-                ],
-                "outputs": { "output": "retriever" },
-                "selected": false
-            },
-            "selected": false,
-            "dragging": false,
-            "positionAbsolute": { "x": 438, "y": 214 }
-        },
-        {
-            "width": 300,
-            "height": 509,
-            "id": "pdfFile_0",
-            "position": { "x": 68.3013317598369, "y": 199.60454731299677 },
-            "type": "customNode",
-            "data": {
-                "id": "pdfFile_0",
-                "label": "Pdf File",
-                "version": 1,
-                "name": "pdfFile",
-                "type": "Document",
-                "baseClasses": ["Document"],
-                "category": "Document Loaders",
-                "description": "Load data from PDF files",
-                "inputParams": [
-                    {
-                        "label": "Pdf File",
-                        "name": "pdfFile",
-                        "type": "file",
-                        "fileType": ".pdf",
-                        "id": "pdfFile_0-input-pdfFile-file"
-                    },
-                    {
-                        "label": "Usage",
-                        "name": "usage",
-                        "type": "options",
-                        "options": [
-                            { "label": "One document per page", "name": "perPage" },
-                            { "label": "One document per file", "name": "perFile" }
-                        ],
-                        "default": "perPage",
-                        "id": "pdfFile_0-input-usage-options"
-                    },
-                    {
-                        "label": "Use Legacy Build",
-                        "name": "legacyBuild",
-                        "type": "boolean",
-                        "optional": true,
-                        "additionalParams": true,
-                        "id": "pdfFile_0-input-legacyBuild-boolean"
-                    },
-                    {
-                        "label": "Metadata",
-                        "name": "metadata",
-                        "type": "json",
-                        "optional": true,
-                        "additionalParams": true,
-                        "id": "pdfFile_0-input-metadata-json"
-                    }
-                ],
-                "inputAnchors": [
-                    {
-                        "label": "Text Splitter",
-                        "name": "textSplitter",
-                        "type": "TextSplitter",
-                        "optional": true,
-                        "id": "pdfFile_0-input-textSplitter-TextSplitter"
-                    }
-                ],
-                "inputs": {
-                    "textSplitter": "",
-                    "usage": "perPage",
-                    "legacyBuild": "",
-                    "metadata": ""
-                },
-                "outputAnchors": [
-                    {
-                        "id": "pdfFile_0-output-pdfFile-Document",
-                        "name": "pdfFile",
-                        "label": "Document",
-                        "type": "Document"
-                    }
-                ],
-                "outputs": {},
-                "selected": false
-            },
-            "selected": false,
-            "positionAbsolute": { "x": 68.3013317598369, "y": 199.60454731299677 },
-            "dragging": false
-        },
        {
            "width": 300,
            "height": 525,
            "id": "chatOpenAI_0",
-            "position": { "x": 804.3889791707068, "y": 195.11620799951592 },
+            "position": { "x": 514.1088940275924, "y": 199.574479681537 },
            "type": "customNode",
            "data": {
                "id": "chatOpenAI_0",
@ -211,10 +36,7 @@
                            { "label": "gpt-3.5-turbo", "name": "gpt-3.5-turbo" },
                            { "label": "gpt-3.5-turbo-0613", "name": "gpt-3.5-turbo-0613" },
                            { "label": "gpt-3.5-turbo-16k", "name": "gpt-3.5-turbo-16k" },
-                            {
-                                "label": "gpt-3.5-turbo-16k-0613",
-                                "name": "gpt-3.5-turbo-16k-0613"
-                            }
+                            { "label": "gpt-3.5-turbo-16k-0613", "name": "gpt-3.5-turbo-16k-0613" }
                        ],
                        "default": "gpt-3.5-turbo",
                        "optional": true,
@ -286,7 +108,7 @@
                "inputAnchors": [],
                "inputs": {
                    "modelName": "gpt-3.5-turbo",
-                    "temperature": "0.2",
+                    "temperature": "0.5",
                    "maxTokens": "",
                    "topP": "",
                    "frequencyPenalty": "",
@ -306,14 +128,14 @@
                "selected": false
            },
            "selected": false,
-            "positionAbsolute": { "x": 804.3889791707068, "y": 195.11620799951592 },
+            "positionAbsolute": { "x": 514.1088940275924, "y": 199.574479681537 },
            "dragging": false
        },
        {
            "width": 300,
            "height": 481,
            "id": "conversationalRetrievalQAChain_0",
-            "position": { "x": 1160.4877473512795, "y": 259.2799138505109 },
+            "position": { "x": 900.4793407261002, "y": 205.9476004518217 },
            "type": "customNode",
            "data": {
                "id": "conversationalRetrievalQAChain_0",
@ -410,11 +232,200 @@
                "selected": false
            },
            "selected": false,
-            "positionAbsolute": { "x": 1160.4877473512795, "y": 259.2799138505109 },
+            "positionAbsolute": { "x": 900.4793407261002, "y": 205.9476004518217 },
            "dragging": false
+        },
+        {
+            "width": 300,
+            "height": 509,
+            "id": "pdfFile_0",
+            "position": { "x": -210.44158723479913, "y": 236.6627524951051 },
+            "type": "customNode",
+            "data": {
+                "id": "pdfFile_0",
+                "label": "Pdf File",
+                "version": 1,
+                "name": "pdfFile",
+                "type": "Document",
+                "baseClasses": ["Document"],
+                "category": "Document Loaders",
+                "description": "Load data from PDF files",
+                "inputParams": [
+                    { "label": "Pdf File", "name": "pdfFile", "type": "file", "fileType": ".pdf", "id": "pdfFile_0-input-pdfFile-file" },
+                    {
+                        "label": "Usage",
+                        "name": "usage",
+                        "type": "options",
+                        "options": [
+                            { "label": "One document per page", "name": "perPage" },
+                            { "label": "One document per file", "name": "perFile" }
+                        ],
+                        "default": "perPage",
+                        "id": "pdfFile_0-input-usage-options"
+                    },
+                    {
+                        "label": "Use Legacy Build",
+                        "name": "legacyBuild",
+                        "type": "boolean",
+                        "optional": true,
+                        "additionalParams": true,
+                        "id": "pdfFile_0-input-legacyBuild-boolean"
+                    },
+                    {
+                        "label": "Metadata",
+                        "name": "metadata",
+                        "type": "json",
+                        "optional": true,
+                        "additionalParams": true,
+                        "id": "pdfFile_0-input-metadata-json"
+                    }
+                ],
+                "inputAnchors": [
+                    {
+                        "label": "Text Splitter",
+                        "name": "textSplitter",
+                        "type": "TextSplitter",
+                        "optional": true,
+                        "id": "pdfFile_0-input-textSplitter-TextSplitter"
+                    }
+                ],
+                "inputs": { "textSplitter": "", "usage": "perPage", "legacyBuild": "", "metadata": "" },
+                "outputAnchors": [
+                    { "id": "pdfFile_0-output-pdfFile-Document", "name": "pdfFile", "label": "Document", "type": "Document" }
+                ],
+                "outputs": {},
+                "selected": false
+            },
+            "selected": false,
+            "positionAbsolute": { "x": -210.44158723479913, "y": 236.6627524951051 },
+            "dragging": false
+        },
+        {
+            "width": 300,
+            "height": 408,
+            "id": "vectaraUpsert_0",
+            "position": { "x": 172.06946164914868, "y": 373.11406233089934 },
+            "type": "customNode",
+            "data": {
+                "id": "vectaraUpsert_0",
+                "label": "Vectara Upsert Document",
+                "version": 1,
+                "name": "vectaraUpsert",
+                "type": "Vectara",
+                "baseClasses": ["Vectara", "VectorStoreRetriever", "BaseRetriever"],
+                "category": "Vector Stores",
+                "description": "Upsert documents to Vectara",
+                "inputParams": [
+                    {
+                        "label": "Connect Credential",
+                        "name": "credential",
+                        "type": "credential",
+                        "credentialNames": ["vectaraApi"],
+                        "id": "vectaraUpsert_0-input-credential-credential"
+                    },
+                    {
+                        "label": "Vectara Metadata Filter",
+                        "name": "filter",
+                        "description": "Filter to apply to Vectara metadata. Refer to the <a target=\"_blank\" href=\"https://docs.flowiseai.com/vector-stores/vectara\">documentation</a> on how to use Vectara filters with Flowise.",
+                        "type": "string",
+                        "additionalParams": true,
+                        "optional": true,
+                        "id": "vectaraUpsert_0-input-filter-string"
+                    },
+                    {
+                        "label": "Sentences Before",
+                        "name": "sentencesBefore",
+                        "description": "Number of sentences to fetch before the matched sentence. Defaults to 2.",
+                        "type": "number",
+                        "additionalParams": true,
+                        "optional": true,
+                        "id": "vectaraUpsert_0-input-sentencesBefore-number"
+                    },
+                    {
+                        "label": "Sentences After",
+                        "name": "sentencesAfter",
+                        "description": "Number of sentences to fetch after the matched sentence. Defaults to 2.",
+                        "type": "number",
+                        "additionalParams": true,
+                        "optional": true,
+                        "id": "vectaraUpsert_0-input-sentencesAfter-number"
+                    },
+                    {
+                        "label": "Lambda",
+                        "name": "lambda",
+                        "description": "Improves retrieval accuracy by adjusting the balance (from 0 to 1) between neural search and keyword-based search factors.",
+                        "type": "number",
+                        "additionalParams": true,
+                        "optional": true,
+                        "id": "vectaraUpsert_0-input-lambda-number"
+                    },
+                    {
+                        "label": "Top K",
+                        "name": "topK",
+                        "description": "Number of top results to fetch. Defaults to 4",
+                        "placeholder": "4",
+                        "type": "number",
+                        "additionalParams": true,
+                        "optional": true,
+                        "id": "vectaraUpsert_0-input-topK-number"
+                    }
+                ],
+                "inputAnchors": [
+                    {
+                        "label": "Document",
+                        "name": "document",
+                        "type": "Document",
+                        "list": true,
+                        "id": "vectaraUpsert_0-input-document-Document"
+                    }
+                ],
+                "inputs": {
+                    "document": ["{{pdfFile_0.data.instance}}"],
+                    "filter": "",
+                    "sentencesBefore": "",
+                    "sentencesAfter": "",
+                    "lambda": "",
+                    "topK": ""
+                },
+                "outputAnchors": [
+                    {
+                        "name": "output",
+                        "label": "Output",
+                        "type": "options",
+                        "options": [
+                            {
+                                "id": "vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever",
+                                "name": "retriever",
+                                "label": "Vectara Retriever",
+                                "type": "Vectara | VectorStoreRetriever | BaseRetriever"
+                            },
+                            {
+                                "id": "vectaraUpsert_0-output-vectorStore-Vectara|VectorStore",
+                                "name": "vectorStore",
+                                "label": "Vectara Vector Store",
+                                "type": "Vectara | VectorStore"
+                            }
+                        ],
+                        "default": "retriever"
+                    }
+                ],
+                "outputs": { "output": "retriever" },
+                "selected": false
+            },
+            "positionAbsolute": { "x": 172.06946164914868, "y": 373.11406233089934 },
+            "selected": false
        }
    ],
    "edges": [
+        {
+            "source": "chatOpenAI_0",
+            "sourceHandle": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel",
+            "target": "conversationalRetrievalQAChain_0",
+            "targetHandle": "conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
+            "type": "buttonedge",
+            "id": "chatOpenAI_0-chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
+            "data": { "label": "" }
+        },
        {
            "source": "pdfFile_0",
            "sourceHandle": "pdfFile_0-output-pdfFile-Document",
@ -432,15 +443,6 @@
            "type": "buttonedge",
            "id": "vectaraUpsert_0-vectaraUpsert_0-output-retriever-Vectara|VectorStoreRetriever|BaseRetriever-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-vectorStoreRetriever-BaseRetriever",
            "data": { "label": "" }
-        },
-        {
-            "source": "chatOpenAI_0",
-            "sourceHandle": "chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel",
-            "target": "conversationalRetrievalQAChain_0",
-            "targetHandle": "conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
-            "type": "buttonedge",
-            "id": "chatOpenAI_0-chatOpenAI_0-output-chatOpenAI-ChatOpenAI|BaseChatModel|BaseLanguageModel-conversationalRetrievalQAChain_0-conversationalRetrievalQAChain_0-input-model-BaseLanguageModel",
-            "data": { "label": "" }
        }
    ]
 }