From ab875cc1b3fb6152e1b9e4266660a00fcabaf524 Mon Sep 17 00:00:00 2001 From: Henry Date: Fri, 12 May 2023 13:57:38 +0100 Subject: [PATCH] add metadata filter --- .../nodes/documentloaders/Cheerio/Cheerio.ts | 34 ++++++++++-- .../nodes/documentloaders/Csv/Csv.ts | 33 ++++++++++-- .../nodes/documentloaders/Docx/Docx.ts | 33 ++++++++++-- .../nodes/documentloaders/Folder/Folder.ts | 33 ++++++++++-- .../nodes/documentloaders/Github/Github.ts | 33 ++++++++++-- .../nodes/documentloaders/Json/Json.ts | 33 ++++++++++-- .../nodes/documentloaders/Notion/Notion.ts | 33 ++++++++++-- .../nodes/documentloaders/Pdf/Pdf.ts | 40 ++++++++++---- .../nodes/documentloaders/Text/Text.ts | 32 +++++++++-- .../Pinecone_Existing/Pinecone_Existing.ts | 13 +++++ .../Supabase_Existing/Supabase_Exisiting.ts | 25 +++++++-- packages/server/src/utils/index.ts | 6 ++- .../ui/src/ui-component/json/JsonEditor.js | 53 +++++++++++++++++++ .../ui/src/views/canvas/NodeInputHandler.js | 11 ++++ 14 files changed, 364 insertions(+), 48 deletions(-) create mode 100644 packages/ui/src/ui-component/json/JsonEditor.js diff --git a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts index 8be10f742..4f4f18411 100644 --- a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts +++ b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts @@ -31,12 +31,21 @@ class Cheerio_DocumentLoaders implements INode { name: 'textSplitter', type: 'TextSplitter', optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } async init(nodeData: INodeData): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter + const metadata = nodeData.inputs?.metadata + let url = nodeData.inputs?.url as string var urlPattern = new RegExp( @@ -50,14 +59,31 @@ class Cheerio_DocumentLoaders implements INode { ) // validate fragment locator const loader = new CheerioWebBaseLoader(urlPattern.test(url.trim()) ? url.trim() : '') + let docs = [] if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } } diff --git a/packages/components/nodes/documentloaders/Csv/Csv.ts b/packages/components/nodes/documentloaders/Csv/Csv.ts index 5a4ca76e8..bcaec79f3 100644 --- a/packages/components/nodes/documentloaders/Csv/Csv.ts +++ b/packages/components/nodes/documentloaders/Csv/Csv.ts @@ -41,6 +41,13 @@ class Csv_DocumentLoaders implements INode { description: 'Extracting a single column', placeholder: 'Enter column name', optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -49,17 +56,35 @@ class Csv_DocumentLoaders implements INode { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const csvFileBase64 = nodeData.inputs?.csvFile as string const columnName = nodeData.inputs?.columnName as string + const metadata = nodeData.inputs?.metadata const blob = new Blob(getBlob(csvFileBase64)) const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim()) + let docs = [] if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } } diff --git a/packages/components/nodes/documentloaders/Docx/Docx.ts b/packages/components/nodes/documentloaders/Docx/Docx.ts index 87a9cc44d..36dd04651 100644 --- a/packages/components/nodes/documentloaders/Docx/Docx.ts +++ b/packages/components/nodes/documentloaders/Docx/Docx.ts @@ -33,6 +33,13 @@ class Docx_DocumentLoaders implements INode { name: 'textSplitter', type: 'TextSplitter', optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -40,17 +47,35 @@ class Docx_DocumentLoaders implements INode { async init(nodeData: INodeData): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const docxFileBase64 = nodeData.inputs?.docxFile as string + const metadata = nodeData.inputs?.metadata const blob = new Blob(getBlob(docxFileBase64)) const loader = new DocxLoader(blob) + let docs = [] if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } } diff --git a/packages/components/nodes/documentloaders/Folder/Folder.ts b/packages/components/nodes/documentloaders/Folder/Folder.ts index 8767303fe..2290133e4 100644 --- a/packages/components/nodes/documentloaders/Folder/Folder.ts +++ b/packages/components/nodes/documentloaders/Folder/Folder.ts @@ -37,6 +37,13 @@ class Folder_DocumentLoaders implements INode { name: 'textSplitter', type: 'TextSplitter', optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -44,6 +51,7 @@ class Folder_DocumentLoaders implements INode { async init(nodeData: INodeData): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const folderPath = nodeData.inputs?.folderPath as string + const metadata = nodeData.inputs?.metadata const loader = new DirectoryLoader(folderPath, { '.json': (path) => new JSONLoader(path), @@ -53,14 +61,31 @@ class Folder_DocumentLoaders implements INode { // @ts-ignore '.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) }) + let docs = [] if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } } diff --git a/packages/components/nodes/documentloaders/Github/Github.ts b/packages/components/nodes/documentloaders/Github/Github.ts index 2340bea74..552790abf 100644 --- a/packages/components/nodes/documentloaders/Github/Github.ts +++ b/packages/components/nodes/documentloaders/Github/Github.ts @@ -45,6 +45,13 @@ class Github_DocumentLoaders implements INode { name: 'textSplitter', type: 'TextSplitter', optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -54,6 +61,7 @@ class Github_DocumentLoaders implements INode { const branch = nodeData.inputs?.branch as string const accessToken = nodeData.inputs?.accessToken as string const textSplitter = nodeData.inputs?.textSplitter as TextSplitter + const metadata = nodeData.inputs?.metadata const options: GithubRepoLoaderParams = { branch, @@ -64,14 +72,31 @@ class Github_DocumentLoaders implements INode { if (accessToken) options.accessToken = accessToken const loader = new GithubRepoLoader(repoLink, options) + let docs = [] if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } } diff --git a/packages/components/nodes/documentloaders/Json/Json.ts b/packages/components/nodes/documentloaders/Json/Json.ts index 3ecdda6bd..46f7704d6 100644 --- a/packages/components/nodes/documentloaders/Json/Json.ts +++ b/packages/components/nodes/documentloaders/Json/Json.ts @@ -41,6 +41,13 @@ class Json_DocumentLoaders implements INode { description: 'Extracting multiple pointers', placeholder: 'Enter pointers name', optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -49,6 +56,7 @@ class Json_DocumentLoaders implements INode { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const jsonFileBase64 = nodeData.inputs?.jsonFile as string const pointersName = nodeData.inputs?.pointersName as string + const metadata = nodeData.inputs?.metadata let pointers: string[] = [] if (pointersName) { @@ -58,14 +66,31 @@ class Json_DocumentLoaders implements INode { const blob = new Blob(getBlob(jsonFileBase64)) const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined) + let docs = [] if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } } diff --git a/packages/components/nodes/documentloaders/Notion/Notion.ts b/packages/components/nodes/documentloaders/Notion/Notion.ts index 457c98f82..f5bfcb2ad 100644 --- a/packages/components/nodes/documentloaders/Notion/Notion.ts +++ b/packages/components/nodes/documentloaders/Notion/Notion.ts @@ -33,6 +33,13 @@ class Notion_DocumentLoaders implements INode { name: 'textSplitter', type: 'TextSplitter', optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -40,16 +47,34 @@ class Notion_DocumentLoaders implements INode { async init(nodeData: INodeData): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const notionFolder = nodeData.inputs?.notionFolder as string + const metadata = nodeData.inputs?.metadata const loader = new NotionLoader(notionFolder) + let docs = [] if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } } diff --git a/packages/components/nodes/documentloaders/Pdf/Pdf.ts b/packages/components/nodes/documentloaders/Pdf/Pdf.ts index 59c0fab7a..c27f78091 100644 --- a/packages/components/nodes/documentloaders/Pdf/Pdf.ts +++ b/packages/components/nodes/documentloaders/Pdf/Pdf.ts @@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode { } ], default: 'perPage' + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -57,30 +64,45 @@ class Pdf_DocumentLoaders implements INode { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const pdfFileBase64 = nodeData.inputs?.pdfFile as string const usage = nodeData.inputs?.usage as string + const metadata = nodeData.inputs?.metadata const blob = new Blob(getBlob(pdfFileBase64)) - + let docs = [] if (usage === 'perFile') { // @ts-ignore const loader = new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } } else { // @ts-ignore const loader = new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + + return docs } } diff --git a/packages/components/nodes/documentloaders/Text/Text.ts b/packages/components/nodes/documentloaders/Text/Text.ts index 73c48ad32..466c45200 100644 --- a/packages/components/nodes/documentloaders/Text/Text.ts +++ b/packages/components/nodes/documentloaders/Text/Text.ts @@ -33,6 +33,13 @@ class Text_DocumentLoaders implements INode { name: 'textSplitter', type: 'TextSplitter', optional: true + }, + { + label: 'Metadata', + name: 'metadata', + type: 'json', + optional: true, + additionalParams: true } ] } @@ -40,17 +47,34 @@ class Text_DocumentLoaders implements INode { async init(nodeData: INodeData): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const txtFileBase64 = nodeData.inputs?.txtFile as string + const metadata = nodeData.inputs?.metadata const blob = new Blob(getBlob(txtFileBase64)) const loader = new TextLoader(blob) + let docs = [] if (textSplitter) { - const docs = await loader.loadAndSplit(textSplitter) - return docs + docs = await loader.loadAndSplit(textSplitter) } else { - const docs = await loader.load() - return docs + docs = await loader.load() } + + if (metadata) { + const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata) + let finaldocs = [] + for (const doc of docs) { + const newdoc = { + ...doc, + metadata: { + ...doc.metadata, + ...parsedMetadata + } + } + finaldocs.push(newdoc) + } + return finaldocs + } + return docs } } diff --git a/packages/components/nodes/vectorstores/Pinecone_Existing/Pinecone_Existing.ts b/packages/components/nodes/vectorstores/Pinecone_Existing/Pinecone_Existing.ts index 866fcfed7..04706ed07 100644 --- a/packages/components/nodes/vectorstores/Pinecone_Existing/Pinecone_Existing.ts +++ b/packages/components/nodes/vectorstores/Pinecone_Existing/Pinecone_Existing.ts @@ -50,6 +50,13 @@ class Pinecone_Existing_VectorStores implements INode { type: 'string', placeholder: 'my-first-namespace', optional: true + }, + { + label: 'Pinecone Metadata Filter', + name: 'pineconeMetadataFilter', + type: 'json', + optional: true, + additionalParams: true } ] this.outputs = [ @@ -71,6 +78,8 @@ class Pinecone_Existing_VectorStores implements INode { const pineconeEnv = nodeData.inputs?.pineconeEnv as string const index = nodeData.inputs?.pineconeIndex as string const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string + const pineconeMetadataFilter = nodeData.inputs?.pineconeMetadataFilter + const embeddings = nodeData.inputs?.embeddings as Embeddings const output = nodeData.outputs?.output as string @@ -87,6 +96,10 @@ class Pinecone_Existing_VectorStores implements INode { } if (pineconeNamespace) obj.namespace = pineconeNamespace + if (pineconeMetadataFilter) { + const metadatafilter = typeof pineconeMetadataFilter === 'object' ? pineconeMetadataFilter : JSON.parse(pineconeMetadataFilter) + obj.filter = metadatafilter + } const vectorStore = await PineconeStore.fromExistingIndex(embeddings, obj) diff --git a/packages/components/nodes/vectorstores/Supabase_Existing/Supabase_Exisiting.ts b/packages/components/nodes/vectorstores/Supabase_Existing/Supabase_Exisiting.ts index 26433bc05..f97b18873 100644 --- a/packages/components/nodes/vectorstores/Supabase_Existing/Supabase_Exisiting.ts +++ b/packages/components/nodes/vectorstores/Supabase_Existing/Supabase_Exisiting.ts @@ -1,7 +1,7 @@ import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' import { Embeddings } from 'langchain/embeddings/base' import { getBaseClasses } from '../../../src/utils' -import { SupabaseVectorStore } from 'langchain/vectorstores/supabase' +import { SupabaseLibArgs, SupabaseVectorStore } from 'langchain/vectorstores/supabase' import { createClient } from '@supabase/supabase-js' class Supabase_Existing_VectorStores implements INode { @@ -48,6 +48,13 @@ class Supabase_Existing_VectorStores implements INode { label: 'Query Name', name: 'queryName', type: 'string' + }, + { + label: 'Supabase Metadata Filter', + name: 'supabaseMetadataFilter', + type: 'json', + optional: true, + additionalParams: true } ] this.outputs = [ @@ -70,15 +77,23 @@ class Supabase_Existing_VectorStores implements INode { const tableName = nodeData.inputs?.tableName as string const queryName = nodeData.inputs?.queryName as string const embeddings = nodeData.inputs?.embeddings as Embeddings + const supabaseMetadataFilter = nodeData.inputs?.supabaseMetadataFilter const output = nodeData.outputs?.output as string const client = createClient(supabaseProjUrl, supabaseApiKey) - const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, { + const obj: SupabaseLibArgs = { client, - tableName: tableName, - queryName: queryName - }) + tableName, + queryName + } + + if (supabaseMetadataFilter) { + const metadatafilter = typeof supabaseMetadataFilter === 'object' ? supabaseMetadataFilter : JSON.parse(supabaseMetadataFilter) + obj.filter = metadatafilter + } + + const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, obj) if (output === 'retriever') { const retriever = vectorStore.asRetriever() diff --git a/packages/server/src/utils/index.ts b/packages/server/src/utils/index.ts index ab6a3c84f..5f5a8d14c 100644 --- a/packages/server/src/utils/index.ts +++ b/packages/server/src/utils/index.ts @@ -404,8 +404,10 @@ export const isSameOverrideConfig = ( existingOverrideConfig?: ICommonObject, newOverrideConfig?: ICommonObject ): boolean => { - // Skip check if its internal call - if (isInternal) return true + if (isInternal) { + if (existingOverrideConfig && Object.keys(existingOverrideConfig).length) return false + return true + } // If existing and new overrideconfig are the same if ( existingOverrideConfig && diff --git a/packages/ui/src/ui-component/json/JsonEditor.js b/packages/ui/src/ui-component/json/JsonEditor.js new file mode 100644 index 000000000..6876b241e --- /dev/null +++ b/packages/ui/src/ui-component/json/JsonEditor.js @@ -0,0 +1,53 @@ +import { useState } from 'react' +import PropTypes from 'prop-types' +import { FormControl } from '@mui/material' +import ReactJson from 'react-json-view' + +export const JsonEditorInput = ({ value, onChange, disabled = false, isDarkMode = false }) => { + const [myValue, setMyValue] = useState(value ? JSON.parse(value) : {}) + + return ( + <> + + {disabled && ( + + )} + {!disabled && ( + { + setMyValue(edit.updated_src) + onChange(JSON.stringify(edit.updated_src)) + }} + onAdd={() => { + //console.log(add) + }} + onDelete={(deleteobj) => { + setMyValue(deleteobj.updated_src) + onChange(JSON.stringify(deleteobj.updated_src)) + }} + /> + )} + + + ) +} + +JsonEditorInput.propTypes = { + value: PropTypes.string, + onChange: PropTypes.func, + disabled: PropTypes.bool, + isDarkMode: PropTypes.bool +} diff --git a/packages/ui/src/views/canvas/NodeInputHandler.js b/packages/ui/src/views/canvas/NodeInputHandler.js index dfa9943e9..99b0aed40 100644 --- a/packages/ui/src/views/canvas/NodeInputHandler.js +++ b/packages/ui/src/views/canvas/NodeInputHandler.js @@ -1,6 +1,7 @@ import PropTypes from 'prop-types' import { Handle, Position, useUpdateNodeInternals } from 'reactflow' import { useEffect, useRef, useState, useContext } from 'react' +import { useSelector } from 'react-redux' // material-ui import { useTheme, styled } from '@mui/material/styles' @@ -15,6 +16,7 @@ import { File } from 'ui-component/file/File' import { SwitchInput } from 'ui-component/switch/Switch' import { flowContext } from 'store/context/ReactFlowContext' import { isValidConnection, getAvailableNodesForVariable } from 'utils/genericHelper' +import { JsonEditorInput } from 'ui-component/json/JsonEditor' const CustomWidthTooltip = styled(({ className, ...props }) => )({ [`& .${tooltipClasses.tooltip}`]: { @@ -26,6 +28,7 @@ const CustomWidthTooltip = styled(({ className, ...props }) => { const theme = useTheme() + const customization = useSelector((state) => state.customization) const ref = useRef(null) const { reactFlowInstance } = useContext(flowContext) const updateNodeInternals = useUpdateNodeInternals() @@ -166,6 +169,14 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA onDialogConfirm={(newValue, inputParamName) => onExpandDialogSave(newValue, inputParamName)} /> )} + {inputParam.type === 'json' && ( + (data.inputs[inputParam.name] = newValue)} + value={data.inputs[inputParam.name] ?? inputParam.default ?? ''} + isDarkMode={customization.isDarkMode} + /> + )} {inputParam.type === 'options' && (