add metadata filter

Henry 2023-05-12 13:57:38 +01:00
parent 7313cdd9c6
commit ab875cc1b3
14 changed files with 364 additions and 48 deletions

View File

@@ -31,12 +31,21 @@ class Cheerio_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
+ const metadata = nodeData.inputs?.metadata
let url = nodeData.inputs?.url as string
var urlPattern = new RegExp(
@@ -50,14 +59,31 @@ class Cheerio_DocumentLoaders implements INode {
) // validate fragment locator
const loader = new CheerioWebBaseLoader(urlPattern.test(url.trim()) ? url.trim() : '')
+ let docs = []
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}
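The same metadata-merge pattern is repeated in each of the document loader nodes below. For reference, a minimal sketch of the resulting init() flow (Cheerio loader shown; the loader construction is elided and the per-document loop is condensed to a map):

    // Sketch only: the optional 'metadata' JSON input, when supplied, is parsed
    // and its keys are merged over each loaded document's existing metadata.
    async init(nodeData: INodeData): Promise<any> {
        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
        const metadata = nodeData.inputs?.metadata
        // ...build the loader for the specific node here...
        let docs = []
        if (textSplitter) {
            docs = await loader.loadAndSplit(textSplitter)
        } else {
            docs = await loader.load()
        }
        if (metadata) {
            const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
            return docs.map((doc) => ({ ...doc, metadata: { ...doc.metadata, ...parsedMetadata } }))
        }
        return docs
    }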

View File

@@ -41,6 +41,13 @@ class Csv_DocumentLoaders implements INode {
description: 'Extracting a single column',
placeholder: 'Enter column name',
optional: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
@@ -49,17 +56,35 @@ class Csv_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const csvFileBase64 = nodeData.inputs?.csvFile as string
const columnName = nodeData.inputs?.columnName as string
+ const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(csvFileBase64))
const loader = new CSVLoader(blob, columnName.trim().length === 0 ? undefined : columnName.trim())
+ let docs = []
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}

View File

@@ -33,6 +33,13 @@ class Docx_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
@@ -40,17 +47,35 @@ class Docx_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const docxFileBase64 = nodeData.inputs?.docxFile as string
+ const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(docxFileBase64))
const loader = new DocxLoader(blob)
+ let docs = []
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}

View File

@@ -37,6 +37,13 @@ class Folder_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
@@ -44,6 +51,7 @@ class Folder_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const folderPath = nodeData.inputs?.folderPath as string
+ const metadata = nodeData.inputs?.metadata
const loader = new DirectoryLoader(folderPath, {
'.json': (path) => new JSONLoader(path),
@@ -53,14 +61,31 @@ class Folder_DocumentLoaders implements INode {
// @ts-ignore
'.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
})
+ let docs = []
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}

View File

@@ -45,6 +45,13 @@ class Github_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
@@ -54,6 +61,7 @@ class Github_DocumentLoaders implements INode {
const branch = nodeData.inputs?.branch as string
const accessToken = nodeData.inputs?.accessToken as string
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
+ const metadata = nodeData.inputs?.metadata
const options: GithubRepoLoaderParams = {
branch,
@@ -64,14 +72,31 @@ class Github_DocumentLoaders implements INode {
if (accessToken) options.accessToken = accessToken
const loader = new GithubRepoLoader(repoLink, options)
+ let docs = []
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}

View File

@@ -41,6 +41,13 @@ class Json_DocumentLoaders implements INode {
description: 'Extracting multiple pointers',
placeholder: 'Enter pointers name',
optional: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
@@ -49,6 +56,7 @@ class Json_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const jsonFileBase64 = nodeData.inputs?.jsonFile as string
const pointersName = nodeData.inputs?.pointersName as string
+ const metadata = nodeData.inputs?.metadata
let pointers: string[] = []
if (pointersName) {
@@ -58,14 +66,31 @@ class Json_DocumentLoaders implements INode {
const blob = new Blob(getBlob(jsonFileBase64))
const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined)
+ let docs = []
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}

View File

@@ -33,6 +33,13 @@ class Notion_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
@@ -40,16 +47,34 @@ class Notion_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const notionFolder = nodeData.inputs?.notionFolder as string
+ const metadata = nodeData.inputs?.metadata
const loader = new NotionLoader(notionFolder)
+ let docs = []
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}

View File

@@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode {
}
],
default: 'perPage'
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
@@ -57,30 +64,45 @@ class Pdf_DocumentLoaders implements INode {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
const usage = nodeData.inputs?.usage as string
+ const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(pdfFileBase64))
+ let docs = []
if (usage === 'perFile') {
// @ts-ignore
const loader = new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
} else {
// @ts-ignore
const loader = new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}

View File

@@ -33,6 +33,13 @@ class Text_DocumentLoaders implements INode {
name: 'textSplitter',
type: 'TextSplitter',
optional: true
+ },
+ {
+ label: 'Metadata',
+ name: 'metadata',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
}
@@ -40,17 +47,34 @@ class Text_DocumentLoaders implements INode {
async init(nodeData: INodeData): Promise<any> {
const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
const txtFileBase64 = nodeData.inputs?.txtFile as string
+ const metadata = nodeData.inputs?.metadata
const blob = new Blob(getBlob(txtFileBase64))
const loader = new TextLoader(blob)
+ let docs = []
if (textSplitter) {
- const docs = await loader.loadAndSplit(textSplitter)
- return docs
+ docs = await loader.loadAndSplit(textSplitter)
} else {
- const docs = await loader.load()
- return docs
+ docs = await loader.load()
}
+ if (metadata) {
+ const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+ let finaldocs = []
+ for (const doc of docs) {
+ const newdoc = {
+ ...doc,
+ metadata: {
+ ...doc.metadata,
+ ...parsedMetadata
+ }
+ }
+ finaldocs.push(newdoc)
+ }
+ return finaldocs
+ }
+ return docs
}
}

View File

@@ -50,6 +50,13 @@ class Pinecone_Existing_VectorStores implements INode {
type: 'string',
placeholder: 'my-first-namespace',
optional: true
+ },
+ {
+ label: 'Pinecone Metadata Filter',
+ name: 'pineconeMetadataFilter',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
this.outputs = [
@@ -71,6 +78,8 @@ class Pinecone_Existing_VectorStores implements INode {
const pineconeEnv = nodeData.inputs?.pineconeEnv as string
const index = nodeData.inputs?.pineconeIndex as string
const pineconeNamespace = nodeData.inputs?.pineconeNamespace as string
+ const pineconeMetadataFilter = nodeData.inputs?.pineconeMetadataFilter
const embeddings = nodeData.inputs?.embeddings as Embeddings
const output = nodeData.outputs?.output as string
@@ -87,6 +96,10 @@ class Pinecone_Existing_VectorStores implements INode {
}
if (pineconeNamespace) obj.namespace = pineconeNamespace
+ if (pineconeMetadataFilter) {
+ const metadatafilter = typeof pineconeMetadataFilter === 'object' ? pineconeMetadataFilter : JSON.parse(pineconeMetadataFilter)
+ obj.filter = metadatafilter
+ }
const vectorStore = await PineconeStore.fromExistingIndex(embeddings, obj)
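The new filter input is parsed from JSON and handed to PineconeStore unchanged via obj.filter, so it accepts anything Pinecone's metadata filtering understands. An illustrative value (the keys here are examples, not part of the commit):

    // Example value for the 'Pinecone Metadata Filter' field; keys are illustrative.
    const pineconeMetadataFilter = {
        source: 'https://example.com/handbook', // exact match on a metadata key
        author: { $in: ['alice', 'bob'] } // Pinecone operator syntax also works
    }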

View File

@@ -1,7 +1,7 @@
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { Embeddings } from 'langchain/embeddings/base'
import { getBaseClasses } from '../../../src/utils'
- import { SupabaseVectorStore } from 'langchain/vectorstores/supabase'
+ import { SupabaseLibArgs, SupabaseVectorStore } from 'langchain/vectorstores/supabase'
import { createClient } from '@supabase/supabase-js'
class Supabase_Existing_VectorStores implements INode {
@@ -48,6 +48,13 @@ class Supabase_Existing_VectorStores implements INode {
label: 'Query Name',
name: 'queryName',
type: 'string'
+ },
+ {
+ label: 'Supabase Metadata Filter',
+ name: 'supabaseMetadataFilter',
+ type: 'json',
+ optional: true,
+ additionalParams: true
}
]
this.outputs = [
@@ -70,15 +77,23 @@ class Supabase_Existing_VectorStores implements INode {
const tableName = nodeData.inputs?.tableName as string
const queryName = nodeData.inputs?.queryName as string
const embeddings = nodeData.inputs?.embeddings as Embeddings
+ const supabaseMetadataFilter = nodeData.inputs?.supabaseMetadataFilter
const output = nodeData.outputs?.output as string
const client = createClient(supabaseProjUrl, supabaseApiKey)
- const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, {
+ const obj: SupabaseLibArgs = {
client,
- tableName: tableName,
- queryName: queryName
- })
+ tableName,
+ queryName
+ }
+ if (supabaseMetadataFilter) {
+ const metadatafilter = typeof supabaseMetadataFilter === 'object' ? supabaseMetadataFilter : JSON.parse(supabaseMetadataFilter)
+ obj.filter = metadatafilter
+ }
+ const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, obj)
if (output === 'retriever') {
const retriever = vectorStore.asRetriever()
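As with the Pinecone node, the Supabase filter is a plain JSON object assigned to obj.filter; a sketch of the resulting call with an illustrative filter value:

    // Sketch of the call after this change; the filter keys are illustrative.
    const obj: SupabaseLibArgs = {
        client,
        tableName,
        queryName,
        filter: { source: 'https://example.com/handbook' } // matched against document metadata
    }
    const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, obj)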

View File

@@ -404,8 +404,10 @@ export const isSameOverrideConfig = (
existingOverrideConfig?: ICommonObject,
newOverrideConfig?: ICommonObject
): boolean => {
// Skip check if its internal call
- if (isInternal) return true
+ if (isInternal) {
+ if (existingOverrideConfig && Object.keys(existingOverrideConfig).length) return false
+ return true
+ }
// If existing and new overrideconfig are the same
if (
existingOverrideConfig &&

View File

@@ -0,0 +1,53 @@
import { useState } from 'react'
import PropTypes from 'prop-types'
import { FormControl } from '@mui/material'
import ReactJson from 'react-json-view'
export const JsonEditorInput = ({ value, onChange, disabled = false, isDarkMode = false }) => {
const [myValue, setMyValue] = useState(value ? JSON.parse(value) : {})
return (
<>
<FormControl sx={{ mt: 1, width: '100%' }} size='small'>
{disabled && (
<ReactJson
theme={isDarkMode ? 'ocean' : 'rjv-default'}
style={{ padding: 10, borderRadius: 10 }}
src={myValue}
name={null}
quotesOnKeys={false}
displayDataTypes={false}
/>
)}
{!disabled && (
<ReactJson
theme={isDarkMode ? 'ocean' : 'rjv-default'}
style={{ padding: 10, borderRadius: 10 }}
src={myValue}
name={null}
quotesOnKeys={false}
displayDataTypes={false}
onEdit={(edit) => {
setMyValue(edit.updated_src)
onChange(JSON.stringify(edit.updated_src))
}}
onAdd={() => {
//console.log(add)
}}
onDelete={(deleteobj) => {
setMyValue(deleteobj.updated_src)
onChange(JSON.stringify(deleteobj.updated_src))
}}
/>
)}
</FormControl>
</>
)
}
JsonEditorInput.propTypes = {
value: PropTypes.string,
onChange: PropTypes.func,
disabled: PropTypes.bool,
isDarkMode: PropTypes.bool
}

View File

@@ -1,6 +1,7 @@
import PropTypes from 'prop-types'
import { Handle, Position, useUpdateNodeInternals } from 'reactflow'
import { useEffect, useRef, useState, useContext } from 'react'
+ import { useSelector } from 'react-redux'
// material-ui
import { useTheme, styled } from '@mui/material/styles'
@@ -15,6 +16,7 @@ import { File } from 'ui-component/file/File'
import { SwitchInput } from 'ui-component/switch/Switch'
import { flowContext } from 'store/context/ReactFlowContext'
import { isValidConnection, getAvailableNodesForVariable } from 'utils/genericHelper'
+ import { JsonEditorInput } from 'ui-component/json/JsonEditor'
const CustomWidthTooltip = styled(({ className, ...props }) => <Tooltip {...props} classes={{ popper: className }} />)({
[`& .${tooltipClasses.tooltip}`]: {
@@ -26,6 +28,7 @@ const CustomWidthTooltip = styled(({ className, ...props }) => <Tooltip {...prop
const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isAdditionalParams = false }) => {
const theme = useTheme()
+ const customization = useSelector((state) => state.customization)
const ref = useRef(null)
const { reactFlowInstance } = useContext(flowContext)
const updateNodeInternals = useUpdateNodeInternals()
@@ -166,6 +169,14 @@ const NodeInputHandler = ({ inputAnchor, inputParam, data, disabled = false, isA
onDialogConfirm={(newValue, inputParamName) => onExpandDialogSave(newValue, inputParamName)}
/>
)}
+ {inputParam.type === 'json' && (
+ <JsonEditorInput
+ disabled={disabled}
+ onChange={(newValue) => (data.inputs[inputParam.name] = newValue)}
+ value={data.inputs[inputParam.name] ?? inputParam.default ?? ''}
+ isDarkMode={customization.isDarkMode}
+ />
+ )}
{inputParam.type === 'options' && (
<Dropdown
disabled={disabled}