Feature/Add support for state-based metadata filter to Retriever Tool (#3501)
* Added support for state-based metadata filter to Retriever Tool
* Update RetrieverTool.ts

---------

Co-authored-by: Henry Heng <henryheng@flowiseai.com>
This commit is contained in:
parent
38ddbd8283
commit
16ceed1bda
|
|
@ -1,11 +1,121 @@
|
||||||
import { z } from 'zod'
|
import { z } from 'zod'
|
||||||
import { DynamicStructuredTool } from '@langchain/core/tools'
|
import { CallbackManager, CallbackManagerForToolRun, Callbacks, parseCallbackConfigArg } from '@langchain/core/callbacks/manager'
|
||||||
import { CallbackManagerForToolRun } from '@langchain/core/callbacks/manager'
|
import { BaseDynamicToolInput, DynamicTool, StructuredTool, ToolInputParsingException } from '@langchain/core/tools'
|
||||||
import { DynamicTool } from '@langchain/core/tools'
|
|
||||||
import { BaseRetriever } from '@langchain/core/retrievers'
|
import { BaseRetriever } from '@langchain/core/retrievers'
|
||||||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||||
import { getBaseClasses } from '../../../src/utils'
|
import { getBaseClasses } from '../../../src/utils'
|
||||||
import { SOURCE_DOCUMENTS_PREFIX } from '../../../src/agents'
|
import { SOURCE_DOCUMENTS_PREFIX } from '../../../src/agents'
|
||||||
|
import { RunnableConfig } from '@langchain/core/runnables'
|
||||||
|
import { customGet } from '../../sequentialagents/commonUtils'
|
||||||
|
import { VectorStoreRetriever } from '@langchain/core/vectorstores'
|
||||||
|
|
||||||
|
/**
 * Help text shown as the hint of the "Additional Metadata Filter" input.
 * Lists the `$flow.*` placeholders a filter value may reference.
 * The text ends with a trailing newline.
 */
const howToUse = [
    'Add additional filters to vector store. You can also filter with flow config, including the current "state":',
    '- `$flow.sessionId`',
    '- `$flow.chatId`',
    '- `$flow.chatflowId`',
    '- `$flow.input`',
    '- `$flow.state`',
    ''
].join('\n')
|
||||||
|
|
||||||
|
/** A Zod object schema with every generic parameter left open. */
type ZodObjectAny = z.ZodObject<any, any, any, any>

/** Optional flow-level values that can be handed to a tool alongside its parsed input. */
type IFlowConfig = {
    sessionId?: string
    chatId?: string
    input?: string
    state?: ICommonObject
}

/**
 * Constructor options for the flow-aware structured tool.
 *
 * - `schema`: Zod object schema the tool input is validated against.
 * - `func`: optional async handler; it receives the parsed input, the tool-run
 *   callback manager and, as a third argument, the flow configuration.
 */
interface DynamicStructuredToolInput<T extends ZodObjectAny = ZodObjectAny> extends BaseDynamicToolInput {
    schema: T
    func?: (input: z.infer<T>, runManager?: CallbackManagerForToolRun, flowConfig?: IFlowConfig) => Promise<string>
}
|
||||||
|
|
||||||
|
/**
 * Structured tool whose handler also receives flow-level context.
 *
 * Differences from a plain structured tool, as implemented below:
 * - `call` accepts an extra `flowConfig` argument and forwards it to `_call`.
 * - `_call` builds a lookup object for the handler: every parsed input field
 *   is exposed under a `$`-prefixed key (e.g. `input` -> `$input`), and the
 *   flow object set via `setFlowObject`, merged with the per-call
 *   `flowConfig`, is exposed under `$flow`.
 */
class DynamicStructuredTool<T extends z.ZodObject<any, any, any, any> = z.ZodObject<any, any, any, any>> extends StructuredTool<
    T extends ZodObjectAny ? T : ZodObjectAny
> {
    static lc_name() {
        return 'DynamicStructuredTool'
    }

    name: string

    description: string

    func: DynamicStructuredToolInput['func']

    // @ts-ignore -- kept from the original; presumably silences a mismatch with the base class's `schema` declaration
    schema: T

    /** Static flow properties injected through `setFlowObject`; undefined until then. */
    private flowObj: any

    constructor(fields: DynamicStructuredToolInput<T>) {
        super(fields)
        this.name = fields.name
        this.description = fields.description
        this.func = fields.func
        this.returnDirect = fields.returnDirect ?? this.returnDirect
        this.schema = fields.schema
    }

    /**
     * Validates `arg` against the schema, runs the tool and reports start,
     * end and error events to the configured callbacks.
     *
     * @param arg - Raw tool input; must satisfy `this.schema`.
     * @param configArg - Runnable config or callbacks for this run.
     * @param tags - Fallback tags used when `configArg` carries none.
     * @param flowConfig - Per-call flow values merged into `$flow`.
     * @returns The handler result; non-string truthy results are JSON-stringified.
     * @throws ToolInputParsingException when `arg` does not match the schema.
     */
    async call(arg: any, configArg?: RunnableConfig | Callbacks, tags?: string[], flowConfig?: IFlowConfig): Promise<string> {
        const config = parseCallbackConfigArg(configArg)
        if (config.runName === undefined) {
            config.runName = this.name
        }

        let parsed
        try {
            parsed = await this.schema.parseAsync(arg)
        } catch {
            throw new ToolInputParsingException(`Received tool input did not match expected schema`, JSON.stringify(arg))
        }

        const callbackManager_ = await CallbackManager.configure(
            config.callbacks,
            this.callbacks,
            config.tags || tags,
            this.tags,
            config.metadata,
            this.metadata,
            { verbose: this.verbose }
        )
        const runManager = await callbackManager_?.handleToolStart(
            this.toJSON(),
            typeof parsed === 'string' ? parsed : JSON.stringify(parsed),
            undefined,
            undefined,
            undefined,
            undefined,
            config.runName
        )

        let result
        try {
            result = await this._call(parsed, runManager, flowConfig)
        } catch (e) {
            // Report the failure to the callbacks, then let the caller see it.
            await runManager?.handleToolError(e)
            throw e
        }
        if (result && typeof result !== 'string') {
            result = JSON.stringify(result)
        }
        await runManager?.handleToolEnd(result)
        return result
    }

    /**
     * Builds the `$`-prefixed lookup object and invokes the handler with it.
     *
     * @param arg - Input already validated by `call`.
     * @param runManager - Callback manager for this tool run.
     * @param flowConfig - Per-call flow values; they override same-named keys
     *   of the flow object set via `setFlowObject`.
     * @throws Error when the tool was constructed without a `func`.
     */
    // @ts-ignore -- signature carries an extra `flowConfig` parameter
    protected async _call(arg: any, runManager?: CallbackManagerForToolRun, flowConfig?: IFlowConfig): Promise<string> {
        if (!this.func) {
            throw new Error(`Tool "${this.name}" has no func to execute`)
        }

        const flowConfiguration: ICommonObject = {}

        // `typeof null` is 'object', so exclude it explicitly before reading keys.
        if (arg !== null && typeof arg === 'object') {
            for (const [key, value] of Object.entries(arg)) {
                flowConfiguration[`$${key}`] = value
            }
        }

        // Inject flow properties. The per-call flowConfig is honoured even when
        // no flow object was set, so state passed to `call` is never dropped.
        if (this.flowObj || flowConfig) {
            flowConfiguration['$flow'] = { ...this.flowObj, ...flowConfig }
        }

        return this.func(arg, runManager, flowConfiguration)
    }

    /** Stores static flow properties that `_call` exposes under `$flow`. */
    setFlowObject(flow: any) {
        this.flowObj = flow
    }
}
|
||||||
|
|
||||||
class Retriever_Tools implements INode {
|
class Retriever_Tools implements INode {
|
||||||
label: string
|
label: string
|
||||||
|
|
@ -22,7 +132,7 @@ class Retriever_Tools implements INode {
|
||||||
constructor() {
|
constructor() {
|
||||||
this.label = 'Retriever Tool'
|
this.label = 'Retriever Tool'
|
||||||
this.name = 'retrieverTool'
|
this.name = 'retrieverTool'
|
||||||
this.version = 2.0
|
this.version = 3.0
|
||||||
this.type = 'RetrieverTool'
|
this.type = 'RetrieverTool'
|
||||||
this.icon = 'retrievertool.svg'
|
this.icon = 'retrievertool.svg'
|
||||||
this.category = 'Tools'
|
this.category = 'Tools'
|
||||||
|
|
@ -53,23 +163,55 @@ class Retriever_Tools implements INode {
|
||||||
name: 'returnSourceDocuments',
|
name: 'returnSourceDocuments',
|
||||||
type: 'boolean',
|
type: 'boolean',
|
||||||
optional: true
|
optional: true
|
||||||
|
},
|
||||||
|
{
|
||||||
|
label: 'Additional Metadata Filter',
|
||||||
|
name: 'retrieverToolMetadataFilter',
|
||||||
|
type: 'json',
|
||||||
|
description: 'Add additional metadata filter on top of the existing filter from vector store',
|
||||||
|
optional: true,
|
||||||
|
additionalParams: true,
|
||||||
|
hint: {
|
||||||
|
label: 'What can you filter?',
|
||||||
|
value: howToUse
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
||||||
async init(nodeData: INodeData): Promise<any> {
|
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||||
const name = nodeData.inputs?.name as string
|
const name = nodeData.inputs?.name as string
|
||||||
const description = nodeData.inputs?.description as string
|
const description = nodeData.inputs?.description as string
|
||||||
const retriever = nodeData.inputs?.retriever as BaseRetriever
|
const retriever = nodeData.inputs?.retriever as BaseRetriever
|
||||||
const returnSourceDocuments = nodeData.inputs?.returnSourceDocuments as boolean
|
const returnSourceDocuments = nodeData.inputs?.returnSourceDocuments as boolean
|
||||||
|
const retrieverToolMetadataFilter = nodeData.inputs?.retrieverToolMetadataFilter
|
||||||
|
|
||||||
const input = {
|
const input = {
|
||||||
name,
|
name,
|
||||||
description
|
description
|
||||||
}
|
}
|
||||||
|
|
||||||
const func = async ({ input }: { input: string }, runManager?: CallbackManagerForToolRun) => {
|
const flow = { chatflowId: options.chatflowid }
|
||||||
const docs = await retriever.getRelevantDocuments(input, runManager?.getChild('retriever'))
|
|
||||||
|
const func = async ({ input }: { input: string }, _?: CallbackManagerForToolRun, flowConfig?: IFlowConfig) => {
|
||||||
|
if (retrieverToolMetadataFilter) {
|
||||||
|
const flowObj = flowConfig
|
||||||
|
|
||||||
|
const metadatafilter =
|
||||||
|
typeof retrieverToolMetadataFilter === 'object' ? retrieverToolMetadataFilter : JSON.parse(retrieverToolMetadataFilter)
|
||||||
|
const newMetadataFilter: any = {}
|
||||||
|
for (const key in metadatafilter) {
|
||||||
|
let value = metadatafilter[key]
|
||||||
|
if (value.startsWith('$flow')) {
|
||||||
|
value = customGet(flowObj, value)
|
||||||
|
}
|
||||||
|
newMetadataFilter[key] = value
|
||||||
|
}
|
||||||
|
|
||||||
|
const vectorStore = (retriever as VectorStoreRetriever<any>).vectorStore
|
||||||
|
vectorStore.filter = newMetadataFilter
|
||||||
|
}
|
||||||
|
const docs = await retriever.invoke(input)
|
||||||
const content = docs.map((doc) => doc.pageContent).join('\n\n')
|
const content = docs.map((doc) => doc.pageContent).join('\n\n')
|
||||||
const sourceDocuments = JSON.stringify(docs)
|
const sourceDocuments = JSON.stringify(docs)
|
||||||
return returnSourceDocuments ? content + SOURCE_DOCUMENTS_PREFIX + sourceDocuments : content
|
return returnSourceDocuments ? content + SOURCE_DOCUMENTS_PREFIX + sourceDocuments : content
|
||||||
|
|
@ -80,6 +222,7 @@ class Retriever_Tools implements INode {
|
||||||
}) as any
|
}) as any
|
||||||
|
|
||||||
const tool = new DynamicStructuredTool({ ...input, func, schema })
|
const tool = new DynamicStructuredTool({ ...input, func, schema })
|
||||||
|
tool.setFlowObject(flow)
|
||||||
return tool
|
return tool
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue