Added meilisearch retriever component (#2824)
* added meilisearch retriever and credentials.ts * added semantic ratio * removed a TODO implementation * meilisearch component implemented with searching and upsert functionality (#3) meilisearch retriever component created, searching for an existing index and upserting a new or existing index has been implemented, component utilizes langchain and meilisearch vector search Reviewed-on: https://git.beyond.cc/ntg/flowise/pulls/3 Reviewed-by: mohamed1999akram <mohamed1999akram@gmail.com> * added CI/CD for ntg branch, added proper dockerfile for flowise-ntg (#4) Reviewed-on: https://git.beyond.cc/ntg/flowise/pulls/4 Reviewed-by: mohammad <mohammad@noreply.git.beyond.cc> * modified os version, removed linting errors, removed cypress github actions (#5) added --no-lock-file flag to pass CICD, made the runner run on debian and not ubuntu, removed code that caused warnings to pass linting Reviewed-on: https://git.beyond.cc/ntg/flowise/pulls/5 Reviewed-by: omaryassery <omarryassser@gmail.com> * removed unnecessary QEMU install action (#6) Reviewed-on: https://git.beyond.cc/ntg/flowise/pulls/6 Reviewed-by: omaryassery <omarryassser@gmail.com> * removed cypress installation and linting from dockerfile (#7) Reviewed-on: https://git.beyond.cc/ntg/flowise/pulls/7 Reviewed-by: isameh <isameh@ntgclarity.com> * dockerfile-ntg-modification (#9) dockerfile-ntg modified to copy all working directory before calling pnpm install Reviewed-on: https://git.beyond.cc/ntg/flowise/pulls/9 Reviewed-by: isameh <isameh@ntgclarity.com> * resolved comments, reverted CI/CD * add test docker build yml back * moved meilisearch to vector store folder * Update Meilisearch.ts --------- Co-authored-by: Henry <hzj94@hotmail.com> Co-authored-by: Henry Heng <henryheng@flowiseai.com>
This commit is contained in:
parent
0a36aa7ef4
commit
d5153c3840
|
|
@ -0,0 +1,32 @@
|
||||||
|
import { INodeParams, INodeCredential } from '../src/Interface'
|
||||||
|
|
||||||
|
/**
 * Credential definition for Meilisearch.
 *
 * Exposes two password fields: a search API key (required) and an admin API key
 * (optional). Per the description shown to the user, the search key covers basic
 * searching and the admin key is needed for upsert.
 */
class MeilisearchApi implements INodeCredential {
    label: string
    name: string
    version: number
    description: string
    inputs: INodeParams[]

    constructor() {
        // Required key used for search requests.
        const searchKeyField: INodeParams = {
            label: 'Meilisearch Search API Key',
            name: 'meilisearchSearchApiKey',
            type: 'password'
        }
        // Optional key; only needed when upserting.
        const adminKeyField: INodeParams = {
            label: 'Meilisearch Admin API Key',
            name: 'meilisearchAdminApiKey',
            type: 'password',
            optional: true
        }
        const helpText =
            'Refer to <a target="_blank" href="https://meilisearch.com">official guide</a> on how to get an API Key, you need a search API KEY for basic searching functionality, admin API KEY is optional but needed for upsert functionality '

        this.name = 'meilisearchApi'
        this.label = 'Meilisearch API'
        this.version = 1.0
        this.description = helpText
        this.inputs = [searchKeyField, adminKeyField]
    }
}
|
||||||
|
|
||||||
|
module.exports = { credClass: MeilisearchApi }
|
||||||
Binary file not shown.
|
After Width: | Height: | Size: 8.6 KiB |
|
|
@ -0,0 +1,174 @@
|
||||||
|
import { getCredentialData, getCredentialParam } from '../../../src'
|
||||||
|
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
|
||||||
|
import { Meilisearch } from 'meilisearch'
|
||||||
|
import { MeilisearchRetriever } from './core'
|
||||||
|
import { flatten } from 'lodash'
|
||||||
|
import { Document } from '@langchain/core/documents'
|
||||||
|
import { v4 as uuidv4 } from 'uuid'
|
||||||
|
import { Embeddings } from '@langchain/core/embeddings'
|
||||||
|
|
||||||
|
/**
 * Flowise node that upserts embedded documents into a Meilisearch index and exposes a
 * retriever that queries that index with Meilisearch hybrid search.
 *
 * Documents are stored with user-provided vectors under the embedder name `ollama`.
 * The name is only a label inside Meilisearch (the embedder source is `userProvided`);
 * the vectors come from whichever Embeddings instance is connected to the node. The
 * retriever must query with the same embedder name.
 */
class MeilisearchRetriever_node implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    credential: INodeParams
    badge: string
    outputs: INodeOutputsValue[]
    author?: string

    constructor() {
        this.label = 'Meilisearch'
        this.name = 'meilisearch'
        this.version = 1.0
        this.type = 'Meilisearch'
        this.icon = 'Meilisearch.png'
        this.category = 'Vector Stores'
        this.badge = 'NEW'
        this.description = `Upsert embedded data and perform similarity search upon query using Meilisearch hybrid search functionality`
        this.baseClasses = ['BaseRetriever']
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            credentialNames: ['meilisearchApi']
        }
        this.inputs = [
            {
                label: 'Document',
                name: 'document',
                type: 'Document',
                list: true,
                optional: true
            },
            {
                label: 'Embeddings',
                name: 'embeddings',
                type: 'Embeddings'
            },
            {
                label: 'Host',
                name: 'host',
                type: 'string',
                description: 'This is the URL for the desired Meilisearch instance'
            },
            {
                label: 'Index Uid',
                name: 'indexUid',
                type: 'string',
                description: 'UID for the index to answer from'
            },
            {
                label: 'Top K',
                name: 'K',
                type: 'number',
                description: 'number of top searches to return as context',
                additionalParams: true,
                optional: true
            },
            {
                label: 'Semantic Ratio',
                name: 'semanticRatio',
                type: 'number',
                description: 'percentage of semantic reasoning in meilisearch hybrid search',
                additionalParams: true,
                optional: true
            }
        ]
        // Single retriever output. `outputs` used to be assigned twice in a row; only this
        // (second) value ever took effect, so the dead first assignment was dropped.
        this.outputs = [
            {
                label: 'Meilisearch Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            }
        ]
    }

    //@ts-ignore
    vectorStoreMethods = {
        /**
         * Embeds the connected documents and enqueues them for indexing in Meilisearch.
         *
         * @param nodeData node configuration; reads `credential` and the `document`,
         *                 `host`, `indexUid` and `embeddings` inputs
         * @param options  passed through to `getCredentialData`
         * @returns nothing; resolves once the documents were handed to Meilisearch
         * @throws when embedding fails, when looking up the index fails for a reason other
         *         than the index being missing, or when Meilisearch rejects the
         *         settings/documents requests
         */
        async upsert(nodeData: INodeData, options: ICommonObject): Promise<any> {
            const credentialData = await getCredentialData(nodeData.credential ?? '', options)
            const meilisearchAdminApiKey = getCredentialParam('meilisearchAdminApiKey', credentialData, nodeData)
            const docs = nodeData.inputs?.document as Document[]
            const host = nodeData.inputs?.host as string
            const indexUid = nodeData.inputs?.indexUid as string
            const embeddings = nodeData.inputs?.embeddings as Embeddings

            const flattenDocs = docs && docs.length ? flatten(docs) : []
            const validDocs = flattenDocs.filter((doc) => doc && doc.pageContent)

            // Nothing to index: return before touching the index. Continuing would overwrite
            // the embedder settings with a made-up dimension that may not match the vectors
            // already stored in an existing index.
            if (!validDocs.length) return

            // Embed all documents in one batched call instead of one request per document.
            const vectors = await embeddings.embedDocuments(validDocs.map((doc) => doc.pageContent))
            const embeddingDimension = vectors[0].length

            const finalDocs = validDocs.map((doc, i) => ({
                pageContent: doc.pageContent,
                metadata: doc.metadata,
                objectID: uuidv4(),
                _vectors: {
                    ollama: {
                        embeddings: vectors[i],
                        regenerate: false
                    }
                }
            }))

            const client = new Meilisearch({
                host: host,
                apiKey: meilisearchAdminApiKey
            })

            let index: any
            try {
                index = await client.getIndex(indexUid)
            } catch (error: any) {
                // Only a missing index should trigger creation; anything Meilisearch reports
                // with a different error code (bad key, unreachable host, ...) is surfaced.
                const code = error?.cause?.code ?? error?.code
                if (code && code !== 'index_not_found') throw error

                // createIndex only enqueues a task, so an immediate getIndex could still fail.
                // A local handle is enough: the settings and document tasks enqueued below are
                // expected to be processed after the creation task.
                await client.createIndex(indexUid, { primaryKey: 'objectID' })
                index = client.index(indexUid)
            }

            try {
                await index.updateSettings({
                    embedders: {
                        ollama: {
                            source: 'userProvided',
                            dimensions: embeddingDimension
                        }
                    }
                })
                // NOTE(review): this resolves when the task is enqueued; failures during the
                // asynchronous indexing itself are not observed here.
                await index.addDocuments(finalDocs)
            } catch (error) {
                console.error('Error occurred while adding documents:', error)
                // Re-throw so a failed upsert is not reported as a success.
                throw error
            }
            return
        }
    }

    /**
     * Builds the hybrid-search retriever from the node inputs and the search API key
     * stored in the connected credential.
     *
     * @param nodeData node configuration; reads `credential` and the `host`, `indexUid`,
     *                 `K`, `semanticRatio` and `embeddings` inputs
     * @param _        unused
     * @param options  passed through to `getCredentialData`
     * @returns a `MeilisearchRetriever` instance
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const meilisearchSearchApiKey = getCredentialParam('meilisearchSearchApiKey', credentialData, nodeData)
        const host = nodeData.inputs?.host as string
        const indexUid = nodeData.inputs?.indexUid as string
        const K = nodeData.inputs?.K as string
        const semanticRatio = nodeData.inputs?.semanticRatio as string
        const embeddings = nodeData.inputs?.embeddings as Embeddings

        const hybridsearchretriever = new MeilisearchRetriever(host, meilisearchSearchApiKey, indexUid, K, semanticRatio, embeddings)
        return hybridsearchretriever
    }
}
|
||||||
|
module.exports = { nodeClass: MeilisearchRetriever_node }
|
||||||
|
|
@ -0,0 +1,92 @@
|
||||||
|
import { BaseRetriever, type BaseRetrieverInput } from '@langchain/core/retrievers'
|
||||||
|
import { Document } from '@langchain/core/documents'
|
||||||
|
import { Meilisearch } from 'meilisearch'
|
||||||
|
import { Embeddings } from '@langchain/core/embeddings'
|
||||||
|
|
||||||
|
/** Optional base-retriever options accepted by MeilisearchRetriever; adds no fields of its own to BaseRetrieverInput. */
export interface CustomRetrieverInput extends BaseRetrieverInput {}
|
||||||
|
|
||||||
|
/**
 * LangChain retriever that answers a query with Meilisearch hybrid search.
 *
 * The query text is embedded with the supplied Embeddings instance and sent together
 * with the raw query to the configured index, using the embedder named `ollama`.
 */
export class MeilisearchRetriever extends BaseRetriever {
    lc_namespace = ['langchain', 'retrievers']
    private readonly meilisearchSearchApiKey: any
    private readonly host: any
    private indexUid: string
    private K: string
    private semanticRatio: string
    private embeddings: Embeddings

    /**
     * @param host                    URL of the Meilisearch instance
     * @param meilisearchSearchApiKey API key used for search requests
     * @param indexUid                UID of the index to search
     * @param K                       maximum number of hits; empty, missing, non-numeric or
     *                                negative values fall back to 4
     * @param semanticRatio           hybrid-search semantic ratio; empty, missing or
     *                                non-numeric values fall back to 0.5, other values are
     *                                clamped to the range 0.0 - 1.0
     * @param embeddings              embeddings used to vectorise the query
     * @param fields                  optional base-retriever options forwarded to `super`
     */
    constructor(
        host: string,
        meilisearchSearchApiKey: any,
        indexUid: string,
        K: string,
        semanticRatio: string,
        embeddings: Embeddings,
        fields?: CustomRetrieverInput
    ) {
        super(fields)
        this.meilisearchSearchApiKey = meilisearchSearchApiKey
        this.host = host
        this.indexUid = indexUid
        this.embeddings = embeddings
        this.semanticRatio = MeilisearchRetriever.resolveSemanticRatio(semanticRatio)
        this.K = MeilisearchRetriever.resolveTopK(K)
    }

    /** Returns the semantic ratio as a string: 0.5 when unusable, otherwise clamped to 0.0 - 1.0. */
    private static resolveSemanticRatio(value: unknown): string {
        // Optional inputs may arrive as '' / undefined / null; all of them parse to NaN here.
        const parsed = parseFloat(String(value ?? ''))
        if (Number.isNaN(parsed)) return '0.5'
        if (parsed > 1.0) return '1.0'
        if (parsed < 0.0) return '0.0'
        return String(value)
    }

    /** Returns the hit limit as a string: 4 when the value is unusable (non-numeric or negative). */
    private static resolveTopK(value: unknown): string {
        const parsed = parseInt(String(value ?? ''), 10)
        if (Number.isNaN(parsed) || parsed < 0) return '4'
        return String(parsed)
    }

    /**
     * Runs the hybrid search and maps every hit to a Document.
     *
     * @param query the user query; used both as search text and as embedding input
     * @returns one Document per hit (an empty array when there are no hits); the hit's
     *          stored `metadata` is carried over and its `objectID` is added to it
     * @throws whatever the embeddings call or the Meilisearch search request throws
     */
    async _getRelevantDocuments(query: string): Promise<Document[]> {
        const client = new Meilisearch({
            host: this.host,
            apiKey: this.meilisearchSearchApiKey
        })

        const index = client.index(this.indexUid)
        const questionEmbedding = await this.embeddings.embedQuery(query)

        const searchResults = await index.search(query, {
            vector: questionEmbedding,
            limit: parseInt(this.K, 10),
            attributesToRetrieve: ['*'],
            hybrid: {
                semanticRatio: parseFloat(this.semanticRatio),
                embedder: 'ollama'
            }
        })

        // No placeholder document: an empty result stays empty instead of leaking a fake
        // 'mock page' into the caller's context.
        return searchResults.hits.map(
            (hit: any) =>
                new Document({
                    pageContent: hit.pageContent ?? '',
                    metadata: {
                        ...(hit.metadata ?? {}),
                        objectID: hit.objectID
                    }
                })
        )
    }
}
|
||||||
|
|
@ -94,6 +94,7 @@
|
||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"lunary": "^0.6.16",
|
"lunary": "^0.6.16",
|
||||||
"mammoth": "^1.5.1",
|
"mammoth": "^1.5.1",
|
||||||
|
"meilisearch": "^0.41.0",
|
||||||
"moment": "^2.29.3",
|
"moment": "^2.29.3",
|
||||||
"mongodb": "6.3.0",
|
"mongodb": "6.3.0",
|
||||||
"mysql2": "^3.9.2",
|
"mysql2": "^3.9.2",
|
||||||
|
|
|
||||||
|
|
@ -304,6 +304,9 @@ importers:
|
||||||
mammoth:
|
mammoth:
|
||||||
specifier: ^1.5.1
|
specifier: ^1.5.1
|
||||||
version: 1.7.0
|
version: 1.7.0
|
||||||
|
meilisearch:
|
||||||
|
specifier: ^0.41.0
|
||||||
|
version: 0.41.0(encoding@0.1.13)
|
||||||
moment:
|
moment:
|
||||||
specifier: ^2.29.3
|
specifier: ^2.29.3
|
||||||
version: 2.30.1
|
version: 2.30.1
|
||||||
|
|
@ -11698,6 +11701,9 @@ packages:
|
||||||
resolution: { integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ== }
|
resolution: { integrity: sha512-dq+qelQ9akHpcOl/gUVRTxVIOkAJ1wR3QAvb4RsVjS8oVoFjDGTc679wJYmUmknUF5HwMLOgb5O+a3KxfWapPQ== }
|
||||||
engines: { node: '>= 0.6' }
|
engines: { node: '>= 0.6' }
|
||||||
|
|
||||||
|
meilisearch@0.41.0:
|
||||||
|
resolution: { integrity: sha512-5KcGLxEXD7E+uNO7R68rCbGSHgCqeM3Q3RFFLSsN7ZrIgr8HPDXVAIlP4LHggAZfk0FkSzo8VSXifHCwa2k80g== }
|
||||||
|
|
||||||
mem-fs-editor@9.7.0:
|
mem-fs-editor@9.7.0:
|
||||||
resolution: { integrity: sha512-ReB3YD24GNykmu4WeUL/FDIQtkoyGB6zfJv60yfCo3QjKeimNcTqv2FT83bP0ccs6uu+sm5zyoBlspAzigmsdg== }
|
resolution: { integrity: sha512-ReB3YD24GNykmu4WeUL/FDIQtkoyGB6zfJv60yfCo3QjKeimNcTqv2FT83bP0ccs6uu+sm5zyoBlspAzigmsdg== }
|
||||||
engines: { node: '>=12.10.0' }
|
engines: { node: '>=12.10.0' }
|
||||||
|
|
@ -31754,6 +31760,12 @@ snapshots:
|
||||||
|
|
||||||
media-typer@0.3.0: {}
|
media-typer@0.3.0: {}
|
||||||
|
|
||||||
|
meilisearch@0.41.0(encoding@0.1.13):
|
||||||
|
dependencies:
|
||||||
|
cross-fetch: 3.1.8(encoding@0.1.13)
|
||||||
|
transitivePeerDependencies:
|
||||||
|
- encoding
|
||||||
|
|
||||||
mem-fs-editor@9.7.0(mem-fs@2.3.0):
|
mem-fs-editor@9.7.0(mem-fs@2.3.0):
|
||||||
dependencies:
|
dependencies:
|
||||||
binaryextensions: 4.19.0
|
binaryextensions: 4.19.0
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue