From f108c62acf009d4b0ef26690cdffe0f18804b6ea Mon Sep 17 00:00:00 2001 From: vinodkiran Date: Thu, 28 Sep 2023 10:31:40 +0530 Subject: [PATCH] Support for ElasticSearch as a vector store --- .../ElasticsearchAPI.credential.ts | 31 ++++ .../ElectricsearchUserPassword.credential.ts | 31 ++++ .../Elasticsearch/Elasticsearch_Existing.ts | 111 ++++++++++++ .../Elasticsearch/Elasticsearch_Upsert.ts | 165 ++++++++++++++++++ .../Elasticsearch/elasticsearch.png | Bin 0 -> 3719 bytes packages/components/package.json | 1 + 6 files changed, 339 insertions(+) create mode 100644 packages/components/credentials/ElasticsearchAPI.credential.ts create mode 100644 packages/components/credentials/ElectricsearchUserPassword.credential.ts create mode 100644 packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts create mode 100644 packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts create mode 100644 packages/components/nodes/vectorstores/Elasticsearch/elasticsearch.png diff --git a/packages/components/credentials/ElasticsearchAPI.credential.ts b/packages/components/credentials/ElasticsearchAPI.credential.ts new file mode 100644 index 000000000..e377243d4 --- /dev/null +++ b/packages/components/credentials/ElasticsearchAPI.credential.ts @@ -0,0 +1,31 @@ +import { INodeParams, INodeCredential } from '../src/Interface' + +class ElectricsearchAPI implements INodeCredential { + label: string + name: string + version: number + description: string + inputs: INodeParams[] + + constructor() { + this.label = 'Elasticsearch API' + this.name = 'elasticsearchApi' + this.version = 1.0 + this.description = + 'Refer to official guide on how to get an API Key from ElasticSearch' + this.inputs = [ + { + label: 'Elasticsearch Endpoint', + name: 'endpoint', + type: 'string' + }, + { + label: 'Elasticsearch API ID', + name: 'apiKey', + type: 'password' + } + ] + } +} + +module.exports = { credClass: ElectricsearchAPI } diff --git a/packages/components/credentials/ElectricsearchUserPassword.credential.ts b/packages/components/credentials/ElectricsearchUserPassword.credential.ts new file mode 100644 index 000000000..2dd889373 --- /dev/null +++ b/packages/components/credentials/ElectricsearchUserPassword.credential.ts @@ -0,0 +1,31 @@ +import { INodeParams, INodeCredential } from '../src/Interface' + +class ElasticSearchUserPassword implements INodeCredential { + label: string + name: string + version: number + description: string + inputs: INodeParams[] + + constructor() { + this.label = 'ElasticSearch User Password' + this.name = 'elasticSearchUserPassword' + this.version = 1.0 + this.description = + 'Refer to official guide on how to get User Password from ElasticSearch' + this.inputs = [ + { + label: 'ElasticSearch User', + name: 'elasticSearchUser', + type: 'string' + }, + { + label: 'ElasticSearch Password', + name: 'elasticSearchPassword', + type: 'password' + } + ] + } +} + +module.exports = { credClass: ElasticSearchUserPassword } diff --git a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts new file mode 100644 index 000000000..6e785c857 --- /dev/null +++ b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Existing.ts @@ -0,0 +1,111 @@ +import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' +import { Embeddings } from 'langchain/embeddings/base' +import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src' + +import { Client, ClientOptions } from '@elastic/elasticsearch' +import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch' + +class ElasicsearchExisting_VectorStores implements INode { + label: string + name: string + version: number + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + credential: INodeParams + outputs: INodeOutputsValue[] + + constructor() { + this.label = 'Elasticsearch Load Existing Index' + this.name = 'ElasticsearchIndex' + this.version = 1.0 + this.type = 'Elasticsearch' + this.icon = 'elasticsearch.png' + this.category = 'Vector Stores' + this.description = 'Load existing index from Elasticsearch (i.e: Document has been upserted)' + this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever'] + this.credential = { + label: 'Connect Credential', + name: 'credential', + type: 'credential', + credentialNames: ['elasticsearchApi', 'elasticSearchUserPassword'] + } + this.inputs = [ + { + label: 'Embeddings', + name: 'embeddings', + type: 'Embeddings' + }, + { + label: 'Index Name', + name: 'indexName', + placeholder: '', + type: 'string' + }, + { + label: 'Top K', + name: 'topK', + description: 'Number of top results to fetch. Default to 4', + placeholder: '4', + type: 'number', + additionalParams: true, + optional: true + } + ] + this.outputs = [ + { + label: 'Elasticsearch Retriever', + name: 'retriever', + baseClasses: this.baseClasses + }, + { + label: 'Elasticsearch Vector Store', + name: 'vectorStore', + baseClasses: [this.type, ...getBaseClasses(ElasticVectorSearch)] + } + ] + } + + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { + const credentialData = await getCredentialData(nodeData.credential ?? '', options) + const endPoint = getCredentialParam('endpoint', credentialData, nodeData) + const apiKey = getCredentialParam('apiKey', credentialData, nodeData) + const indexName = nodeData.inputs?.indexName as string + const embeddings = nodeData.inputs?.embeddings as Embeddings + const topK = nodeData.inputs?.topK as string + + const k = topK ? parseFloat(topK) : 4 + const output = nodeData.outputs?.output as string + + // eslint-disable-next-line no-console + console.log('EndPoint:: ' + endPoint + ', APIKey:: ' + apiKey + ', Index:: ' + indexName) + + const elasticSearchClientOptions: ClientOptions = { + node: endPoint, + auth: { + apiKey: apiKey + } + } + + const elasticSearchClientArgs: ElasticClientArgs = { + client: new Client(elasticSearchClientOptions), + indexName: indexName + } + + const vectorStore = await ElasticVectorSearch.fromExistingIndex(embeddings, elasticSearchClientArgs) + // eslint-disable-next-line no-console + console.log('vectorStore ::' + vectorStore._vectorstoreType()) + if (output === 'retriever') { + return vectorStore.asRetriever(k) + } else if (output === 'vectorStore') { + ;(vectorStore as any).k = k + return vectorStore + } + return vectorStore + } +} + +module.exports = { nodeClass: ElasicsearchExisting_VectorStores } diff --git a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts new file mode 100644 index 000000000..5a0065d54 --- /dev/null +++ b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch_Upsert.ts @@ -0,0 +1,165 @@ +import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface' +import { Embeddings } from 'langchain/embeddings/base' +import { Document } from 'langchain/document' +import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src' + +import { Client, ClientOptions } from '@elastic/elasticsearch' +import { ElasticClientArgs, ElasticVectorSearch } from 'langchain/vectorstores/elasticsearch' +import { flatten } from 'lodash' + +class ElasicsearchUpsert_VectorStores implements INode { + label: string + name: string + version: number + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + credential: INodeParams + outputs: INodeOutputsValue[] + + constructor() { + this.label = 'Elasticsearch Upsert Document' + this.name = 'ElasticsearchUpsert' + this.version = 1.0 + this.type = 'Elasticsearch' + this.icon = 'elasticsearch.png' + this.category = 'Vector Stores' + this.description = 'Upsert documents to Elasticsearch' + this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever'] + this.credential = { + label: 'Connect Credential', + name: 'credential', + type: 'credential', + credentialNames: ['elasticsearchApi', 'elasticSearchUserPassword'] + } + this.inputs = [ + { + label: 'Document', + name: 'document', + type: 'Document', + list: true + }, + { + label: 'Embeddings', + name: 'embeddings', + type: 'Embeddings' + }, + { + label: 'Index Name', + name: 'indexName', + placeholder: '', + type: 'string' + }, + { + label: 'Top K', + name: 'topK', + description: 'Number of top results to fetch. Default to 4', + placeholder: '4', + type: 'number', + additionalParams: true, + optional: true + }, + { + label: 'Similarity', + name: 'similarity', + description: 'Similarity measure used in Elasticsearch.', + type: 'options', + default: 'l2_norm', + options: [ + { + label: 'l2_norm', + name: 'l2_norm' + }, + { + label: 'dot_product', + name: 'dot_product' + }, + { + label: 'cosine', + name: 'cosine' + } + ], + additionalParams: true, + optional: true + } + ] + this.outputs = [ + { + label: 'Elasticsearch Retriever', + name: 'retriever', + baseClasses: this.baseClasses + }, + { + label: 'Elasticsearch Vector Store', + name: 'vectorStore', + baseClasses: [this.type, ...getBaseClasses(ElasticVectorSearch)] + } + ] + } + + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { + const credentialData = await getCredentialData(nodeData.credential ?? '', options) + const endPoint = getCredentialParam('endpoint', credentialData, nodeData) + const apiKey = getCredentialParam('apiKey', credentialData, nodeData) + const docs = nodeData.inputs?.document as Document[] + const indexName = nodeData.inputs?.indexName as string + const embeddings = nodeData.inputs?.embeddings as Embeddings + const topK = nodeData.inputs?.topK as string + const k = topK ? parseFloat(topK) : 4 + const output = nodeData.outputs?.output as string + const similarityMeasure = nodeData.inputs?.similarityMeasure as string + + // eslint-disable-next-line no-console + console.log('EndPoint:: ' + endPoint + ', APIKey:: ' + apiKey + ', Index:: ' + indexName) + + const elasticSearchClientOptions: ClientOptions = { + node: endPoint, + auth: { + apiKey: apiKey + } + } + let vectorSearchOptions = {} + switch (similarityMeasure) { + case 'dot_product': + vectorSearchOptions = { + similarity: 'dot_product' + } + break + case 'cosine': + vectorSearchOptions = { + similarity: 'cosine' + } + break + default: + vectorSearchOptions = { + similarity: 'l2_norm' + } + } + const elasticSearchClientArgs: ElasticClientArgs = { + client: new Client(elasticSearchClientOptions), + indexName: indexName, + vectorSearchOptions: vectorSearchOptions + } + + const flattenDocs = docs && docs.length ? flatten(docs) : [] + const finalDocs = [] + for (let i = 0; i < flattenDocs.length; i += 1) { + finalDocs.push(new Document(flattenDocs[i])) + } + + const vectorStore = await ElasticVectorSearch.fromDocuments(finalDocs, embeddings, elasticSearchClientArgs) + + if (output === 'retriever') { + return vectorStore.asRetriever(k) + } else if (output === 'vectorStore') { + ;(vectorStore as any).k = k + return vectorStore + } + return vectorStore + } +} + +module.exports = { nodeClass: ElasicsearchUpsert_VectorStores } diff --git a/packages/components/nodes/vectorstores/Elasticsearch/elasticsearch.png b/packages/components/nodes/vectorstores/Elasticsearch/elasticsearch.png new file mode 100644 index 0000000000000000000000000000000000000000..fdb6686369bf0186319e24f2dd96ebee1ee6a54b GIT binary patch literal 3719 zcmV;24tVj2P)9SFK8O}Ki5ohKy5!%H#4**_Slt?-k#^|sCG zRFnSp`Qqv4ufx5-&&$~G?dI{<^~6K@-lObfoA}m;{Ot1i*yQ%Y)$Hx_-r(vsO_OeW zt6OWLqN&GbkcEnrx+!g5)85;{$J&;#sYH8jr`^}Q+K;8qV$0&G-t5W7-=OoP!uGt; z@QSkb%U|nan(cF><`Q|dy3u!ouwihefT5X3TA5abd7R2lwAO&)?$0N9@5%rG4G>90 zK~#90?VWpD6FC&dX(?^#g%%2h0)-+_xnD#D5fm>VcLdR0*9AfJ`+tRFMeDsaAgv4VqtFSlme_s8-fy zcAxxTNS7AY5{YCo8D(Vsk3=Fmv;62OR~Kt^QJE~vm4)e6f64G#n(?9|l!=wy#~I1x zm4rWD6S8;l@da`h5IrW4J5O5ul>;dtUtNiv#}miZXmGwdLKbI%IBZ0yDnSS0ca&g?RjzUS$?Rmhu2WP z=P&lUd6YyiAl%UmxRD=n;WZ+efAP+Fp&q^(;*3td^&tsggYj>Ar3RO=mC)!m8+rHm zbSl4LU-?AOBy1vTn~Fd3o_rP|G1XmtA+*M+u7$=w9wW3#xMyFhagau}jZ=TXo;Gu>!y4fNKHxXMIcRB!7|oJqLJel~4glDbCrMwc(bR;W6km9! zi6$XQ`Ff3}re5SAK8J+RVKghc0i(&cxP&0UzJJK|x~HL~1S;S+e;@5}8qKr;!X)Z? z9|Y(ynq}?NuqH&n_n<-QFq#8nl+%U!X2j3ycPBd9eo~d}&{fc>2PZ5Uai4=RXl!Vl>g{8WQ1#Xit=z zhR^Jab+@}2VWs?+?>AoLy(y{DXx&%iwSq)gr(FN`#)C3X_VKm1yLA!Pwf^l#J}=w` zrbeT`-R+Q;J^gp}gC<-s`+CTq8|2eOSkoRqFa zW{ZqQ@4*uyYhn)lON_=yyPFVV)TQN{Gr(1Qn5F));z0lH%}2mT47Iy$LbZ+H(I#J0W5x0DpNFG9A2QVLwh5I0ZwH)w z4fPL?4|oq*qehcZNfu|tF&CdrSsd6uzL8KGT>eHAD!Jl#K*C3d?C^nT$QmqZl#aE3 zDtZHwV17D0E3+RpHcVAce3$oSBUCbEofC6P)6rmF35E0C<(S)IJSnXqTRbEmH?n$4 zMiufzP{4);$mdP8OV!SL?}q`54bvd%Z|buxwsP6U;HjU(VE|*XBDsV9bBRorD4*o+ zHUj`2LR?fxcth@?c?+1$5}TP!cP4iPW*nUe322GjWL0Yr*(`Bd=&mV~y8{x2nuE5PgQ$y; z%MwS0Zl|&*zOMR7VUSuu{6ub~saeCd)-JN$oC>GjfTKR;wx0%QC9cJ5_$-m(0j-hh zJsH(PXa(_r+)BqHs+Vay)v|;@o`@e6Ll}!qkP2MGB8q)M_Ou}4^Fh1?uP9_1yCpY6 zUqrD@r>&d>&_a|a|8Z47XAqI!uS_Q8S%Oza=BQ_3>otdlkIC&&7g5|#MjXk8XoK)V zLU^F)Gj)xV+fDc4A^K(sR@rlHgOpiB%`ajx0>pV4bo0RKVz8&4terRJ@yh3{P16`@u=ohvIJFzZ=Z&(X~vm6KjH$6NKNO7q?+Th zL^d3tCGL44>(YTk=S@C~0Oh7LEixT8OS}rqO~;l> zmLP<#u4#Z}KHIR6<#``QvFX&MlqGEO8Q;=96cC)}{TPL&qnagb2|9{rz#y^>3!gmi z!zile6tjd4_)GA2cMnP!Y{SAo&wDFXqT*+Lnj z*7{Jm8SHPTY{LSa=Y1F%(>cK*>Ci;>**6;2Q(`=n>y<8+v9SSWam zODSqO7Z`xDZPKn|uPCyGOo%-1r7F2GYzNIp@&gO^@~H@DSSZL5#9s(*D%;GrBs~N@ z0cu!;7Y*X3dd()Y)H5UUn zEW#*Fs~goIdWNv3d(KhAG%Ui1JfW->M6X%c6#L#&$c9B=Io6DKN zIsZf5Sn=6OyjQJc7Y>N{k!7Xe=;bqcv*;VpL%ZJll%Wn`2Y zC6DPc3Ftvs)8>{nWZjg7iy&{xx@2;UGR3JxZ`6H zYnG5ab}-JXq-$atB_!Jy^KQbpriocLT9uOZ;kB4h%fvKFNNTpPB`G19m{^b|2^~h` zO<1S={TKh_Hg%Q#9cXSq9U^A;@SdSpQgVQwEk$WN`>G8mtZm zmG(nl^oYVWWEvvo!lty(L|U^!j0QDRG^N{4snJ{k0JZFZYbK;MoBuSdpq1Q=OdWxi z?5pVq)NM9oap>Fb9@KV7L#x@)isp6P(`iZ%8jok_9H&*sb1vm6zf?h6*9)buvPm&# zCtqJl_P8roy3g0>+J{QSh9aA4T`(5!i(8)~}h|4_3+Mr=PoWW;tGq75O= z#r6b5bX3O=Vzil%-V#dr{UlZ^h48LkIs!kn-U=!EO-hpI+ehW93S+vhN2sllROs0# z{@K>{Q#s^pOjX-^20FDp7vdN9e1BE2y<{;48ms=ln%I8Yo&+;Wes1>>=yMeSnNFo= zAJ6=Asa%7UQ>Gs|UPI-lv(=P^ndadvy2+78k7sLpWOlNeP7y`sWOYVv9CTA0p*R2l002ovPDHLkV1i_tAvOR2 literal 0 HcmV?d00001 diff --git a/packages/components/package.json b/packages/components/package.json index 936091062..3fdd8923a 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -19,6 +19,7 @@ "@aws-sdk/client-dynamodb": "^3.360.0", "@dqbd/tiktoken": "^1.0.7", "@getzep/zep-js": "^0.6.3", + "@elastic/elasticsearch": "^8.9.0", "@google-ai/generativelanguage": "^0.2.1", "@huggingface/inference": "^2.6.1", "@notionhq/client": "^2.2.8",