add Milvus

This commit is contained in:
Henry 2023-08-17 21:23:40 +01:00
parent bfa14fb290
commit b4c2550c77
9 changed files with 503 additions and 280 deletions

View File

@ -0,0 +1,31 @@
import { INodeParams, INodeCredential } from '../src/Interface'
/**
 * Credential definition for Milvus username/password authentication.
 * Exposes two fields: `milvusUser` (plain string) and `milvusPassword` (password).
 */
class MilvusCredential implements INodeCredential {
    label: string
    name: string
    version: number
    description: string
    inputs: INodeParams[]

    constructor() {
        // Field descriptors are built first, then attached in one go below.
        const userField = {
            label: 'Milvus User',
            name: 'milvusUser',
            type: 'string'
        }
        const passwordField = {
            label: 'Milvus Password',
            name: 'milvusPassword',
            type: 'password'
        }
        const helpText =
            'You can find the Milvus Authentication from <a target="_blank" href="https://milvus.io/docs/authenticate.md#Authenticate-User-Access">here</a> page.'

        this.label = 'Milvus Auth'
        this.name = 'milvusAuth'
        this.version = 1.0
        this.description = helpText
        this.inputs = [userField, passwordField]
    }
}
module.exports = { credClass: MilvusCredential }

View File

@ -0,0 +1,185 @@
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { DataType, ErrorCode } from '@zilliz/milvus2-sdk-node'
import { MilvusLibArgs, Milvus } from 'langchain/vectorstores/milvus'
import { Embeddings } from 'langchain/embeddings/base'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { Document } from 'langchain/document'
/**
 * Flowise node that opens an existing Milvus collection and returns it either
 * as a retriever or as the vector store itself, depending on the selected output.
 */
class Milvus_Existing_VectorStores implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    credential: INodeParams
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'Milvus Load Existing collection'
        this.name = 'milvusExistingCollection'
        this.version = 1.0
        this.type = 'Milvus'
        this.icon = 'milvus.svg'
        this.category = 'Vector Stores'
        this.description = 'Load existing collection from Milvus (i.e: Document has been upserted)'
        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
        // Optional: username/password are only applied in init() when they are present.
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            optional: true,
            credentialNames: ['milvusAuth']
        }
        this.inputs = [
            {
                label: 'Embeddings',
                name: 'embeddings',
                type: 'Embeddings'
            },
            {
                label: 'Milvus Server URL',
                name: 'milvusServerUrl',
                type: 'string',
                placeholder: 'http://localhost:19530'
            },
            {
                label: 'Milvus Collection Name',
                name: 'milvusCollection',
                type: 'string'
            },
            // init() reads `topK`; without this input the value could never be supplied.
            {
                label: 'Top K',
                name: 'topK',
                placeholder: '4',
                type: 'number',
                additionalParams: true,
                optional: true
            }
        ]
        this.outputs = [
            {
                label: 'Milvus Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Milvus Vector Store',
                name: 'vectorStore',
                baseClasses: [this.type, ...getBaseClasses(Milvus)]
            }
        ]
    }

    /**
     * Connects to the configured collection and returns the selected output.
     *
     * @param nodeData - Supplies `inputs.milvusServerUrl`, `inputs.milvusCollection`,
     *   `inputs.embeddings`, optional `inputs.topK`, `outputs.output` and `credential`.
     * @param _ - Unused.
     * @param options - Forwarded to `getCredentialData`.
     * @returns `vectorStore.asRetriever(k)` when the output is `'retriever'`; otherwise the
     *   vector store (with a `k` property set when the output is `'vectorStore'`).
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        // server setup
        const address = nodeData.inputs?.milvusServerUrl as string
        const collectionName = nodeData.inputs?.milvusCollection as string

        // embeddings
        const embeddings = nodeData.inputs?.embeddings as Embeddings
        const topK = nodeData.inputs?.topK as string

        // output
        const output = nodeData.outputs?.output as string

        // format data: fall back to 4 when topK is missing or not a positive integer
        const parsedTopK = topK ? parseInt(String(topK), 10) : NaN
        const k = Number.isInteger(parsedTopK) && parsedTopK > 0 ? parsedTopK : 4

        // credential
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const milvusUser = getCredentialParam('milvusUser', credentialData, nodeData)
        const milvusPassword = getCredentialParam('milvusPassword', credentialData, nodeData)

        // init MilvusLibArgs
        const milVusArgs: MilvusLibArgs = {
            url: address,
            collectionName: collectionName
        }
        if (milvusUser) milVusArgs.username = milvusUser
        if (milvusPassword) milVusArgs.password = milvusPassword

        const vectorStore = await Milvus.fromExistingCollection(embeddings, milVusArgs)

        // Avoid Illegal Invocation
        // The search method is replaced on this instance; every client call goes through
        // `vectorStore.client` explicitly.
        vectorStore.similaritySearchVectorWithScore = async (query: number[], k: number, filter?: string) => {
            const hasColResp = await vectorStore.client.hasCollection({
                collection_name: vectorStore.collectionName
            })
            if (hasColResp.status.error_code !== ErrorCode.SUCCESS) {
                // Serialise the response: interpolating the object directly prints "[object Object]".
                throw new Error(`Error checking collection: ${JSON.stringify(hasColResp)}`)
            }
            if (hasColResp.value === false) {
                throw new Error(`Collection not found: ${vectorStore.collectionName}, please create collection before search.`)
            }

            const filterStr = filter ?? ''

            await vectorStore.grabCollectionFields()

            const loadResp = await vectorStore.client.loadCollectionSync({
                collection_name: vectorStore.collectionName
            })
            if (loadResp.error_code !== ErrorCode.SUCCESS) {
                throw new Error(`Error loading collection: ${JSON.stringify(loadResp)}`)
            }

            // Return every known field except the vector itself.
            const outputFields = vectorStore.fields.filter((field) => field !== vectorStore.vectorField)

            const searchResp = await vectorStore.client.search({
                collection_name: vectorStore.collectionName,
                search_params: {
                    anns_field: vectorStore.vectorField,
                    topk: k.toString(),
                    metric_type: vectorStore.indexCreateParams.metric_type,
                    params: vectorStore.indexSearchParams
                },
                output_fields: outputFields,
                vector_type: DataType.FloatVector,
                vectors: [query],
                filter: filterStr
            })
            if (searchResp.status.error_code !== ErrorCode.SUCCESS) {
                throw new Error(`Error searching data: ${JSON.stringify(searchResp)}`)
            }

            const results: [Document, number][] = []
            searchResp.results.forEach((result) => {
                const fields = {
                    pageContent: '',
                    metadata: {} as Record<string, any>
                }
                Object.keys(result).forEach((key) => {
                    if (key === vectorStore.textField) {
                        fields.pageContent = result[key]
                    } else if (vectorStore.fields.includes(key) || key === vectorStore.primaryField) {
                        if (typeof result[key] === 'string') {
                            // String values that parse as JSON are stored in metadata as the parsed value.
                            const { isJson, obj } = checkJsonString(result[key])
                            fields.metadata[key] = isJson ? obj : result[key]
                        } else {
                            fields.metadata[key] = result[key]
                        }
                    }
                })
                results.push([new Document(fields), result.score])
            })
            return results
        }

        if (output === 'retriever') {
            const retriever = vectorStore.asRetriever(k)
            return retriever
        } else if (output === 'vectorStore') {
            ;(vectorStore as any).k = k
            return vectorStore
        }
        return vectorStore
    }
}
/**
 * Attempts to parse `value` as JSON.
 *
 * @param value - Text to parse.
 * @returns `{ isJson: true, obj: <parsed value> }` when `JSON.parse` succeeds,
 *   `{ isJson: false, obj: null }` when it throws.
 */
function checkJsonString(value: string): { isJson: boolean; obj: any } {
    let parsed: any
    try {
        parsed = JSON.parse(value)
    } catch (_parseError) {
        return { isJson: false, obj: null }
    }
    return { isJson: true, obj: parsed }
}
module.exports = { nodeClass: Milvus_Existing_VectorStores }

View File

@ -0,0 +1,281 @@
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { DataType, ErrorCode, MetricType, IndexType } from '@zilliz/milvus2-sdk-node'
import { MilvusLibArgs, Milvus } from 'langchain/vectorstores/milvus'
import { Embeddings } from 'langchain/embeddings/base'
import { Document } from 'langchain/document'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { flatten } from 'lodash'
/** One row handed to the Milvus client's insert call: field name mapped to a string or a number array. */
interface InsertRow {
    [fieldName: string]: string | number[]
}
/**
 * Flowise node that embeds the supplied documents, writes them to a Milvus collection
 * through `MilvusUpsert`, and returns a retriever or the vector store itself.
 */
class Milvus_Upsert_VectorStores implements INode {
    label: string
    name: string
    version: number
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    credential: INodeParams
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'Milvus Upsert Document'
        this.name = 'milvusUpsert'
        this.version = 1.0
        this.type = 'Milvus'
        this.icon = 'milvus.svg'
        this.category = 'Vector Stores'
        this.description = 'Upsert documents to Milvus'
        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
        // Optional: username/password are only applied in init() when they are present.
        this.credential = {
            label: 'Connect Credential',
            name: 'credential',
            type: 'credential',
            optional: true,
            credentialNames: ['milvusAuth']
        }
        this.inputs = [
            {
                label: 'Document',
                name: 'document',
                type: 'Document',
                list: true
            },
            {
                label: 'Embeddings',
                name: 'embeddings',
                type: 'Embeddings'
            },
            {
                label: 'Milvus Server URL',
                name: 'milvusServerUrl',
                type: 'string',
                placeholder: 'http://localhost:19530'
            },
            {
                label: 'Milvus Collection Name',
                name: 'milvusCollection',
                type: 'string'
            },
            // init() reads `topK`; without this input the value could never be supplied.
            {
                label: 'Top K',
                name: 'topK',
                placeholder: '4',
                type: 'number',
                additionalParams: true,
                optional: true
            }
        ]
        this.outputs = [
            {
                label: 'Milvus Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Milvus Vector Store',
                name: 'vectorStore',
                baseClasses: [this.type, ...getBaseClasses(Milvus)]
            }
        ]
    }

    /**
     * Upserts the input documents and returns the selected output.
     *
     * @param nodeData - Supplies `inputs.milvusServerUrl`, `inputs.milvusCollection`,
     *   `inputs.document`, `inputs.embeddings`, optional `inputs.topK`, `outputs.output`
     *   and `credential`.
     * @param _ - Unused.
     * @param options - Forwarded to `getCredentialData`.
     * @returns `vectorStore.asRetriever(k)` when the output is `'retriever'`; otherwise the
     *   vector store (with a `k` property set when the output is `'vectorStore'`).
     */
    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
        // server setup
        const address = nodeData.inputs?.milvusServerUrl as string
        const collectionName = nodeData.inputs?.milvusCollection as string

        // embeddings
        const docs = nodeData.inputs?.document as Document[]
        const embeddings = nodeData.inputs?.embeddings as Embeddings
        const topK = nodeData.inputs?.topK as string

        // output
        const output = nodeData.outputs?.output as string

        // format data: fall back to 4 when topK is missing or not a positive integer
        const parsedTopK = topK ? parseInt(String(topK), 10) : NaN
        const k = Number.isInteger(parsedTopK) && parsedTopK > 0 ? parsedTopK : 4

        // credential
        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
        const milvusUser = getCredentialParam('milvusUser', credentialData, nodeData)
        const milvusPassword = getCredentialParam('milvusPassword', credentialData, nodeData)

        // init MilvusLibArgs
        const milVusArgs: MilvusLibArgs = {
            url: address,
            collectionName: collectionName
        }
        if (milvusUser) milVusArgs.username = milvusUser
        if (milvusPassword) milVusArgs.password = milvusPassword

        // The document input is flattened one level, then each entry is wrapped in a Document.
        const flattenDocs = docs && docs.length ? flatten(docs) : []
        const finalDocs = []
        for (let i = 0; i < flattenDocs.length; i += 1) {
            finalDocs.push(new Document(flattenDocs[i]))
        }

        const vectorStore = await MilvusUpsert.fromDocuments(finalDocs, embeddings, milVusArgs)

        // Avoid Illegal Invocation
        // The search method is replaced on this instance; every client call goes through
        // `vectorStore.client` explicitly.
        vectorStore.similaritySearchVectorWithScore = async (query: number[], k: number, filter?: string) => {
            const hasColResp = await vectorStore.client.hasCollection({
                collection_name: vectorStore.collectionName
            })
            if (hasColResp.status.error_code !== ErrorCode.SUCCESS) {
                // Serialise the response: interpolating the object directly prints "[object Object]".
                throw new Error(`Error checking collection: ${JSON.stringify(hasColResp)}`)
            }
            if (hasColResp.value === false) {
                throw new Error(`Collection not found: ${vectorStore.collectionName}, please create collection before search.`)
            }

            const filterStr = filter ?? ''

            await vectorStore.grabCollectionFields()

            const loadResp = await vectorStore.client.loadCollectionSync({
                collection_name: vectorStore.collectionName
            })
            if (loadResp.error_code !== ErrorCode.SUCCESS) {
                throw new Error(`Error loading collection: ${JSON.stringify(loadResp)}`)
            }

            // Return every known field except the vector itself.
            const outputFields = vectorStore.fields.filter((field) => field !== vectorStore.vectorField)

            const searchResp = await vectorStore.client.search({
                collection_name: vectorStore.collectionName,
                search_params: {
                    anns_field: vectorStore.vectorField,
                    topk: k.toString(),
                    metric_type: vectorStore.indexCreateParams.metric_type,
                    params: vectorStore.indexSearchParams
                },
                output_fields: outputFields,
                vector_type: DataType.FloatVector,
                vectors: [query],
                filter: filterStr
            })
            if (searchResp.status.error_code !== ErrorCode.SUCCESS) {
                throw new Error(`Error searching data: ${JSON.stringify(searchResp)}`)
            }

            const results: [Document, number][] = []
            searchResp.results.forEach((result) => {
                const fields = {
                    pageContent: '',
                    metadata: {} as Record<string, any>
                }
                Object.keys(result).forEach((key) => {
                    if (key === vectorStore.textField) {
                        fields.pageContent = result[key]
                    } else if (vectorStore.fields.includes(key) || key === vectorStore.primaryField) {
                        if (typeof result[key] === 'string') {
                            // String values that parse as JSON are stored in metadata as the parsed value.
                            const { isJson, obj } = checkJsonString(result[key])
                            fields.metadata[key] = isJson ? obj : result[key]
                        } else {
                            fields.metadata[key] = result[key]
                        }
                    }
                })
                results.push([new Document(fields), result.score])
            })
            return results
        }

        if (output === 'retriever') {
            const retriever = vectorStore.asRetriever(k)
            return retriever
        } else if (output === 'vectorStore') {
            ;(vectorStore as any).k = k
            return vectorStore
        }
        return vectorStore
    }
}
/**
 * Reports whether `value` is parseable JSON and, if so, hands back the parsed value.
 *
 * @param value - Candidate JSON text.
 * @returns `isJson` is true with `obj` set to the `JSON.parse` result on success;
 *   on a parse failure `isJson` is false and `obj` is null.
 */
function checkJsonString(value: string): { isJson: boolean; obj: any } {
    const outcome: { isJson: boolean; obj: any } = { isJson: false, obj: null }
    try {
        outcome.obj = JSON.parse(value)
        outcome.isJson = true
    } catch (_parseError) {
        // Not JSON: keep the failure defaults.
    }
    return outcome
}
/**
 * `Milvus` subclass with its own `addVectors`: it builds the insert rows, creates an
 * index when `describeIndex` reports that none exists, inserts the rows and flushes.
 */
class MilvusUpsert extends Milvus {
    /**
     * Inserts one row per vector into the collection.
     *
     * @param vectors - Embedding vectors; nothing happens when this is empty.
     * @param documents - Documents paired with `vectors` by position.
     * @throws Error when fewer documents than vectors are supplied, when a required
     *   primary-key or metadata field is missing from a document's metadata, or when
     *   index creation or insertion reports a non-success error code.
     */
    async addVectors(vectors: number[][], documents: Document[]): Promise<void> {
        if (vectors.length === 0) {
            return
        }
        // Fail early with a clear message instead of a TypeError on `doc.pageContent` below.
        if (documents.length < vectors.length) {
            throw new Error(`Received ${vectors.length} vectors but only ${documents.length} documents.`)
        }
        await this.ensureCollection(vectors, documents)

        const insertDatas: InsertRow[] = []
        for (let index = 0; index < vectors.length; index++) {
            const vec = vectors[index]
            const doc = documents[index]
            const data: InsertRow = {
                [this.textField]: doc.pageContent,
                [this.vectorField]: vec
            }
            this.fields.forEach((field) => {
                switch (field) {
                    case this.primaryField:
                        // With autoId the primary key is left out of the row.
                        if (!this.autoId) {
                            if (doc.metadata[this.primaryField] === undefined) {
                                throw new Error(
                                    `The Collection's primaryField is configured with autoId=false, thus its value must be provided through metadata.`
                                )
                            }
                            data[field] = doc.metadata[this.primaryField]
                        }
                        break
                    case this.textField:
                        data[field] = doc.pageContent
                        break
                    case this.vectorField:
                        data[field] = vec
                        break
                    default: // metadata fields
                        if (doc.metadata[field] === undefined) {
                            throw new Error(`The field "${field}" is not provided in documents[${index}].metadata.`)
                        } else if (typeof doc.metadata[field] === 'object') {
                            // Object-valued metadata is stored as its JSON text.
                            data[field] = JSON.stringify(doc.metadata[field])
                        } else {
                            data[field] = doc.metadata[field]
                        }
                        break
                }
            })
            insertDatas.push(data)
        }

        const descIndexResp = await this.client.describeIndex({
            collection_name: this.collectionName
        })
        if (descIndexResp.status.error_code === ErrorCode.INDEX_NOT_EXIST) {
            const resp = await this.client.createIndex({
                collection_name: this.collectionName,
                field_name: this.vectorField,
                index_name: `myindex_${Date.now().toString()}`,
                index_type: IndexType.AUTOINDEX,
                metric_type: MetricType.L2
            })
            if (resp.error_code !== ErrorCode.SUCCESS) {
                // Include the response so the failure can be diagnosed.
                throw new Error(`Error creating index: ${JSON.stringify(resp)}`)
            }
        }

        const insertResp = await this.client.insert({
            collection_name: this.collectionName,
            fields_data: insertDatas
        })
        if (insertResp.status.error_code !== ErrorCode.SUCCESS) {
            throw new Error(`Error inserting data: ${JSON.stringify(insertResp)}`)
        }
        await this.client.flushSync({ collection_names: [this.collectionName] })
    }
}
module.exports = { nodeClass: Milvus_Upsert_VectorStores }

View File

@ -0,0 +1,5 @@
<svg width="362" height="246" viewBox="0 0 362 246" fill="none" xmlns="http://www.w3.org/2000/svg">
<path d="M255.407 36.2761C207.644 -11.661 130.191 -11.661 82.427 36.2761L4.35891 114.626C-0.255949 119.262 -0.255949 126.728 4.35891 131.364L82.427 209.714C130.191 257.651 207.644 257.651 255.407 209.793C303.248 161.934 303.248 84.2132 255.407 36.2761ZM236.948 186.767C201.875 221.973 144.958 221.973 109.885 186.767L52.4304 129.164C49.0461 125.785 49.0461 120.284 52.4304 116.826L109.808 59.3016C144.881 24.0953 201.798 24.0953 236.871 59.3016C272.021 94.5078 272.021 151.561 236.948 186.767Z" fill="#00B3FF"/>
<path d="M357.699 114.704L323.318 79.5765C321.241 77.4547 317.78 79.4193 318.472 82.327C324.395 109.125 324.395 137.101 318.472 163.899C317.857 166.806 321.318 168.692 323.318 166.649L357.699 131.521C362.237 126.806 362.237 119.341 357.699 114.704Z" fill="#00B3FF"/>
<path d="M173.799 184.646C207.059 184.646 234.023 157.097 234.023 123.113C234.023 89.13 207.059 61.5811 173.799 61.5811C140.538 61.5811 113.575 89.13 113.575 123.113C113.575 157.097 140.538 184.646 173.799 184.646Z" fill="#00B3FF"/>
</svg>

After

Width:  |  Height:  |  Size: 1.1 KiB

View File

@ -1,132 +0,0 @@
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { Milvus, MilvusLibArgs } from 'langchain/vectorstores/milvus'
import { Embeddings } from 'langchain/embeddings/base'
import { getBaseClasses } from '../../../src/utils'
/**
 * Flowise node that opens an existing Milvus collection via
 * `Milvus.fromExistingCollection` and returns a retriever or the vector store.
 */
class Milvus_Existing_VectorStores implements INode {
    label: string
    name: string
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]

    constructor() {
        // Flags shared by every optional setting in the inputs list.
        const advancedFlags = { optional: true, additionalParams: true }

        this.label = 'Milvus Load Existing Index'
        this.name = 'milvusExistingIndex'
        this.type = 'Milvus'
        this.icon = 'milvus.jpg'
        this.category = 'Vector Stores'
        this.description = 'Load existing index from Milvus (i.e: Document has been upserted)'
        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
        this.inputs = [
            { label: 'Embeddings', name: 'embeddings', type: 'Embeddings' },
            { label: 'Collection Name', name: 'milvusCollectionName', type: 'string', placeholder: 'my-milvus-collection' },
            { label: 'Milvus URL', name: 'milvusURL', type: 'string', placeholder: 'http://localhost:19530' },
            { label: 'Primary Field', name: 'milvusPrimaryField', type: 'string', ...advancedFlags },
            { label: 'Vector Field', name: 'milvusVectorField', type: 'string', ...advancedFlags },
            { label: 'Vector Text Field', name: 'milvusTextField', type: 'string', ...advancedFlags },
            { label: 'SSL', name: 'milvusSSL', type: 'boolean', ...advancedFlags },
            { label: 'Username', name: 'milvusUsername', type: 'string', ...advancedFlags },
            { label: 'Password', name: 'milvusPassword', type: 'password', ...advancedFlags }
        ]
        this.outputs = [
            { label: 'Milvus Retriever', name: 'retriever', baseClasses: this.baseClasses },
            { label: 'Milvus Vector Store', name: 'vectorStore', baseClasses: [this.type, ...getBaseClasses(Milvus)] }
        ]
    }

    /**
     * Builds the Milvus arguments from the node inputs and opens the collection.
     *
     * @param nodeData - Supplies the `inputs` listed in the constructor and `outputs.output`.
     * @returns `vectorStore.asRetriever()` when the output is `'retriever'`, otherwise the vector store.
     */
    async init(nodeData: INodeData): Promise<any> {
        const inputs = nodeData.inputs
        const collectionName = inputs?.milvusCollectionName as string
        const embeddings = inputs?.embeddings as Embeddings
        const milvusURL = inputs?.milvusURL as string
        const primaryField = inputs?.milvusPrimaryField as string
        const vectorField = inputs?.milvusVectorField as string
        const textField = inputs?.milvusTextField as string
        const ssl = inputs?.milvusSSL as boolean
        const username = inputs?.milvusUsername as string
        const password = inputs?.milvusPassword as string
        const selectedOutput = nodeData.outputs?.output as string

        // Optional settings are only included when they hold a truthy value.
        const libArgs: MilvusLibArgs = {
            collectionName,
            url: milvusURL,
            ...(primaryField ? { primaryField } : {}),
            ...(vectorField ? { vectorField } : {}),
            ...(textField ? { textField } : {}),
            ...(ssl ? { ssl } : {}),
            ...(username ? { username } : {}),
            ...(password ? { password } : {})
        }

        const vectorStore = await Milvus.fromExistingCollection(embeddings, libArgs)

        // Any output other than 'retriever' yields the vector store itself.
        return selectedOutput === 'retriever' ? vectorStore.asRetriever() : vectorStore
    }
}
module.exports = { nodeClass: Milvus_Existing_VectorStores }

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.1 KiB

View File

@ -1,148 +0,0 @@
import { INode, INodeData, INodeOutputsValue, INodeParams } from '../../../src/Interface'
import { Milvus, MilvusLibArgs } from 'langchain/vectorstores/milvus'
import { Embeddings } from 'langchain/embeddings/base'
import { getBaseClasses } from '../../../src/utils'
import { Document } from 'langchain/document'
/**
 * Flowise node that writes the supplied documents to Milvus via `Milvus.fromDocuments`
 * and returns a retriever or the vector store.
 */
class Milvus_Upsert_VectorStores implements INode {
    label: string
    name: string
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]
    outputs: INodeOutputsValue[]

    constructor() {
        this.label = 'Milvus Upsert Document'
        this.name = 'milvusUpsert'
        this.type = 'Milvus'
        this.icon = 'milvus.jpg'
        this.category = 'Vector Stores'
        this.description = 'Upsert documents to Milvus'
        this.baseClasses = [this.type, 'VectorStoreRetriever', 'BaseRetriever']
        this.inputs = [
            {
                label: 'Document',
                name: 'document',
                type: 'Document',
                list: true
            },
            {
                label: 'Embeddings',
                name: 'embeddings',
                type: 'Embeddings'
            },
            {
                label: 'Collection Name',
                name: 'milvusCollectionName',
                type: 'string',
                placeholder: 'my_milvus_collection'
            },
            {
                label: 'Milvus URL',
                name: 'milvusURL',
                type: 'string',
                placeholder: 'http://localhost:19530'
            },
            {
                label: 'Primary Field',
                name: 'milvusPrimaryField',
                type: 'string',
                optional: true,
                additionalParams: true
            },
            {
                label: 'Vector Field',
                name: 'milvusVectorField',
                type: 'string',
                optional: true,
                additionalParams: true
            },
            {
                label: 'Vector Text Field',
                name: 'milvusTextField',
                type: 'string',
                optional: true,
                additionalParams: true
            },
            {
                label: 'SSL',
                name: 'milvusSSL',
                type: 'boolean',
                optional: true,
                additionalParams: true
            },
            {
                label: 'Username',
                name: 'milvusUsername',
                type: 'string',
                placeholder: 'db_admin',
                optional: true,
                additionalParams: true
            },
            {
                label: 'Password',
                name: 'milvusPassword',
                type: 'password',
                optional: true,
                additionalParams: true
            }
        ]
        this.outputs = [
            {
                label: 'Milvus Retriever',
                name: 'retriever',
                baseClasses: this.baseClasses
            },
            {
                label: 'Milvus Vector Store',
                name: 'vectorStore',
                baseClasses: [this.type, ...getBaseClasses(Milvus)]
            }
        ]
    }

    /**
     * Builds the Milvus arguments from the node inputs and upserts the documents.
     *
     * @param nodeData - Supplies the `inputs` listed in the constructor and `outputs.output`.
     * @returns `vectorStore.asRetriever()` when the output is `'retriever'`, otherwise the vector store.
     */
    async init(nodeData: INodeData): Promise<any> {
        const collectionName = nodeData.inputs?.milvusCollectionName as string
        const embeddings = nodeData.inputs?.embeddings as Embeddings
        const milvusURL = nodeData.inputs?.milvusURL as string
        const milvusPrimaryField = nodeData.inputs?.milvusPrimaryField as string
        const milvusVectorField = nodeData.inputs?.milvusVectorField as string
        const milvusTextField = nodeData.inputs?.milvusTextField as string
        const milvusSSL = nodeData.inputs?.milvusSSL as boolean
        const milvusUsername = nodeData.inputs?.milvusUsername as string
        const milvusPassword = nodeData.inputs?.milvusPassword as string
        const output = nodeData.outputs?.output as string
        const docs = nodeData.inputs?.document as Document[]

        // The document input is flattened one level, then each entry is wrapped in a Document.
        const flattenDocs = docs && docs.length ? docs.flat() : []
        const finalDocs = []
        for (let i = 0; i < flattenDocs.length; i += 1) {
            finalDocs.push(new Document(flattenDocs[i]))
        }

        // Optional settings are only set when they hold a truthy value.
        const obj: MilvusLibArgs = { collectionName, url: milvusURL }
        if (milvusPrimaryField) obj.primaryField = milvusPrimaryField
        if (milvusVectorField) obj.vectorField = milvusVectorField
        if (milvusTextField) obj.textField = milvusTextField
        if (milvusSSL) obj.ssl = milvusSSL
        if (milvusUsername) obj.username = milvusUsername
        if (milvusPassword) obj.password = milvusPassword

        // The vector store is deliberately not logged: it was built from args that
        // include the username and password.
        const vectorStore = await Milvus.fromDocuments(finalDocs, embeddings, obj)

        if (output === 'retriever') {
            const retriever = vectorStore.asRetriever()
            return retriever
        } else if (output === 'vectorStore') {
            return vectorStore
        }
        return vectorStore
    }
}
module.exports = { nodeClass: Milvus_Upsert_VectorStores }

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.1 KiB

View File

@ -28,6 +28,7 @@
"@types/js-yaml": "^4.0.5",
"apify-client": "^2.7.1",
"@types/jsdom": "^21.1.1",
"@zilliz/milvus2-sdk-node": "^2.2.24",
"axios": "^0.27.2",
"cheerio": "^1.0.0-rc.12",
"chromadb": "^1.5.3",