Compare commits

...

3 Commits

Author SHA1 Message Date
Henry f3f8b71073 refactor Chroma configuration handling 2025-11-30 16:44:58 +00:00
Henry fb4c800d83 refactor Chroma configuration handling 2025-11-30 15:11:20 +00:00
Henry 079b212a5f update chroma 2025-11-30 13:11:41 +00:00
4 changed files with 435 additions and 111 deletions

View File

@ -1,10 +1,9 @@
import { flatten } from 'lodash' import { flatten } from 'lodash'
import { Chroma } from '@langchain/community/vectorstores/chroma'
import { Embeddings } from '@langchain/core/embeddings' import { Embeddings } from '@langchain/core/embeddings'
import { Document } from '@langchain/core/documents' import { Document } from '@langchain/core/documents'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface' import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam, parseJsonBody } from '../../../src/utils' import { getBaseClasses, getCredentialData, getCredentialParam, parseJsonBody } from '../../../src/utils'
import { ChromaExtended } from './core' import { Chroma } from './core'
import { index } from '../../../src/indexing' import { index } from '../../../src/indexing'
class Chroma_VectorStores implements INode { class Chroma_VectorStores implements INode {
@ -123,21 +122,11 @@ class Chroma_VectorStores implements INode {
} }
} }
const obj: { const obj = _buildChromaConfig(collectionName, chromaURL, chromaApiKey, chromaTenant, chromaDatabase)
collectionName: string
url?: string
chromaApiKey?: string
chromaTenant?: string
chromaDatabase?: string
} = { collectionName }
if (chromaURL) obj.url = chromaURL
if (chromaApiKey) obj.chromaApiKey = chromaApiKey
if (chromaTenant) obj.chromaTenant = chromaTenant
if (chromaDatabase) obj.chromaDatabase = chromaDatabase
try { try {
if (recordManager) { if (recordManager) {
const vectorStore = await ChromaExtended.fromExistingCollection(embeddings, obj) const vectorStore = await Chroma.fromExistingCollection(embeddings, obj)
await recordManager.createSchema() await recordManager.createSchema()
const res = await index({ const res = await index({
docsSource: finalDocs, docsSource: finalDocs,
@ -151,7 +140,7 @@ class Chroma_VectorStores implements INode {
}) })
return res return res
} else { } else {
await ChromaExtended.fromDocuments(finalDocs, embeddings, obj) await Chroma.fromDocuments(finalDocs, embeddings, obj)
return { numAdded: finalDocs.length, addedDocs: finalDocs } return { numAdded: finalDocs.length, addedDocs: finalDocs }
} }
} catch (e) { } catch (e) {
@ -169,17 +158,7 @@ class Chroma_VectorStores implements INode {
const chromaTenant = getCredentialParam('chromaTenant', credentialData, nodeData) const chromaTenant = getCredentialParam('chromaTenant', credentialData, nodeData)
const chromaDatabase = getCredentialParam('chromaDatabase', credentialData, nodeData) const chromaDatabase = getCredentialParam('chromaDatabase', credentialData, nodeData)
const obj: { const obj = _buildChromaConfig(collectionName, chromaURL, chromaApiKey, chromaTenant, chromaDatabase)
collectionName: string
url?: string
chromaApiKey?: string
chromaTenant?: string
chromaDatabase?: string
} = { collectionName }
if (chromaURL) obj.url = chromaURL
if (chromaApiKey) obj.chromaApiKey = chromaApiKey
if (chromaTenant) obj.chromaTenant = chromaTenant
if (chromaDatabase) obj.chromaDatabase = chromaDatabase
try { try {
if (recordManager) { if (recordManager) {
@ -192,12 +171,12 @@ class Chroma_VectorStores implements INode {
} }
const keys: string[] = await recordManager.listKeys(filterKeys) const keys: string[] = await recordManager.listKeys(filterKeys)
const chromaStore = new ChromaExtended(embeddings, obj) const chromaStore = new Chroma(embeddings, obj)
await chromaStore.delete({ ids: keys }) await chromaStore.delete({ ids: keys })
await recordManager.deleteKeys(keys) await recordManager.deleteKeys(keys)
} else { } else {
const chromaStore = new ChromaExtended(embeddings, obj) const chromaStore = new Chroma(embeddings, obj)
await chromaStore.delete({ ids }) await chromaStore.delete({ ids })
} }
} catch (e) { } catch (e) {
@ -220,24 +199,14 @@ class Chroma_VectorStores implements INode {
const chromaDatabase = getCredentialParam('chromaDatabase', credentialData, nodeData) const chromaDatabase = getCredentialParam('chromaDatabase', credentialData, nodeData)
const chromaMetadataFilter = nodeData.inputs?.chromaMetadataFilter const chromaMetadataFilter = nodeData.inputs?.chromaMetadataFilter
const obj: { const obj: ICommonObject = _buildChromaConfig(collectionName, chromaURL, chromaApiKey, chromaTenant, chromaDatabase)
collectionName: string
url?: string
chromaApiKey?: string
chromaTenant?: string
chromaDatabase?: string
filter?: object | undefined
} = { collectionName }
if (chromaURL) obj.url = chromaURL
if (chromaApiKey) obj.chromaApiKey = chromaApiKey
if (chromaTenant) obj.chromaTenant = chromaTenant
if (chromaDatabase) obj.chromaDatabase = chromaDatabase
if (chromaMetadataFilter) { if (chromaMetadataFilter) {
const metadatafilter = typeof chromaMetadataFilter === 'object' ? chromaMetadataFilter : parseJsonBody(chromaMetadataFilter) const metadatafilter = typeof chromaMetadataFilter === 'object' ? chromaMetadataFilter : parseJsonBody(chromaMetadataFilter)
obj.filter = metadatafilter obj.filter = metadatafilter
} }
const vectorStore = await ChromaExtended.fromExistingCollection(embeddings, obj) const vectorStore = await Chroma.fromExistingCollection(embeddings, obj)
if (output === 'retriever') { if (output === 'retriever') {
const retriever = vectorStore.asRetriever(k) const retriever = vectorStore.asRetriever(k)
@ -253,4 +222,41 @@ class Chroma_VectorStores implements INode {
} }
} }
/**
 * Assembles the argument object handed to the local `Chroma` vector store.
 *
 * Only truthy inputs are copied into the result:
 * - `chromaURL` becomes `url`
 * - `chromaApiKey` becomes `chromaCloudAPIKey`
 * - `chromaTenant` / `chromaDatabase` become `clientParams.tenant` / `clientParams.database`
 *
 * `clientParams` is only created when a tenant or database is supplied. When an
 * API key is supplied as well, `clientParams` additionally carries the fixed
 * host `api.trychroma.com`, port `8000` and `ssl: true`.
 *
 * @param collectionName Name of the collection to operate on (always present in the result).
 * @param chromaURL Optional server URL.
 * @param chromaApiKey Optional API key.
 * @param chromaTenant Optional tenant.
 * @param chromaDatabase Optional database.
 * @returns The configuration object described above.
 */
const _buildChromaConfig = (
    collectionName: string,
    chromaURL: string | undefined,
    chromaApiKey: string | undefined,
    chromaTenant: string | undefined,
    chromaDatabase: string | undefined
): ICommonObject => {
    const config: ICommonObject = {
        collectionName,
        ...(chromaURL ? { url: chromaURL } : {}),
        ...(chromaApiKey ? { chromaCloudAPIKey: chromaApiKey } : {})
    }

    // Without a tenant or a database there is nothing to put in clientParams.
    if (!chromaTenant && !chromaDatabase) {
        return config
    }

    config.clientParams = {
        ...(chromaTenant ? { tenant: chromaTenant } : {}),
        ...(chromaDatabase ? { database: chromaDatabase } : {}),
        ...(chromaApiKey ? { host: 'api.trychroma.com', port: 8000, ssl: true } : {})
    }
    return config
}
module.exports = { nodeClass: Chroma_VectorStores } module.exports = { nodeClass: Chroma_VectorStores }

View File

@ -1,58 +1,345 @@
import { Chroma, ChromaLibArgs } from '@langchain/community/vectorstores/chroma' import * as uuid from 'uuid'
import { Embeddings } from '@langchain/core/embeddings' import type {
import type { Collection } from 'chromadb' ChromaClient as ChromaClientT,
import { ChromaClient } from 'chromadb' ChromaClientArgs,
Collection,
CollectionConfiguration,
CollectionMetadata,
Where
} from 'chromadb'
interface ChromaAuth { import type { EmbeddingsInterface } from '@langchain/core/embeddings'
chromaApiKey?: string import { VectorStore } from '@langchain/core/vectorstores'
import { Document } from '@langchain/core/documents'
type SharedChromaLibArgs = {
numDimensions?: number
collectionName?: string
filter?: object
collectionMetadata?: CollectionMetadata
collectionConfiguration?: CollectionConfiguration
chromaCloudAPIKey?: string
clientParams?: Omit<ChromaClientArgs, 'path'>
} }
export class ChromaExtended extends Chroma { export type ChromaLibArgs =
chromaApiKey?: string | ({
chromaTenant?: string url?: string
chromaDatabase?: string } & SharedChromaLibArgs)
| ({
index?: ChromaClientT
} & SharedChromaLibArgs)
constructor(embeddings: Embeddings, args: ChromaLibArgs & Partial<ChromaAuth>) { export interface ChromaDeleteParams<T> {
ids?: string[]
filter?: T
}
export class Chroma extends VectorStore {
declare FilterType: Where
index?: ChromaClientT
collection?: Collection
collectionName: string
collectionMetadata?: CollectionMetadata
numDimensions?: number
clientParams?: Omit<ChromaClientArgs, 'path'>
url: string
filter?: object
_vectorstoreType(): string {
return 'chroma'
}
constructor(embeddings: EmbeddingsInterface, args: ChromaLibArgs) {
super(embeddings, args) super(embeddings, args)
this.chromaApiKey = args.chromaApiKey this.numDimensions = args.numDimensions
this.embeddings = embeddings
this.collectionName = ensureCollectionName(args.collectionName)
this.collectionMetadata = args.collectionMetadata
this.clientParams = args.clientParams || {}
if ('index' in args) {
this.index = args.index
} else if ('url' in args) {
this.url = args.url || 'http://localhost:8000'
} }
static async fromExistingCollection(embeddings: Embeddings, dbConfig: ChromaLibArgs & Partial<ChromaAuth>): Promise<Chroma> { if (args.chromaCloudAPIKey) {
const instance = new this(embeddings, dbConfig) this.clientParams.headers = {
await instance.ensureCollection() ...(this.clientParams?.headers || {}),
return instance 'x-chroma-token': args.chromaCloudAPIKey
}
} }
this.filter = args.filter
}
/**
 * Embeds the `pageContent` of each document with this store's `embeddings`
 * instance and forwards the resulting vectors to `addVectors`.
 * @param documents The `Document` instances to embed and store.
 * @param options Optional. May carry an `ids` array that is passed through to `addVectors`.
 * @returns A promise resolving to whatever `addVectors` returns (the ids of the stored entries).
 */
async addDocuments(documents: Document[], options?: { ids?: string[] }) {
    const pageContents = documents.map((doc) => doc.pageContent)
    const vectors = await this.embeddings.embedDocuments(pageContents)
    return this.addVectors(vectors, documents, options)
}
/**
* Ensures that a collection exists in the Chroma database. If the
* collection does not exist, it is created.
* @returns A promise that resolves with the `Collection` instance.
*/
async ensureCollection(): Promise<Collection> { async ensureCollection(): Promise<Collection> {
if (!this.collection) { if (!this.collection) {
if (!this.index) { if (!this.index) {
const obj: any = { this.index = new (await Chroma.imports()).ChromaClient({
path: this.url path: this.url,
} ...(this.clientParams ?? {})
if (this.chromaApiKey) { })
obj.fetchOptions = {
headers: {
'x-chroma-token': this.chromaApiKey
}
}
}
if (this.chromaTenant) {
obj.tenant = this.chromaTenant
}
if (this.chromaDatabase) {
obj.database = this.chromaDatabase
}
this.index = new ChromaClient(obj)
} }
try { try {
this.collection = await this.index!.getOrCreateCollection({ this.collection = await this.index.getOrCreateCollection({
name: this.collectionName, name: this.collectionName,
embeddingFunction: null,
...(this.collectionMetadata && { metadata: this.collectionMetadata }) ...(this.collectionMetadata && { metadata: this.collectionMetadata })
}) })
} catch (err) { } catch (err) {
throw new Error(`Chroma getOrCreateCollection error: ${err}`) throw new Error(`Chroma getOrCreateCollection error: ${err}`)
} }
} }
return this.collection return this.collection
} }
/**
 * Upserts pre-computed vectors, together with their documents, into the
 * Chroma collection.
 *
 * The first call fixes `numDimensions` to the length of the first vector when
 * it was not configured explicitly. Document metadata is shallow-copied before
 * being sent: a nested `loc.lines.from` / `loc.lines.to` is copied to the flat
 * keys `locFrom` / `locTo` and the `loc` key is dropped from the copy
 * (presumably because Chroma metadata has to be flat; verify against the
 * chromadb documentation). The caller's metadata objects are not modified.
 *
 * @param vectors The vectors to store; an empty array is a no-op.
 * @param documents The `Document` instances associated with the vectors (same length and order).
 * @param options Optional. `ids` supplies one id per vector; when omitted, a UUID v1 is generated for each.
 * @returns A promise resolving to the ids under which the vectors were stored.
 * @throws Error when `vectors` and `documents` differ in length, when the first
 * vector does not match `numDimensions`, or when `options.ids` does not contain
 * exactly one id per vector.
 */
async addVectors(vectors: number[][], documents: Document[], options?: { ids?: string[] }) {
    if (vectors.length === 0) {
        return []
    }
    if (this.numDimensions === undefined) {
        this.numDimensions = vectors[0].length
    }
    if (vectors.length !== documents.length) {
        throw new Error(`Vectors and metadatas must have the same length`)
    }
    if (vectors[0].length !== this.numDimensions) {
        throw new Error(`Vectors must have the same length as the number of dimensions (${this.numDimensions})`)
    }
    // Fail early with a clear message instead of handing a mismatched id list to upsert.
    if (options?.ids && options.ids.length !== vectors.length) {
        throw new Error(`Expected ${vectors.length} ids but received ${options.ids.length}`)
    }

    const documentIds = options?.ids ?? Array.from({ length: vectors.length }, () => uuid.v1())
    const collection = await this.ensureCollection()

    const mappedMetadatas = documents.map(({ metadata }) => {
        let locFrom
        let locTo

        if (metadata?.loc) {
            if (metadata.loc.lines?.from !== undefined) locFrom = metadata.loc.lines.from
            if (metadata.loc.lines?.to !== undefined) locTo = metadata.loc.lines.to
        }

        // Work on a shallow copy so the caller's metadata keeps its `loc` entry.
        const newMetadata: Document['metadata'] = {
            ...metadata,
            ...(locFrom !== undefined && { locFrom }),
            ...(locTo !== undefined && { locTo })
        }

        if (newMetadata.loc) delete newMetadata.loc

        return newMetadata
    })

    await collection.upsert({
        ids: documentIds,
        embeddings: vectors,
        metadatas: mappedMetadatas,
        documents: documents.map(({ pageContent }) => pageContent)
    })
    return documentIds
}
/**
 * Removes entries from the Chroma collection, selected either by id or by a
 * metadata filter. When `ids` is an array it takes precedence over `filter`.
 * @param params An object with either an `ids` array or a `filter` object. An
 * empty `ids` array means there is nothing to delete and no delete request is issued.
 * @returns A promise that resolves once the matching entries have been deleted.
 * @throws Error when neither an `ids` array nor a `filter` is provided.
 */
async delete(params: ChromaDeleteParams<this['FilterType']>): Promise<void> {
    const collection = await this.ensureCollection()
    if (Array.isArray(params.ids)) {
        // Nothing to remove: skip the request rather than depend on how the
        // client/server treats an empty id list.
        if (params.ids.length === 0) {
            return
        }
        await collection.delete({ ids: params.ids })
    } else if (params.filter) {
        await collection.delete({
            where: { ...params.filter }
        })
    } else {
        throw new Error('You must provide one of "ids" or "filter".')
    }
}
/**
 * Queries the Chroma collection for the entries closest to `query` and
 * returns them as `[Document, distance]` tuples.
 *
 * The search may be restricted by the `filter` argument or by the instance's
 * `filter` property, but not by both at once. Flat `locFrom` / `locTo`
 * metadata keys (as written by `addVectors`) are folded back into
 * `loc.lines.from` / `loc.lines.to` on the returned documents.
 *
 * @param query The query vector.
 * @param k The number of results to request.
 * @param filter Optional. A `where` filter for this call only.
 * @returns A promise resolving to an array of `[Document, score]` tuples, where
 * the score is the distance reported by Chroma. Resolves to an empty array when
 * the response lacks ids, distances, documents or metadatas, or when any
 * reported distance is `null`.
 * @throws Error when both `filter` and `this.filter` are set.
 */
async similaritySearchVectorWithScore(query: number[], k: number, filter?: this['FilterType']) {
    if (filter && this.filter) {
        throw new Error('cannot provide both `filter` and `this.filter`')
    }
    const _filter = filter ?? this.filter
    const where = _filter === undefined ? undefined : { ..._filter }

    const collection = await this.ensureCollection()

    // similaritySearchVectorWithScore supports one query vector at a time
    // chroma supports multiple query vectors at a time
    const result = await collection.query({
        queryEmbeddings: [query],
        nResults: k,
        where
    })

    const { ids, distances, documents, metadatas } = result
    if (!ids || !distances || !documents || !metadatas) {
        return []
    }

    // get the result data from the first and only query vector
    const [firstIds] = ids
    const [firstDistances] = distances
    const [firstDocuments] = documents
    const [firstMetadatas] = metadatas

    // An empty outer array would leave these undefined; treat it as "no results".
    if (!firstIds || !firstDistances) {
        return []
    }

    if (firstDistances.some((item) => item === null)) {
        return []
    }
    // The filter is a no-op at runtime after the check above; it narrows the element type.
    const cleanDistances = firstDistances.filter((item) => item !== null)

    const results: [Document, number][] = []
    for (let i = 0; i < firstIds.length; i += 1) {
        let metadata: Document['metadata'] = firstMetadatas?.[i] ?? {}

        // Compare against null/undefined rather than truthiness so that a stored
        // line number of 0 is still restored, mirroring how addVectors writes these keys.
        const hasLocFrom = metadata.locFrom !== undefined && metadata.locFrom !== null
        const hasLocTo = metadata.locTo !== undefined && metadata.locTo !== null
        if (hasLocFrom && hasLocTo) {
            metadata = {
                ...metadata,
                loc: {
                    lines: {
                        from: metadata.locFrom,
                        to: metadata.locTo
                    }
                }
            }

            delete metadata.locFrom
            delete metadata.locTo
        }

        results.push([
            new Document({
                pageContent: firstDocuments?.[i] ?? '',
                metadata,
                id: firstIds[i]
            }),
            cleanDistances[i]
        ])
    }
    return results
}
/**
 * Builds a `Chroma` store from raw strings. Each string is wrapped in a
 * `Document` and the resulting list is handed to `fromDocuments`.
 * @param texts The text strings to store.
 * @param metadatas Either one metadata object shared by every text, or an array indexed in parallel with `texts`.
 * @param embeddings The embeddings instance used to embed the documents.
 * @param dbConfig The `ChromaLibArgs` configuration for the store.
 * @returns A promise resolving to the populated `Chroma` instance.
 */
static async fromTexts(
    texts: string[],
    metadatas: object[] | object,
    embeddings: EmbeddingsInterface,
    dbConfig: ChromaLibArgs
): Promise<Chroma> {
    const docs: Document[] = Array.from(
        texts,
        (pageContent, position) =>
            new Document({
                pageContent,
                // An array supplies one metadata object per text; anything else is shared by all texts.
                metadata: Array.isArray(metadatas) ? metadatas[position] : metadatas
            })
    )
    return this.fromDocuments(docs, embeddings, dbConfig)
}
/**
 * Constructs a store from `dbConfig` and adds the given documents to it
 * through `addDocuments`.
 * @param docs The `Document` instances to add.
 * @param embeddings The embeddings instance used to embed the documents.
 * @param dbConfig The `ChromaLibArgs` configuration for the store.
 * @returns A promise resolving to the store once the documents have been added.
 */
static async fromDocuments(docs: Document[], embeddings: EmbeddingsInterface, dbConfig: ChromaLibArgs): Promise<Chroma> {
    const store = new this(embeddings, dbConfig)
    await store.addDocuments(docs)
    return store
}
/**
 * Constructs a store from `dbConfig` and calls `ensureCollection` on it
 * before returning, without adding any documents.
 * @param embeddings The embeddings instance the store will use.
 * @param dbConfig The `ChromaLibArgs` configuration for the store.
 * @returns A promise resolving to the store once `ensureCollection` has completed.
 */
static async fromExistingCollection(embeddings: EmbeddingsInterface, dbConfig: ChromaLibArgs): Promise<Chroma> {
    const store = new this(embeddings, dbConfig)
    await store.ensureCollection()
    return store
}
/**
 * Loads the `chromadb` package on demand and exposes its `ChromaClient` class.
 * Any failure while loading is reported as an installation hint.
 * @ignore
 */
static async imports(): Promise<{
    ChromaClient: typeof ChromaClientT
}> {
    try {
        const chromaModule = await import('chromadb')
        return { ChromaClient: chromaModule.ChromaClient }
    } catch {
        throw new Error('Please install chromadb as a dependency with, e.g. `npm install -S chromadb`')
    }
}
}
/**
* Generates a unique collection name if none is provided.
*/
function ensureCollectionName(collectionName?: string) {
if (!collectionName) {
return `langchain-${uuid.v4()}`
}
return collectionName
} }

View File

@ -101,7 +101,7 @@
"assemblyai": "^4.2.2", "assemblyai": "^4.2.2",
"axios": "1.12.0", "axios": "1.12.0",
"cheerio": "^1.0.0-rc.12", "cheerio": "^1.0.0-rc.12",
"chromadb": "^1.10.0", "chromadb": "3.1.6",
"cohere-ai": "^7.7.5", "cohere-ai": "^7.7.5",
"composio-core": "^0.4.7", "composio-core": "^0.4.7",
"couchbase": "4.4.1", "couchbase": "4.4.1",

File diff suppressed because one or more lines are too long