From 44f111b4585b0fdefcd2933beca90f3d4063035d Mon Sep 17 00:00:00 2001 From: Henry Date: Sun, 30 Nov 2025 01:14:04 +0000 Subject: [PATCH] Remove postgres vector store data when deletion - Introduced a new `doc_id` column in MySQL, Postgres, and SQLite record managers to support document identification. - Updated the `update` method to handle both string and object formats for keys, allowing for better flexibility in document updates. - Enhanced `listKeys` method to filter by `doc_id` when provided in options. - Updated vector store integrations to utilize the new `doc_id` filtering capability --- .../MySQLRecordManager/MySQLrecordManager.ts | 47 ++- .../PostgresRecordManager.ts | 41 ++- .../SQLiteRecordManager.ts | 45 ++- .../nodes/vectorstores/Chroma/Chroma.ts | 6 +- .../Elasticsearch/Elasticsearch.ts | 6 +- .../nodes/vectorstores/Pinecone/Pinecone.ts | 6 +- .../nodes/vectorstores/Postgres/Postgres.ts | 28 +- .../vectorstores/Postgres/driver/TypeORM.ts | 43 ++- .../nodes/vectorstores/Qdrant/Qdrant.ts | 6 +- .../nodes/vectorstores/Supabase/Supabase.ts | 6 +- .../nodes/vectorstores/Upstash/Upstash.ts | 6 +- .../nodes/vectorstores/Weaviate/Weaviate.ts | 6 +- packages/components/src/indexing.ts | 12 +- .../src/controllers/documentstore/index.ts | 9 +- .../src/services/documentstore/index.ts | 57 +++- packages/ui/src/api/documentstore.js | 5 +- .../views/docstore/DeleteDocStoreDialog.jsx | 294 +++++++++++------- .../views/docstore/DocumentStoreDetail.jsx | 141 +++++++-- .../src/views/docstore/ShowStoredChunks.jsx | 23 +- .../views/docstore/VectorStoreConfigure.jsx | 50 ++- 20 files changed, 620 insertions(+), 217 deletions(-) diff --git a/packages/components/nodes/recordmanager/MySQLRecordManager/MySQLrecordManager.ts b/packages/components/nodes/recordmanager/MySQLRecordManager/MySQLrecordManager.ts index eb38144e7..9c6e25a41 100644 --- a/packages/components/nodes/recordmanager/MySQLRecordManager/MySQLrecordManager.ts +++ b/packages/components/nodes/recordmanager/MySQLRecordManager/MySQLrecordManager.ts @@ -62,7 +62,6 @@ class MySQLRecordManager_RecordManager implements INode { label: 'Namespace', name: 'namespace', type: 'string', - description: 'If not specified, chatflowid will be used', additionalParams: true, optional: true }, @@ -219,7 +218,16 @@ class MySQLRecordManager implements RecordManagerInterface { unique key \`unique_key_namespace\` (\`key\`, \`namespace\`));`) - const columns = [`updated_at`, `key`, `namespace`, `group_id`] + // Add doc_id column if it doesn't exist (migration for existing tables) + const checkColumn = await queryRunner.manager.query( + `SELECT COUNT(1) ColumnExists FROM INFORMATION_SCHEMA.COLUMNS + WHERE table_schema=DATABASE() AND table_name='${tableName}' AND column_name='doc_id';` + ) + if (checkColumn[0].ColumnExists === 0) { + await queryRunner.manager.query(`ALTER TABLE \`${tableName}\` ADD COLUMN \`doc_id\` longtext;`) + } + + const columns = [`updated_at`, `key`, `namespace`, `group_id`, `doc_id`] for (const column of columns) { // MySQL does not support 'IF NOT EXISTS' function for Index const Check = await queryRunner.manager.query( @@ -261,7 +269,7 @@ class MySQLRecordManager implements RecordManagerInterface { } } - async update(keys: string[], updateOptions?: UpdateOptions): Promise { + async update(keys: Array<{ uid: string; docId: string }> | string[], updateOptions?: UpdateOptions): Promise { if (keys.length === 0) { return } @@ -277,23 +285,23 @@ class MySQLRecordManager implements RecordManagerInterface { throw new Error(`Time sync issue with database ${updatedAt} < ${timeAtLeast}`) } - const groupIds = _groupIds ?? keys.map(() => null) + // Handle both new format (objects with uid and docId) and old format (strings) + const isNewFormat = keys.length > 0 && typeof keys[0] === 'object' && 'uid' in keys[0] + const keyStrings = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.uid) : (keys as string[]) + const docIds = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.docId) : keys.map(() => null) - if (groupIds.length !== keys.length) { - throw new Error(`Number of keys (${keys.length}) does not match number of group_ids (${groupIds.length})`) + const groupIds = _groupIds ?? keyStrings.map(() => null) + + if (groupIds.length !== keyStrings.length) { + throw new Error(`Number of keys (${keyStrings.length}) does not match number of group_ids (${groupIds.length})`) } - const recordsToUpsert = keys.map((key, i) => [ - key, - this.namespace, - updatedAt, - groupIds[i] ?? null // Ensure groupIds[i] is null if undefined - ]) + const recordsToUpsert = keyStrings.map((key, i) => [key, this.namespace, updatedAt, groupIds[i] ?? null, docIds[i] ?? null]) const query = ` - INSERT INTO \`${tableName}\` (\`key\`, \`namespace\`, \`updated_at\`, \`group_id\`) - VALUES (?, ?, ?, ?) - ON DUPLICATE KEY UPDATE \`updated_at\` = VALUES(\`updated_at\`)` + INSERT INTO \`${tableName}\` (\`key\`, \`namespace\`, \`updated_at\`, \`group_id\`, \`doc_id\`) + VALUES (?, ?, ?, ?, ?) + ON DUPLICATE KEY UPDATE \`updated_at\` = VALUES(\`updated_at\`), \`doc_id\` = VALUES(\`doc_id\`)` // To handle multiple files upsert try { @@ -349,13 +357,13 @@ class MySQLRecordManager implements RecordManagerInterface { } } - async listKeys(options?: ListKeyOptions): Promise { + async listKeys(options?: ListKeyOptions & { docId?: string }): Promise { const dataSource = await this.getDataSource() const queryRunner = dataSource.createQueryRunner() const tableName = this.sanitizeTableName(this.tableName) try { - const { before, after, limit, groupIds } = options ?? {} + const { before, after, limit, groupIds, docId } = options ?? {} let query = `SELECT \`key\` FROM \`${tableName}\` WHERE \`namespace\` = ?` const values: (string | number | string[])[] = [this.namespace] @@ -382,6 +390,11 @@ class MySQLRecordManager implements RecordManagerInterface { values.push(...groupIds.filter((gid): gid is string => gid !== null)) } + if (docId) { + query += ` AND \`doc_id\` = ?` + values.push(docId) + } + query += ';' // Directly using try/catch with async/await for cleaner flow diff --git a/packages/components/nodes/recordmanager/PostgresRecordManager/PostgresRecordManager.ts b/packages/components/nodes/recordmanager/PostgresRecordManager/PostgresRecordManager.ts index ab4b564e8..2070370b2 100644 --- a/packages/components/nodes/recordmanager/PostgresRecordManager/PostgresRecordManager.ts +++ b/packages/components/nodes/recordmanager/PostgresRecordManager/PostgresRecordManager.ts @@ -78,7 +78,6 @@ class PostgresRecordManager_RecordManager implements INode { label: 'Namespace', name: 'namespace', type: 'string', - description: 'If not specified, chatflowid will be used', additionalParams: true, optional: true }, @@ -241,6 +240,19 @@ class PostgresRecordManager implements RecordManagerInterface { CREATE INDEX IF NOT EXISTS namespace_index ON "${tableName}" (namespace); CREATE INDEX IF NOT EXISTS group_id_index ON "${tableName}" (group_id);`) + // Add doc_id column if it doesn't exist (migration for existing tables) + await queryRunner.manager.query(` + DO $$ + BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = '${tableName}' AND column_name = 'doc_id' + ) THEN + ALTER TABLE "${tableName}" ADD COLUMN doc_id TEXT; + CREATE INDEX IF NOT EXISTS doc_id_index ON "${tableName}" (doc_id); + END IF; + END $$;`) + await queryRunner.release() } catch (e: any) { // This error indicates that the table already exists @@ -286,7 +298,7 @@ class PostgresRecordManager implements RecordManagerInterface { return `(${placeholders.join(', ')})` } - async update(keys: string[], updateOptions?: UpdateOptions): Promise { + async update(keys: Array<{ uid: string; docId: string }> | string[], updateOptions?: UpdateOptions): Promise { if (keys.length === 0) { return } @@ -302,17 +314,22 @@ class PostgresRecordManager implements RecordManagerInterface { throw new Error(`Time sync issue with database ${updatedAt} < ${timeAtLeast}`) } - const groupIds = _groupIds ?? keys.map(() => null) + // Handle both new format (objects with uid and docId) and old format (strings) + const isNewFormat = keys.length > 0 && typeof keys[0] === 'object' && 'uid' in keys[0] + const keyStrings = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.uid) : (keys as string[]) + const docIds = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.docId) : keys.map(() => null) - if (groupIds.length !== keys.length) { - throw new Error(`Number of keys (${keys.length}) does not match number of group_ids ${groupIds.length})`) + const groupIds = _groupIds ?? keyStrings.map(() => null) + + if (groupIds.length !== keyStrings.length) { + throw new Error(`Number of keys (${keyStrings.length}) does not match number of group_ids ${groupIds.length})`) } - const recordsToUpsert = keys.map((key, i) => [key, this.namespace, updatedAt, groupIds[i]]) + const recordsToUpsert = keyStrings.map((key, i) => [key, this.namespace, updatedAt, groupIds[i], docIds[i]]) const valuesPlaceholders = recordsToUpsert.map((_, j) => this.generatePlaceholderForRowAt(j, recordsToUpsert[0].length)).join(', ') - const query = `INSERT INTO "${tableName}" (key, namespace, updated_at, group_id) VALUES ${valuesPlaceholders} ON CONFLICT (key, namespace) DO UPDATE SET updated_at = EXCLUDED.updated_at;` + const query = `INSERT INTO "${tableName}" (key, namespace, updated_at, group_id, doc_id) VALUES ${valuesPlaceholders} ON CONFLICT (key, namespace) DO UPDATE SET updated_at = EXCLUDED.updated_at, doc_id = EXCLUDED.doc_id;` try { await queryRunner.manager.query(query, recordsToUpsert.flat()) await queryRunner.release() @@ -351,8 +368,8 @@ class PostgresRecordManager implements RecordManagerInterface { } } - async listKeys(options?: ListKeyOptions): Promise { - const { before, after, limit, groupIds } = options ?? {} + async listKeys(options?: ListKeyOptions & { docId?: string }): Promise { + const { before, after, limit, groupIds, docId } = options ?? {} const tableName = this.sanitizeTableName(this.tableName) let query = `SELECT key FROM "${tableName}" WHERE namespace = $1` @@ -383,6 +400,12 @@ class PostgresRecordManager implements RecordManagerInterface { index += 1 } + if (docId) { + values.push(docId) + query += ` AND doc_id = $${index}` + index += 1 + } + query += ';' const dataSource = await this.getDataSource() diff --git a/packages/components/nodes/recordmanager/SQLiteRecordManager/SQLiteRecordManager.ts b/packages/components/nodes/recordmanager/SQLiteRecordManager/SQLiteRecordManager.ts index 4b7376041..137d452d6 100644 --- a/packages/components/nodes/recordmanager/SQLiteRecordManager/SQLiteRecordManager.ts +++ b/packages/components/nodes/recordmanager/SQLiteRecordManager/SQLiteRecordManager.ts @@ -51,7 +51,6 @@ class SQLiteRecordManager_RecordManager implements INode { label: 'Namespace', name: 'namespace', type: 'string', - description: 'If not specified, chatflowid will be used', additionalParams: true, optional: true }, @@ -198,6 +197,15 @@ CREATE INDEX IF NOT EXISTS key_index ON "${tableName}" (key); CREATE INDEX IF NOT EXISTS namespace_index ON "${tableName}" (namespace); CREATE INDEX IF NOT EXISTS group_id_index ON "${tableName}" (group_id);`) + // Add doc_id column if it doesn't exist (migration for existing tables) + const checkColumn = await queryRunner.manager.query( + `SELECT COUNT(*) as count FROM pragma_table_info('${tableName}') WHERE name='doc_id';` + ) + if (checkColumn[0].count === 0) { + await queryRunner.manager.query(`ALTER TABLE "${tableName}" ADD COLUMN doc_id TEXT;`) + await queryRunner.manager.query(`CREATE INDEX IF NOT EXISTS doc_id_index ON "${tableName}" (doc_id);`) + } + await queryRunner.release() } catch (e: any) { // This error indicates that the table already exists @@ -228,7 +236,7 @@ CREATE INDEX IF NOT EXISTS group_id_index ON "${tableName}" (group_id);`) } } - async update(keys: string[], updateOptions?: UpdateOptions): Promise { + async update(keys: Array<{ uid: string; docId: string }> | string[], updateOptions?: UpdateOptions): Promise { if (keys.length === 0) { return } @@ -243,23 +251,23 @@ CREATE INDEX IF NOT EXISTS group_id_index ON "${tableName}" (group_id);`) throw new Error(`Time sync issue with database ${updatedAt} < ${timeAtLeast}`) } - const groupIds = _groupIds ?? keys.map(() => null) + // Handle both new format (objects with uid and docId) and old format (strings) + const isNewFormat = keys.length > 0 && typeof keys[0] === 'object' && 'uid' in keys[0] + const keyStrings = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.uid) : (keys as string[]) + const docIds = isNewFormat ? (keys as Array<{ uid: string; docId: string }>).map((k) => k.docId) : keys.map(() => null) - if (groupIds.length !== keys.length) { - throw new Error(`Number of keys (${keys.length}) does not match number of group_ids (${groupIds.length})`) + const groupIds = _groupIds ?? keyStrings.map(() => null) + + if (groupIds.length !== keyStrings.length) { + throw new Error(`Number of keys (${keyStrings.length}) does not match number of group_ids (${groupIds.length})`) } - const recordsToUpsert = keys.map((key, i) => [ - key, - this.namespace, - updatedAt, - groupIds[i] ?? null // Ensure groupIds[i] is null if undefined - ]) + const recordsToUpsert = keyStrings.map((key, i) => [key, this.namespace, updatedAt, groupIds[i] ?? null, docIds[i] ?? null]) const query = ` - INSERT INTO "${tableName}" (key, namespace, updated_at, group_id) - VALUES (?, ?, ?, ?) - ON CONFLICT (key, namespace) DO UPDATE SET updated_at = excluded.updated_at` + INSERT INTO "${tableName}" (key, namespace, updated_at, group_id, doc_id) + VALUES (?, ?, ?, ?, ?) + ON CONFLICT (key, namespace) DO UPDATE SET updated_at = excluded.updated_at, doc_id = excluded.doc_id` try { // To handle multiple files upsert @@ -314,8 +322,8 @@ CREATE INDEX IF NOT EXISTS group_id_index ON "${tableName}" (group_id);`) } } - async listKeys(options?: ListKeyOptions): Promise { - const { before, after, limit, groupIds } = options ?? {} + async listKeys(options?: ListKeyOptions & { docId?: string }): Promise { + const { before, after, limit, groupIds, docId } = options ?? {} const tableName = this.sanitizeTableName(this.tableName) let query = `SELECT key FROM "${tableName}" WHERE namespace = ?` @@ -344,6 +352,11 @@ CREATE INDEX IF NOT EXISTS group_id_index ON "${tableName}" (group_id);`) values.push(...groupIds.filter((gid): gid is string => gid !== null)) } + if (docId) { + query += ` AND doc_id = ?` + values.push(docId) + } + query += ';' const dataSource = await this.getDataSource() diff --git a/packages/components/nodes/vectorstores/Chroma/Chroma.ts b/packages/components/nodes/vectorstores/Chroma/Chroma.ts index dabd747ca..c3b7a7acf 100644 --- a/packages/components/nodes/vectorstores/Chroma/Chroma.ts +++ b/packages/components/nodes/vectorstores/Chroma/Chroma.ts @@ -186,7 +186,11 @@ class Chroma_VectorStores implements INode { const vectorStoreName = collectionName await recordManager.createSchema() ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName - const keys: string[] = await recordManager.listKeys({}) + const filterKeys: ICommonObject = {} + if (options.docId) { + filterKeys.docId = options.docId + } + const keys: string[] = await recordManager.listKeys(filterKeys) const chromaStore = new ChromaExtended(embeddings, obj) diff --git a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch.ts b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch.ts index 9cd9969e7..426fed2df 100644 --- a/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch.ts +++ b/packages/components/nodes/vectorstores/Elasticsearch/Elasticsearch.ts @@ -198,7 +198,11 @@ class Elasticsearch_VectorStores implements INode { const vectorStoreName = indexName await recordManager.createSchema() ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName - const keys: string[] = await recordManager.listKeys({}) + const filterKeys: ICommonObject = {} + if (options.docId) { + filterKeys.docId = options.docId + } + const keys: string[] = await recordManager.listKeys(filterKeys) await vectorStore.delete({ ids: keys }) await recordManager.deleteKeys(keys) diff --git a/packages/components/nodes/vectorstores/Pinecone/Pinecone.ts b/packages/components/nodes/vectorstores/Pinecone/Pinecone.ts index 0e2fb8247..00f0c7668 100644 --- a/packages/components/nodes/vectorstores/Pinecone/Pinecone.ts +++ b/packages/components/nodes/vectorstores/Pinecone/Pinecone.ts @@ -212,7 +212,11 @@ class Pinecone_VectorStores implements INode { const vectorStoreName = pineconeNamespace await recordManager.createSchema() ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName - const keys: string[] = await recordManager.listKeys({}) + const filterKeys: ICommonObject = {} + if (options.docId) { + filterKeys.docId = options.docId + } + const keys: string[] = await recordManager.listKeys(filterKeys) await pineconeStore.delete({ ids: keys }) await recordManager.deleteKeys(keys) diff --git a/packages/components/nodes/vectorstores/Postgres/Postgres.ts b/packages/components/nodes/vectorstores/Postgres/Postgres.ts index 98f8b07f6..904e28ab8 100644 --- a/packages/components/nodes/vectorstores/Postgres/Postgres.ts +++ b/packages/components/nodes/vectorstores/Postgres/Postgres.ts @@ -49,7 +49,7 @@ class Postgres_VectorStores implements INode { constructor() { this.label = 'Postgres' this.name = 'postgres' - this.version = 7.0 + this.version = 7.1 this.type = 'Postgres' this.icon = 'postgres.svg' this.category = 'Vector Stores' @@ -173,6 +173,15 @@ class Postgres_VectorStores implements INode { additionalParams: true, optional: true }, + { + label: 'Upsert Batch Size', + name: 'batchSize', + type: 'number', + step: 1, + description: 'Upsert in batches of size N', + additionalParams: true, + optional: true + }, { label: 'Additional Configuration', name: 'additionalConfig', @@ -232,6 +241,7 @@ class Postgres_VectorStores implements INode { const docs = nodeData.inputs?.document as Document[] const recordManager = nodeData.inputs?.recordManager const isFileUploadEnabled = nodeData.inputs?.fileUpload as boolean + const _batchSize = nodeData.inputs?.batchSize const vectorStoreDriver: VectorStoreDriver = Postgres_VectorStores.getDriverFromConfig(nodeData, options) const flattenDocs = docs && docs.length ? flatten(docs) : [] @@ -265,7 +275,15 @@ class Postgres_VectorStores implements INode { return res } else { - await vectorStoreDriver.fromDocuments(finalDocs) + if (_batchSize) { + const batchSize = parseInt(_batchSize, 10) + for (let i = 0; i < finalDocs.length; i += batchSize) { + const batch = finalDocs.slice(i, i + batchSize) + await vectorStoreDriver.fromDocuments(batch) + } + } else { + await vectorStoreDriver.fromDocuments(finalDocs) + } return { numAdded: finalDocs.length, addedDocs: finalDocs } } @@ -285,7 +303,11 @@ class Postgres_VectorStores implements INode { const vectorStoreName = tableName await recordManager.createSchema() ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName - const keys: string[] = await recordManager.listKeys({}) + const filterKeys: ICommonObject = {} + if (options.docId) { + filterKeys.docId = options.docId + } + const keys: string[] = await recordManager.listKeys(filterKeys) await vectorStore.delete({ ids: keys }) await recordManager.deleteKeys(keys) diff --git a/packages/components/nodes/vectorstores/Postgres/driver/TypeORM.ts b/packages/components/nodes/vectorstores/Postgres/driver/TypeORM.ts index 3a0c5ab00..38876111d 100644 --- a/packages/components/nodes/vectorstores/Postgres/driver/TypeORM.ts +++ b/packages/components/nodes/vectorstores/Postgres/driver/TypeORM.ts @@ -5,6 +5,11 @@ import { TypeORMVectorStore, TypeORMVectorStoreArgs, TypeORMVectorStoreDocument import { VectorStore } from '@langchain/core/vectorstores' import { Document } from '@langchain/core/documents' import { Pool } from 'pg' +import { v4 as uuid } from 'uuid' + +type TypeORMAddDocumentOptions = { + ids?: string[] +} export class TypeORMDriver extends VectorStoreDriver { protected _postgresConnectionOptions: DataSourceOptions @@ -95,15 +100,45 @@ export class TypeORMDriver extends VectorStoreDriver { try { instance.appDataSource.getRepository(instance.documentEntity).delete(ids) } catch (e) { - console.error('Failed to delete') + console.error('Failed to delete', e) } } } - const baseAddVectorsFn = instance.addVectors.bind(instance) + instance.addVectors = async ( + vectors: number[][], + documents: Document[], + documentOptions?: TypeORMAddDocumentOptions + ): Promise => { + const rows = vectors.map((embedding, idx) => { + const embeddingString = `[${embedding.join(',')}]` + const documentRow = { + id: documentOptions?.ids?.length ? documentOptions.ids[idx] : uuid(), + pageContent: documents[idx].pageContent, + embedding: embeddingString, + metadata: documents[idx].metadata + } + return documentRow + }) - instance.addVectors = async (vectors, documents) => { - return baseAddVectorsFn(vectors, this.sanitizeDocuments(documents)) + const documentRepository = instance.appDataSource.getRepository(instance.documentEntity) + const _batchSize = this.nodeData.inputs?.batchSize + const chunkSize = _batchSize ? parseInt(_batchSize, 10) : 500 + + for (let i = 0; i < rows.length; i += chunkSize) { + const chunk = rows.slice(i, i + chunkSize) + try { + await documentRepository.save(chunk) + } catch (e) { + console.error(e) + throw new Error(`Error inserting: ${chunk[0].pageContent}`) + } + } + } + + instance.addDocuments = async (documents: Document[], options?: { ids?: string[] }): Promise => { + const texts = documents.map(({ pageContent }) => pageContent) + return (instance.addVectors as any)(await this.getEmbeddings().embedDocuments(texts), documents, options) } return instance diff --git a/packages/components/nodes/vectorstores/Qdrant/Qdrant.ts b/packages/components/nodes/vectorstores/Qdrant/Qdrant.ts index 6d292119e..afc3cf25b 100644 --- a/packages/components/nodes/vectorstores/Qdrant/Qdrant.ts +++ b/packages/components/nodes/vectorstores/Qdrant/Qdrant.ts @@ -385,7 +385,11 @@ class Qdrant_VectorStores implements INode { const vectorStoreName = collectionName await recordManager.createSchema() ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName - const keys: string[] = await recordManager.listKeys({}) + const filterKeys: ICommonObject = {} + if (options.docId) { + filterKeys.docId = options.docId + } + const keys: string[] = await recordManager.listKeys(filterKeys) await vectorStore.delete({ ids: keys }) await recordManager.deleteKeys(keys) diff --git a/packages/components/nodes/vectorstores/Supabase/Supabase.ts b/packages/components/nodes/vectorstores/Supabase/Supabase.ts index abe023309..36e6ade0f 100644 --- a/packages/components/nodes/vectorstores/Supabase/Supabase.ts +++ b/packages/components/nodes/vectorstores/Supabase/Supabase.ts @@ -197,7 +197,11 @@ class Supabase_VectorStores implements INode { const vectorStoreName = tableName + '_' + queryName await recordManager.createSchema() ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName - const keys: string[] = await recordManager.listKeys({}) + const filterKeys: ICommonObject = {} + if (options.docId) { + filterKeys.docId = options.docId + } + const keys: string[] = await recordManager.listKeys(filterKeys) await supabaseStore.delete({ ids: keys }) await recordManager.deleteKeys(keys) diff --git a/packages/components/nodes/vectorstores/Upstash/Upstash.ts b/packages/components/nodes/vectorstores/Upstash/Upstash.ts index 2886afcfa..9855e760a 100644 --- a/packages/components/nodes/vectorstores/Upstash/Upstash.ts +++ b/packages/components/nodes/vectorstores/Upstash/Upstash.ts @@ -187,7 +187,11 @@ class Upstash_VectorStores implements INode { const vectorStoreName = UPSTASH_VECTOR_REST_URL await recordManager.createSchema() ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName - const keys: string[] = await recordManager.listKeys({}) + const filterKeys: ICommonObject = {} + if (options.docId) { + filterKeys.docId = options.docId + } + const keys: string[] = await recordManager.listKeys(filterKeys) await upstashStore.delete({ ids: keys }) await recordManager.deleteKeys(keys) diff --git a/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts b/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts index eb61f0205..cab9287eb 100644 --- a/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts +++ b/packages/components/nodes/vectorstores/Weaviate/Weaviate.ts @@ -252,7 +252,11 @@ class Weaviate_VectorStores implements INode { const vectorStoreName = weaviateTextKey ? weaviateIndex + '_' + weaviateTextKey : weaviateIndex await recordManager.createSchema() ;(recordManager as any).namespace = (recordManager as any).namespace + '_' + vectorStoreName - const keys: string[] = await recordManager.listKeys({}) + const filterKeys: ICommonObject = {} + if (options.docId) { + filterKeys.docId = options.docId + } + const keys: string[] = await recordManager.listKeys(filterKeys) await weaviateStore.delete({ ids: keys }) await recordManager.deleteKeys(keys) diff --git a/packages/components/src/indexing.ts b/packages/components/src/indexing.ts index f9cacfc5d..25d0f2707 100644 --- a/packages/components/src/indexing.ts +++ b/packages/components/src/indexing.ts @@ -8,6 +8,10 @@ import { IndexingResult } from './Interface' type Metadata = Record +export interface ExtendedRecordManagerInterface extends RecordManagerInterface { + update(keys: Array<{ uid: string; docId: string }> | string[], updateOptions?: Record): Promise +} + type StringOrDocFunc = string | ((doc: DocumentInterface) => string) export interface HashedDocumentInterface extends DocumentInterface { @@ -207,7 +211,7 @@ export const _isBaseDocumentLoader = (arg: any): arg is BaseDocumentLoader => { interface IndexArgs { docsSource: BaseDocumentLoader | DocumentInterface[] - recordManager: RecordManagerInterface + recordManager: ExtendedRecordManagerInterface vectorStore: VectorStore options?: IndexOptions } @@ -275,7 +279,7 @@ export async function index(args: IndexArgs): Promise { const uids: string[] = [] const docsToIndex: DocumentInterface[] = [] - const docsToUpdate: string[] = [] + const docsToUpdate: Array<{ uid: string; docId: string }> = [] const seenDocs = new Set() hashedDocs.forEach((hashedDoc, i) => { const docExists = batchExists[i] @@ -283,7 +287,7 @@ export async function index(args: IndexArgs): Promise { if (forceUpdate) { seenDocs.add(hashedDoc.uid) } else { - docsToUpdate.push(hashedDoc.uid) + docsToUpdate.push({ uid: hashedDoc.uid, docId: hashedDoc.metadata.docId as string }) return } } @@ -308,7 +312,7 @@ export async function index(args: IndexArgs): Promise { } await recordManager.update( - hashedDocs.map((doc) => doc.uid), + hashedDocs.map((doc) => ({ uid: doc.uid, docId: doc.metadata.docId as string })), { timeAtLeast: indexStartDt, groupIds: sourceIds } ) diff --git a/packages/server/src/controllers/documentstore/index.ts b/packages/server/src/controllers/documentstore/index.ts index 1ac4f4395..98e03ca6a 100644 --- a/packages/server/src/controllers/documentstore/index.ts +++ b/packages/server/src/controllers/documentstore/index.ts @@ -465,9 +465,10 @@ const insertIntoVectorStore = async (req: Request, res: Response, next: NextFunc } const subscriptionId = req.user?.activeOrganizationSubscriptionId || '' const body = req.body + const isStrictSave = body.isStrictSave ?? false const apiResponse = await documentStoreService.insertIntoVectorStoreMiddleware( body, - false, + isStrictSave, orgId, workspaceId, subscriptionId, @@ -513,7 +514,11 @@ const deleteVectorStoreFromStore = async (req: Request, res: Response, next: Nex `Error: documentStoreController.deleteVectorStoreFromStore - workspaceId not provided!` ) } - const apiResponse = await documentStoreService.deleteVectorStoreFromStore(req.params.storeId, workspaceId) + const apiResponse = await documentStoreService.deleteVectorStoreFromStore( + req.params.storeId, + workspaceId, + (req.query.docId as string) || undefined + ) return res.json(apiResponse) } catch (error) { next(error) diff --git a/packages/server/src/services/documentstore/index.ts b/packages/server/src/services/documentstore/index.ts index 0ee1cad20..95f0f4711 100644 --- a/packages/server/src/services/documentstore/index.ts +++ b/packages/server/src/services/documentstore/index.ts @@ -391,7 +391,7 @@ const deleteDocumentStoreFileChunk = async (storeId: string, docId: string, chun } } -const deleteVectorStoreFromStore = async (storeId: string, workspaceId: string) => { +const deleteVectorStoreFromStore = async (storeId: string, workspaceId: string, docId?: string) => { try { const appServer = getRunningExpressApp() const componentNodes = appServer.nodesPool.componentNodes @@ -461,7 +461,7 @@ const deleteVectorStoreFromStore = async (storeId: string, workspaceId: string) // Call the delete method of the vector store if (vectorStoreObj.vectorStoreMethods.delete) { - await vectorStoreObj.vectorStoreMethods.delete(vStoreNodeData, idsToDelete, options) + await vectorStoreObj.vectorStoreMethods.delete(vStoreNodeData, idsToDelete, { ...options, docId }) } } catch (error) { throw new InternalFlowiseError( @@ -1157,6 +1157,18 @@ const updateVectorStoreConfigOnly = async (data: ICommonObject, workspaceId: str ) } } +/** + * Saves vector store configuration to the document store entity. + * Handles embedding, vector store, and record manager configurations. + * + * @example + * // Strict mode: Only save what's provided, clear the rest + * await saveVectorStoreConfig(ds, { storeId, embeddingName, embeddingConfig }, true, wsId) + * + * @example + * // Lenient mode: Reuse existing configs if not provided + * await saveVectorStoreConfig(ds, { storeId, vectorStoreName, vectorStoreConfig }, false, wsId) + */ const saveVectorStoreConfig = async (appDataSource: DataSource, data: ICommonObject, isStrictSave = true, workspaceId: string) => { try { const entity = await appDataSource.getRepository(DocumentStore).findOneBy({ @@ -1221,6 +1233,15 @@ const saveVectorStoreConfig = async (appDataSource: DataSource, data: ICommonObj } } +/** + * Inserts documents from document store into the configured vector store. + * + * Process: + * 1. Saves vector store configuration (embedding, vector store, record manager) + * 2. Sets document store status to UPSERTING + * 3. Performs the actual vector store upsert operation + * 4. Updates status to UPSERTED upon completion + */ export const insertIntoVectorStore = async ({ appDataSource, componentNodes, @@ -1231,19 +1252,16 @@ export const insertIntoVectorStore = async ({ workspaceId }: IExecuteVectorStoreInsert) => { try { + // Step 1: Save configuration based on isStrictSave mode const entity = await saveVectorStoreConfig(appDataSource, data, isStrictSave, workspaceId) + + // Step 2: Mark as UPSERTING before starting the operation entity.status = DocumentStoreStatus.UPSERTING await appDataSource.getRepository(DocumentStore).save(entity) - const indexResult = await _insertIntoVectorStoreWorkerThread( - appDataSource, - componentNodes, - telemetry, - data, - isStrictSave, - orgId, - workspaceId - ) + // Step 3: Perform the actual vector store upsert + // Note: Configuration already saved above, worker thread just retrieves and uses it + const indexResult = await _insertIntoVectorStoreWorkerThread(appDataSource, componentNodes, telemetry, data, orgId, workspaceId) return indexResult } catch (error) { throw new InternalFlowiseError( @@ -1308,12 +1326,18 @@ const _insertIntoVectorStoreWorkerThread = async ( componentNodes: IComponentNodes, telemetry: Telemetry, data: ICommonObject, - isStrictSave = true, orgId: string, workspaceId: string ) => { try { - const entity = await saveVectorStoreConfig(appDataSource, data, isStrictSave, workspaceId) + // Configuration already saved by insertIntoVectorStore, just retrieve the entity + const entity = await appDataSource.getRepository(DocumentStore).findOneBy({ + id: data.storeId, + workspaceId: workspaceId + }) + if (!entity) { + throw new InternalFlowiseError(StatusCodes.NOT_FOUND, `Document store ${data.storeId} not found`) + } let upsertHistory: Record = {} const chatflowid = data.storeId // fake chatflowid because this is not tied to any chatflow @@ -1350,7 +1374,10 @@ const _insertIntoVectorStoreWorkerThread = async ( const docs: Document[] = chunks.map((chunk: DocumentStoreFileChunk) => { return new Document({ pageContent: chunk.pageContent, - metadata: JSON.parse(chunk.metadata) + metadata: { + ...JSON.parse(chunk.metadata), + docId: chunk.docId + } }) }) vStoreNodeData.inputs.document = docs @@ -1911,6 +1938,8 @@ const upsertDocStore = async ( recordManagerConfig } + // Use isStrictSave: false to preserve existing configurations during upsert + // This allows the operation to reuse existing embedding/vector store/record manager configs const res = await insertIntoVectorStore({ appDataSource, componentNodes, diff --git a/packages/ui/src/api/documentstore.js b/packages/ui/src/api/documentstore.js index ac0c4bbff..ba77e599e 100644 --- a/packages/ui/src/api/documentstore.js +++ b/packages/ui/src/api/documentstore.js @@ -22,7 +22,10 @@ const refreshLoader = (storeId) => client.post(`/document-store/refresh/${storeI const insertIntoVectorStore = (body) => client.post(`/document-store/vectorstore/insert`, body) const saveVectorStoreConfig = (body) => client.post(`/document-store/vectorstore/save`, body) const updateVectorStoreConfig = (body) => client.post(`/document-store/vectorstore/update`, body) -const deleteVectorStoreDataFromStore = (storeId) => client.delete(`/document-store/vectorstore/${storeId}`) +const deleteVectorStoreDataFromStore = (storeId, docId) => { + const url = docId ? `/document-store/vectorstore/${storeId}?docId=${docId}` : `/document-store/vectorstore/${storeId}` + return client.delete(url) +} const queryVectorStore = (body) => client.post(`/document-store/vectorstore/query`, body) const getVectorStoreProviders = () => client.get('/document-store/components/vectorstore') const getEmbeddingProviders = () => client.get('/document-store/components/embeddings') diff --git a/packages/ui/src/views/docstore/DeleteDocStoreDialog.jsx b/packages/ui/src/views/docstore/DeleteDocStoreDialog.jsx index 85621a25c..261d9a38a 100644 --- a/packages/ui/src/views/docstore/DeleteDocStoreDialog.jsx +++ b/packages/ui/src/views/docstore/DeleteDocStoreDialog.jsx @@ -18,11 +18,15 @@ import { TableContainer, TableRow, TableCell, - Checkbox, - FormControlLabel, - DialogActions + DialogActions, + Card, + Stack, + Link } from '@mui/material' +import { useTheme } from '@mui/material/styles' import ExpandMoreIcon from '@mui/icons-material/ExpandMore' +import SettingsIcon from '@mui/icons-material/Settings' +import { IconAlertTriangle } from '@tabler/icons-react' import { TableViewOnly } from '@/ui-component/table/Table' import { v4 as uuidv4 } from 'uuid' @@ -36,12 +40,13 @@ import { initNode } from '@/utils/genericHelper' const DeleteDocStoreDialog = ({ show, dialogProps, onCancel, onDelete }) => { const portalElement = document.getElementById('portal') + const theme = useTheme() const [nodeConfigExpanded, setNodeConfigExpanded] = useState({}) - const [removeFromVS, setRemoveFromVS] = useState(false) const [vsFlowData, setVSFlowData] = useState([]) const [rmFlowData, setRMFlowData] = useState([]) - const getSpecificNodeApi = useApi(nodesApi.getSpecificNode) + const getVectorStoreNodeApi = useApi(nodesApi.getSpecificNode) + const getRecordManagerNodeApi = useApi(nodesApi.getSpecificNode) const handleAccordionChange = (nodeName) => (event, isExpanded) => { const accordianNodes = { ...nodeConfigExpanded } @@ -52,42 +57,37 @@ const DeleteDocStoreDialog = ({ show, dialogProps, onCancel, onDelete }) => { useEffect(() => { if (dialogProps.recordManagerConfig) { const nodeName = dialogProps.recordManagerConfig.name - if (nodeName) getSpecificNodeApi.request(nodeName) + if (nodeName) getRecordManagerNodeApi.request(nodeName) + } - if (dialogProps.vectorStoreConfig) { - const nodeName = dialogProps.vectorStoreConfig.name - if (nodeName) getSpecificNodeApi.request(nodeName) - } + if (dialogProps.vectorStoreConfig) { + const nodeName = dialogProps.vectorStoreConfig.name + if (nodeName) getVectorStoreNodeApi.request(nodeName) } return () => { setNodeConfigExpanded({}) - setRemoveFromVS(false) setVSFlowData([]) setRMFlowData([]) } // eslint-disable-next-line react-hooks/exhaustive-deps }, [dialogProps]) + // Process Vector Store node data useEffect(() => { - if (getSpecificNodeApi.data) { - const nodeData = cloneDeep(initNode(getSpecificNodeApi.data, uuidv4())) - - let config = 'vectorStoreConfig' - if (nodeData.category === 'Record Manager') config = 'recordManagerConfig' + if (getVectorStoreNodeApi.data && dialogProps.vectorStoreConfig) { + const nodeData = cloneDeep(initNode(getVectorStoreNodeApi.data, uuidv4())) const paramValues = [] - for (const inputName in dialogProps[config].config) { + for (const inputName in dialogProps.vectorStoreConfig.config) { const inputParam = nodeData.inputParams.find((inp) => inp.name === inputName) if (!inputParam) continue if (inputParam.type === 'credential') continue - let paramValue = {} - - const inputValue = dialogProps[config].config[inputName] + const inputValue = dialogProps.vectorStoreConfig.config[inputName] if (!inputValue) continue @@ -95,40 +95,71 @@ const DeleteDocStoreDialog = ({ show, dialogProps, onCancel, onDelete }) => { continue } - paramValue = { + paramValues.push({ label: inputParam?.label, name: inputParam?.name, type: inputParam?.type, value: inputValue - } - paramValues.push(paramValue) + }) } - if (config === 'vectorStoreConfig') { - setVSFlowData([ - { - label: nodeData.label, - name: nodeData.name, - category: nodeData.category, - id: nodeData.id, - paramValues - } - ]) - } else if (config === 'recordManagerConfig') { - setRMFlowData([ - { - label: nodeData.label, - name: nodeData.name, - category: nodeData.category, - id: nodeData.id, - paramValues - } - ]) - } + setVSFlowData([ + { + label: nodeData.label, + name: nodeData.name, + category: nodeData.category, + id: nodeData.id, + paramValues + } + ]) } // eslint-disable-next-line react-hooks/exhaustive-deps - }, [getSpecificNodeApi.data]) + }, [getVectorStoreNodeApi.data]) + + // Process Record Manager node data + useEffect(() => { + if (getRecordManagerNodeApi.data && dialogProps.recordManagerConfig) { + const nodeData = cloneDeep(initNode(getRecordManagerNodeApi.data, uuidv4())) + + const paramValues = [] + + for (const inputName in dialogProps.recordManagerConfig.config) { + const inputParam = nodeData.inputParams.find((inp) => inp.name === inputName) + + if (!inputParam) continue + + if (inputParam.type === 'credential') continue + + const inputValue = dialogProps.recordManagerConfig.config[inputName] + + if (!inputValue) continue + + if (typeof inputValue === 'string' && inputValue.startsWith('{{') && inputValue.endsWith('}}')) { + continue + } + + paramValues.push({ + label: inputParam?.label, + name: inputParam?.name, + type: inputParam?.type, + value: inputValue + }) + } + + setRMFlowData([ + { + label: nodeData.label, + name: nodeData.name, + category: nodeData.category, + id: nodeData.id, + paramValues + } + ]) + } + + // eslint-disable-next-line react-hooks/exhaustive-deps + }, [getRecordManagerNodeApi.data]) const component = show ? ( { {dialogProps.title} - + {dialogProps.description} - {dialogProps.type === 'STORE' && dialogProps.recordManagerConfig && ( - setRemoveFromVS(event.target.checked)} />} - label='Remove data from vector store and record manager' - /> + {dialogProps.vectorStoreConfig && !dialogProps.recordManagerConfig && ( +
+ + + Note: Without a Record Manager configured, only the document chunks will be removed from the + document store. The actual vector embeddings in your vector store database will remain unchanged. To enable + automatic cleanup of vector store data, please configure a Record Manager.{' '} + + Learn more + + +
)} - {removeFromVS && ( -
- - - - - - - {([...vsFlowData, ...rmFlowData] || []).map((node, index) => { - return ( - - } - aria-controls={`nodes-accordian-${node.name}`} - id={`nodes-accordian-header-${node.name}`} + {vsFlowData && vsFlowData.length > 0 && rmFlowData && rmFlowData.length > 0 && ( + + + + Configuration + + + +
+ + + + + {([...vsFlowData, ...rmFlowData] || []).map((node, index) => { + return ( + -
} + aria-controls={`nodes-accordian-${node.name}`} + id={`nodes-accordian-header-${node.name}`} >
- {node.name} + > + {node.name} +
+ {node.label}
- {node.label} - - - - {node.paramValues[0] && ( - - )} - -
- ) - })} -
-
-
-
-
-
- - * Only data that were upserted with Record Manager will be deleted from vector store - -
+ + + {node.paramValues[0] && ( + + )} + + + ) + })} + + + + + + + + )}
- diff --git a/packages/ui/src/views/docstore/DocumentStoreDetail.jsx b/packages/ui/src/views/docstore/DocumentStoreDetail.jsx index 8aff7cb2d..dac334fa4 100644 --- a/packages/ui/src/views/docstore/DocumentStoreDetail.jsx +++ b/packages/ui/src/views/docstore/DocumentStoreDetail.jsx @@ -186,19 +186,19 @@ const DocumentStoreDetails = () => { setShowDocumentLoaderListDialog(true) } - const deleteVectorStoreDataFromStore = async (storeId) => { + const deleteVectorStoreDataFromStore = async (storeId, docId) => { try { - await documentsApi.deleteVectorStoreDataFromStore(storeId) + await documentsApi.deleteVectorStoreDataFromStore(storeId, docId) } catch (error) { console.error(error) } } - const onDocStoreDelete = async (type, file, removeFromVectorStore) => { + const onDocStoreDelete = async (type, file) => { setBackdropLoading(true) setShowDeleteDocStoreDialog(false) if (type === 'STORE') { - if (removeFromVectorStore) { + if (documentStore.recordManagerConfig) { await deleteVectorStoreDataFromStore(storeId) } try { @@ -239,6 +239,9 @@ const DocumentStoreDetails = () => { }) } } else if (type === 'LOADER') { + if (documentStore.recordManagerConfig) { + await deleteVectorStoreDataFromStore(storeId, file.id) + } try { const deleteResp = await documentsApi.deleteLoaderFromStore(storeId, file.id) setBackdropLoading(false) @@ -280,9 +283,40 @@ const DocumentStoreDetails = () => { } const onLoaderDelete = (file, vectorStoreConfig, recordManagerConfig) => { + // Get the display name in the format "LoaderName (sourceName)" + const loaderName = file.loaderName || 'Unknown' + let sourceName = '' + + // Prefer files.name when files array exists and has items + if (file.files && Array.isArray(file.files) && file.files.length > 0) { + sourceName = file.files.map((f) => f.name).join(', ') + } else if (file.source) { + // Fallback to source logic + if (typeof file.source === 'string' && file.source.includes('base64')) { + sourceName = getFileName(file.source) + } else if (typeof file.source === 'string' && file.source.startsWith('[') && file.source.endsWith(']')) { + sourceName = JSON.parse(file.source).join(', ') + } else if (typeof file.source === 'string') { + sourceName = file.source + } + } + + const displayName = sourceName ? `${loaderName} (${sourceName})` : loaderName + + let description = `Delete "${displayName}"? This will delete all the associated document chunks from the document store.` + + if ( + recordManagerConfig && + vectorStoreConfig && + Object.keys(recordManagerConfig).length > 0 && + Object.keys(vectorStoreConfig).length > 0 + ) { + description = `Delete "${displayName}"? This will delete all the associated document chunks from the document store and remove the actual data from the vector store database.` + } + const props = { title: `Delete`, - description: `Delete Loader ${file.loaderName} ? This will delete all the associated document chunks.`, + description, vectorStoreConfig, recordManagerConfig, type: 'LOADER', @@ -294,9 +328,20 @@ const DocumentStoreDetails = () => { } const onStoreDelete = (vectorStoreConfig, recordManagerConfig) => { + let description = `Delete Store ${getSpecificDocumentStore.data?.name}? This will delete all the associated loaders and document chunks from the document store.` + + if ( + recordManagerConfig && + vectorStoreConfig && + Object.keys(recordManagerConfig).length > 0 && + Object.keys(vectorStoreConfig).length > 0 + ) { + description = `Delete Store ${getSpecificDocumentStore.data?.name}? This will delete all the associated loaders and document chunks from the document store, and remove the actual data from the vector store database.` + } + const props = { title: `Delete`, - description: `Delete Store ${getSpecificDocumentStore.data?.name} ? This will delete all the associated loaders and document chunks.`, + description, vectorStoreConfig, recordManagerConfig, type: 'STORE' @@ -481,7 +526,10 @@ const DocumentStoreDetails = () => { > showStoredChunks('all')} + onClick={() => { + handleClose() + showStoredChunks('all') + }} disableRipple > @@ -490,7 +538,10 @@ const DocumentStoreDetails = () => { showVectorStore(documentStore.id)} + onClick={() => { + handleClose() + showVectorStore(documentStore.id) + }} disableRipple > @@ -499,7 +550,10 @@ const DocumentStoreDetails = () => { showVectorStoreQuery(documentStore.id)} + onClick={() => { + handleClose() + showVectorStoreQuery(documentStore.id) + }} disableRipple > @@ -518,7 +572,10 @@ const DocumentStoreDetails = () => { onStoreDelete(documentStore.vectorStoreConfig, documentStore.recordManagerConfig)} + onClick={() => { + handleClose() + onStoreDelete(documentStore.vectorStoreConfig, documentStore.recordManagerConfig) + }} disableRipple > @@ -756,20 +813,26 @@ function LoaderRow(props) { setAnchorEl(null) } - const formatSources = (files, source) => { + const formatSources = (files, source, loaderName) => { + let sourceName = '' + // Prefer files.name when files array exists and has items if (files && Array.isArray(files) && files.length > 0) { - return files.map((file) => file.name).join(', ') + sourceName = files.map((file) => file.name).join(', ') + } else if (source && typeof source === 'string' && source.includes('base64')) { + // Fallback to original source logic + sourceName = getFileName(source) + } else if (source && typeof source === 'string' && source.startsWith('[') && source.endsWith(']')) { + sourceName = JSON.parse(source).join(', ') + } else if (source) { + sourceName = source } - // Fallback to original source logic - if (source && typeof source === 'string' && source.includes('base64')) { - return getFileName(source) + // Return format: "LoaderName (sourceName)" or just "LoaderName" if no source + if (!sourceName) { + return loaderName || 'No source' } - if (source && typeof source === 'string' && source.startsWith('[') && source.endsWith(']')) { - return JSON.parse(source).join(', ') - } - return source || 'No source' + return loaderName ? `${loaderName} (${sourceName})` : sourceName } return ( @@ -823,32 +886,62 @@ function LoaderRow(props) { onClose={handleClose} > - + { + handleClose() + props.onEditClick() + }} + disableRipple + > Preview & Process - + { + handleClose() + props.onViewChunksClick() + }} + disableRipple + > View & Edit Chunks - + { + handleClose() + props.onChunkUpsert() + }} + disableRipple + > Upsert Chunks - + { + handleClose() + props.onViewUpsertAPI() + }} + disableRipple + > View API - + { + handleClose() + props.onDeleteClick() + }} + disableRipple + > Delete diff --git a/packages/ui/src/views/docstore/ShowStoredChunks.jsx b/packages/ui/src/views/docstore/ShowStoredChunks.jsx index e02ecadb4..68e973a79 100644 --- a/packages/ui/src/views/docstore/ShowStoredChunks.jsx +++ b/packages/ui/src/views/docstore/ShowStoredChunks.jsx @@ -26,6 +26,7 @@ import useApi from '@/hooks/useApi' import useConfirm from '@/hooks/useConfirm' import useNotifier from '@/utils/useNotifier' import { useAuth } from '@/hooks/useAuth' +import { getFileName } from '@/utils/genericHelper' // store import { closeSnackbar as closeSnackbarAction, enqueueSnackbar as enqueueSnackbarAction } from '@/store/actions' @@ -76,6 +77,7 @@ const ShowStoredChunks = () => { const [showExpandedChunkDialog, setShowExpandedChunkDialog] = useState(false) const [expandedChunkDialogProps, setExpandedChunkDialogProps] = useState({}) const [fileNames, setFileNames] = useState([]) + const [loaderDisplayName, setLoaderDisplayName] = useState('') const chunkSelected = (chunkId) => { const selectedChunk = documentChunks.find((chunk) => chunk.id === chunkId) @@ -212,13 +214,32 @@ const ShowStoredChunks = () => { setCurrentPage(data.currentPage) setStart(data.currentPage * 50 - 49) setEnd(data.currentPage * 50 > data.count ? data.count : data.currentPage * 50) + + // Build the loader display name in format "LoaderName (sourceName)" + const loaderName = data.file?.loaderName || data.storeName || '' + let sourceName = '' + if (data.file?.files && data.file.files.length > 0) { const fileNames = [] for (const attachedFile of data.file.files) { fileNames.push(attachedFile.name) } setFileNames(fileNames) + sourceName = fileNames.join(', ') + } else if (data.file?.source) { + const source = data.file.source + if (typeof source === 'string' && source.includes('base64')) { + sourceName = getFileName(source) + } else if (typeof source === 'string' && source.startsWith('[') && source.endsWith(']')) { + sourceName = JSON.parse(source).join(', ') + } else if (typeof source === 'string') { + sourceName = source + } } + + // Set display name in format "LoaderName (sourceName)" or just "LoaderName" + const displayName = sourceName ? `${loaderName} (${sourceName})` : loaderName + setLoaderDisplayName(displayName) } // eslint-disable-next-line react-hooks/exhaustive-deps @@ -234,7 +255,7 @@ const ShowStoredChunks = () => { navigate(-1)} > diff --git a/packages/ui/src/views/docstore/VectorStoreConfigure.jsx b/packages/ui/src/views/docstore/VectorStoreConfigure.jsx index f05759a8e..13636b77f 100644 --- a/packages/ui/src/views/docstore/VectorStoreConfigure.jsx +++ b/packages/ui/src/views/docstore/VectorStoreConfigure.jsx @@ -40,7 +40,7 @@ import Storage from '@mui/icons-material/Storage' import DynamicFeed from '@mui/icons-material/Filter1' // utils -import { initNode, showHideInputParams } from '@/utils/genericHelper' +import { initNode, showHideInputParams, getFileName } from '@/utils/genericHelper' import useNotifier from '@/utils/useNotifier' // const @@ -69,6 +69,7 @@ const VectorStoreConfigure = () => { const [loading, setLoading] = useState(true) const [documentStore, setDocumentStore] = useState({}) const [dialogProps, setDialogProps] = useState({}) + const [currentLoader, setCurrentLoader] = useState(null) const [showEmbeddingsListDialog, setShowEmbeddingsListDialog] = useState(false) const [selectedEmbeddingsProvider, setSelectedEmbeddingsProvider] = useState({}) @@ -245,7 +246,8 @@ const VectorStoreConfigure = () => { const prepareConfigData = () => { const data = { storeId: storeId, - docId: docId + docId: docId, + isStrictSave: true } // Set embedding config if (selectedEmbeddingsProvider.inputs) { @@ -353,6 +355,39 @@ const VectorStoreConfigure = () => { return Object.keys(selectedEmbeddingsProvider).length === 0 } + const getLoaderDisplayName = (loader) => { + if (!loader) return '' + + const loaderName = loader.loaderName || 'Unknown' + let sourceName = '' + + // Prefer files.name when files array exists and has items + if (loader.files && Array.isArray(loader.files) && loader.files.length > 0) { + sourceName = loader.files.map((file) => file.name).join(', ') + } else if (loader.source) { + // Fallback to source logic + if (typeof loader.source === 'string' && loader.source.includes('base64')) { + sourceName = getFileName(loader.source) + } else if (typeof loader.source === 'string' && loader.source.startsWith('[') && loader.source.endsWith(']')) { + sourceName = JSON.parse(loader.source).join(', ') + } else if (typeof loader.source === 'string') { + sourceName = loader.source + } + } + + // Return format: "LoaderName (sourceName)" or just "LoaderName" if no source + return sourceName ? `${loaderName} (${sourceName})` : loaderName + } + + const getViewHeaderTitle = () => { + const storeName = getSpecificDocumentStoreApi.data?.name || '' + if (docId && currentLoader) { + const loaderName = getLoaderDisplayName(currentLoader) + return `${storeName} / ${loaderName}` + } + return storeName + } + useEffect(() => { if (saveVectorStoreConfigApi.data) { setLoading(false) @@ -411,6 +446,15 @@ const VectorStoreConfigure = () => { return } setDocumentStore(docStore) + + // Find the current loader if docId is provided + if (docId && docStore.loaders) { + const loader = docStore.loaders.find((l) => l.id === docId) + if (loader) { + setCurrentLoader(loader) + } + } + if (docStore.embeddingConfig) { getEmbeddingNodeDetailsApi.request(docStore.embeddingConfig.name) } @@ -473,7 +517,7 @@ const VectorStoreConfigure = () => { navigate(-1)} >