From 40b655e66b6b0b5efafd0fe98837175b9d1ccc11 Mon Sep 17 00:00:00 2001 From: Henry Date: Sun, 30 Apr 2023 22:57:12 +0100 Subject: [PATCH] add multiple loaders and documents --- .../nodes/documentloaders/Folder/Folder.ts | 67 +++++++++++++++++++ .../nodes/documentloaders/Folder/folder.svg | 4 ++ .../Chroma_Upsert/Chroma_Upsert.ts | 8 ++- .../InMemory/InMemoryVectorStore.ts | 8 ++- .../Pinecone_Upsert/Pinecone_Upsert.ts | 8 ++- .../Supabase_Upsert/Supabase_Upsert.ts | 8 ++- .../Weaviate_Upsert/Weaviate_Upsert.ts | 8 ++- 7 files changed, 96 insertions(+), 15 deletions(-) create mode 100644 packages/components/nodes/documentloaders/Folder/Folder.ts create mode 100644 packages/components/nodes/documentloaders/Folder/folder.svg diff --git a/packages/components/nodes/documentloaders/Folder/Folder.ts b/packages/components/nodes/documentloaders/Folder/Folder.ts new file mode 100644 index 000000000..8767303fe --- /dev/null +++ b/packages/components/nodes/documentloaders/Folder/Folder.ts @@ -0,0 +1,67 @@ +import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { TextSplitter } from 'langchain/text_splitter' +import { TextLoader } from 'langchain/document_loaders/fs/text' +import { DirectoryLoader } from 'langchain/document_loaders/fs/directory' +import { JSONLoader } from 'langchain/document_loaders/fs/json' +import { CSVLoader } from 'langchain/document_loaders/fs/csv' +import { PDFLoader } from 'langchain/document_loaders/fs/pdf' +import { DocxLoader } from 'langchain/document_loaders/fs/docx' + +class Folder_DocumentLoaders implements INode { + label: string + name: string + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + + constructor() { + this.label = 'Folder with Files' + this.name = 'folderFiles' + this.type = 'Document' + this.icon = 'folder.svg' + this.category = 'Document Loaders' + this.description = `Load data from folder with multiple files` + this.baseClasses = [this.type] + this.inputs = [ + { + label: 'Folder Path', + name: 'folderPath', + type: 'string', + placeholder: '' + }, + { + label: 'Text Splitter', + name: 'textSplitter', + type: 'TextSplitter', + optional: true + } + ] + } + + async init(nodeData: INodeData): Promise { + const textSplitter = nodeData.inputs?.textSplitter as TextSplitter + const folderPath = nodeData.inputs?.folderPath as string + + const loader = new DirectoryLoader(folderPath, { + '.json': (path) => new JSONLoader(path), + '.txt': (path) => new TextLoader(path), + '.csv': (path) => new CSVLoader(path), + '.docx': (path) => new DocxLoader(path), + // @ts-ignore + '.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) + }) + + if (textSplitter) { + const docs = await loader.loadAndSplit(textSplitter) + return docs + } else { + const docs = await loader.load() + return docs + } + } +} + +module.exports = { nodeClass: Folder_DocumentLoaders } diff --git a/packages/components/nodes/documentloaders/Folder/folder.svg b/packages/components/nodes/documentloaders/Folder/folder.svg new file mode 100644 index 000000000..eb2b9de9e --- /dev/null +++ b/packages/components/nodes/documentloaders/Folder/folder.svg @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/packages/components/nodes/vectorstores/Chroma_Upsert/Chroma_Upsert.ts b/packages/components/nodes/vectorstores/Chroma_Upsert/Chroma_Upsert.ts index 3f3001709..508b7ebac 100644 --- a/packages/components/nodes/vectorstores/Chroma_Upsert/Chroma_Upsert.ts +++ b/packages/components/nodes/vectorstores/Chroma_Upsert/Chroma_Upsert.ts @@ -27,7 +27,8 @@ class ChromaUpsert_VectorStores implements INode { { label: 'Document', name: 'document', - type: 'Document' + type: 'Document', + list: true }, { label: 'Embeddings', @@ -60,9 +61,10 @@ class ChromaUpsert_VectorStores implements INode { const embeddings = nodeData.inputs?.embeddings as Embeddings const output = nodeData.outputs?.output as string + const flattenDocs = docs.flat() const finalDocs = [] - for (let i = 0; i < docs.length; i += 1) { - finalDocs.push(new Document(docs[i])) + for (let i = 0; i < flattenDocs.length; i += 1) { + finalDocs.push(new Document(flattenDocs[i])) } const vectorStore = await Chroma.fromDocuments(finalDocs, embeddings, { diff --git a/packages/components/nodes/vectorstores/InMemory/InMemoryVectorStore.ts b/packages/components/nodes/vectorstores/InMemory/InMemoryVectorStore.ts index 8542c1b34..0d85825c9 100644 --- a/packages/components/nodes/vectorstores/InMemory/InMemoryVectorStore.ts +++ b/packages/components/nodes/vectorstores/InMemory/InMemoryVectorStore.ts @@ -27,7 +27,8 @@ class InMemoryVectorStore_VectorStores implements INode { { label: 'Document', name: 'document', - type: 'Document' + type: 'Document', + list: true }, { label: 'Embeddings', @@ -54,9 +55,10 @@ class InMemoryVectorStore_VectorStores implements INode { const embeddings = nodeData.inputs?.embeddings as Embeddings const output = nodeData.outputs?.output as string + const flattenDocs = docs.flat() const finalDocs = [] - for (let i = 0; i < docs.length; i += 1) { - finalDocs.push(new Document(docs[i])) + for (let i = 0; i < flattenDocs.length; i += 1) { + finalDocs.push(new Document(flattenDocs[i])) } const vectorStore = await MemoryVectorStore.fromDocuments(finalDocs, embeddings) diff --git a/packages/components/nodes/vectorstores/Pinecone_Upsert/Pinecone_Upsert.ts b/packages/components/nodes/vectorstores/Pinecone_Upsert/Pinecone_Upsert.ts index aecdf8d64..ed1a3d57f 100644 --- a/packages/components/nodes/vectorstores/Pinecone_Upsert/Pinecone_Upsert.ts +++ b/packages/components/nodes/vectorstores/Pinecone_Upsert/Pinecone_Upsert.ts @@ -28,7 +28,8 @@ class PineconeUpsert_VectorStores implements INode { { label: 'Document', name: 'document', - type: 'Document' + type: 'Document', + list: true }, { label: 'Embeddings', @@ -84,9 +85,10 @@ class PineconeUpsert_VectorStores implements INode { const pineconeIndex = client.Index(index) + const flattenDocs = docs.flat() const finalDocs = [] - for (let i = 0; i < docs.length; i += 1) { - finalDocs.push(new Document(docs[i])) + for (let i = 0; i < flattenDocs.length; i += 1) { + finalDocs.push(new Document(flattenDocs[i])) } const obj: PineconeLibArgs = { diff --git a/packages/components/nodes/vectorstores/Supabase_Upsert/Supabase_Upsert.ts b/packages/components/nodes/vectorstores/Supabase_Upsert/Supabase_Upsert.ts index ff4fe3f8c..4bb629389 100644 --- a/packages/components/nodes/vectorstores/Supabase_Upsert/Supabase_Upsert.ts +++ b/packages/components/nodes/vectorstores/Supabase_Upsert/Supabase_Upsert.ts @@ -28,7 +28,8 @@ class SupabaseUpsert_VectorStores implements INode { { label: 'Document', name: 'document', - type: 'Document' + type: 'Document', + list: true }, { label: 'Embeddings', @@ -81,9 +82,10 @@ class SupabaseUpsert_VectorStores implements INode { const client = createClient(supabaseProjUrl, supabaseApiKey) + const flattenDocs = docs.flat() const finalDocs = [] - for (let i = 0; i < docs.length; i += 1) { - finalDocs.push(new Document(docs[i])) + for (let i = 0; i < flattenDocs.length; i += 1) { + finalDocs.push(new Document(flattenDocs[i])) } const vectorStore = await SupabaseVectorStore.fromDocuments(finalDocs, embeddings, { diff --git a/packages/components/nodes/vectorstores/Weaviate_Upsert/Weaviate_Upsert.ts b/packages/components/nodes/vectorstores/Weaviate_Upsert/Weaviate_Upsert.ts index 6d31338bf..4f360710d 100644 --- a/packages/components/nodes/vectorstores/Weaviate_Upsert/Weaviate_Upsert.ts +++ b/packages/components/nodes/vectorstores/Weaviate_Upsert/Weaviate_Upsert.ts @@ -28,7 +28,8 @@ class WeaviateUpsert_VectorStores implements INode { { label: 'Document', name: 'document', - type: 'Document' + type: 'Document', + list: true }, { label: 'Embeddings', @@ -121,9 +122,10 @@ class WeaviateUpsert_VectorStores implements INode { const client: WeaviateClient = weaviate.client(clientConfig) + const flattenDocs = docs.flat() const finalDocs = [] - for (let i = 0; i < docs.length; i += 1) { - finalDocs.push(new Document(docs[i])) + for (let i = 0; i < flattenDocs.length; i += 1) { + finalDocs.push(new Document(flattenDocs[i])) } const obj: WeaviateLibArgs = {