From f5be889ea8047c6aa7b1fe8f7de16805c947b55e Mon Sep 17 00:00:00 2001 From: louyongjiu <921944921@qq.com> Date: Fri, 19 Apr 2024 18:43:22 +0800 Subject: [PATCH] Feature: Add pdfUsage parameter setting support to folderFiles (#2211) * Add pdfUsage parameter setting support to folderFiles * Add pdfUsage parameter setting additionalParams: true --- .../nodes/documentloaders/Folder/Folder.ts | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/packages/components/nodes/documentloaders/Folder/Folder.ts b/packages/components/nodes/documentloaders/Folder/Folder.ts index fb3db8e8e..2b233d9e0 100644 --- a/packages/components/nodes/documentloaders/Folder/Folder.ts +++ b/packages/components/nodes/documentloaders/Folder/Folder.ts @@ -21,7 +21,7 @@ class Folder_DocumentLoaders implements INode { constructor() { this.label = 'Folder with Files' this.name = 'folderFiles' - this.version = 1.0 + this.version = 2.0 this.type = 'Document' this.icon = 'folder.svg' this.category = 'Document Loaders' @@ -46,6 +46,24 @@ class Folder_DocumentLoaders implements INode { type: 'TextSplitter', optional: true }, + { + label: 'Pdf Usage', + name: 'pdfUsage', + type: 'options', + options: [ + { + label: 'One document per page', + name: 'perPage' + }, + { + label: 'One document per file', + name: 'perFile' + } + ], + default: 'perPage', + optional: true, + additionalParams: true + }, { label: 'Metadata', name: 'metadata', @@ -61,6 +79,7 @@ class Folder_DocumentLoaders implements INode { const folderPath = nodeData.inputs?.folderPath as string const metadata = nodeData.inputs?.metadata const recursive = nodeData.inputs?.recursive as boolean + const pdfUsage = nodeData.inputs?.pdfUsage const loader = new DirectoryLoader( folderPath, @@ -69,8 +88,12 @@ class Folder_DocumentLoaders implements INode { '.txt': (path) => new TextLoader(path), '.csv': (path) => new CSVLoader(path), '.docx': (path) => new DocxLoader(path), - // @ts-ignore - '.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }), + '.pdf': (path) => + pdfUsage === 'perFile' + ? // @ts-ignore + new PDFLoader(path, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) + : // @ts-ignore + new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }), '.aspx': (path) => new TextLoader(path), '.asp': (path) => new TextLoader(path), '.cpp': (path) => new TextLoader(path), // C++