Feature: Add pdfUsage parameter setting support to folderFiles (#2211)

* Add pdfUsage parameter setting support to folderFiles

* Set additionalParams: true on the pdfUsage parameter
This commit is contained in:
louyongjiu 2024-04-19 18:43:22 +08:00 committed by GitHub
parent f4c7887e50
commit f5be889ea8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 26 additions and 3 deletions

View File

@@ -21,7 +21,7 @@ class Folder_DocumentLoaders implements INode {
constructor() {
this.label = 'Folder with Files'
this.name = 'folderFiles'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 'folder.svg'
this.category = 'Document Loaders'
@@ -46,6 +46,24 @@ class Folder_DocumentLoaders implements INode {
type: 'TextSplitter',
optional: true
},
{
label: 'Pdf Usage',
name: 'pdfUsage',
type: 'options',
options: [
{
label: 'One document per page',
name: 'perPage'
},
{
label: 'One document per file',
name: 'perFile'
}
],
default: 'perPage',
optional: true,
additionalParams: true
},
{
label: 'Metadata',
name: 'metadata',
@@ -61,6 +79,7 @@ class Folder_DocumentLoaders implements INode {
const folderPath = nodeData.inputs?.folderPath as string
const metadata = nodeData.inputs?.metadata
const recursive = nodeData.inputs?.recursive as boolean
const pdfUsage = nodeData.inputs?.pdfUsage
const loader = new DirectoryLoader(
folderPath,
@@ -69,8 +88,12 @@ class Folder_DocumentLoaders implements INode {
'.txt': (path) => new TextLoader(path),
'.csv': (path) => new CSVLoader(path),
'.docx': (path) => new DocxLoader(path),
// @ts-ignore
'.pdf': (path) => new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }),
'.pdf': (path) =>
pdfUsage === 'perFile'
? // @ts-ignore
new PDFLoader(path, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
: // @ts-ignore
new PDFLoader(path, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }),
'.aspx': (path) => new TextLoader(path),
'.asp': (path) => new TextLoader(path),
'.cpp': (path) => new TextLoader(path), // C++