PDF loader: add legacy build option

This commit is contained in:
Henry 2023-06-14 12:34:25 +01:00
parent 92cd760b16
commit 8f4b5ba3f6
2 changed files with 17 additions and 4 deletions

View File

@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode {
], ],
default: 'perPage' default: 'perPage'
}, },
{
label: 'Use Legacy Build',
name: 'legacyBuild',
type: 'boolean',
optional: true,
additionalParams: true
},
{ {
label: 'Metadata', label: 'Metadata',
name: 'metadata', name: 'metadata',
@ -64,6 +71,7 @@ class Pdf_DocumentLoaders implements INode {
const pdfFileBase64 = nodeData.inputs?.pdfFile as string const pdfFileBase64 = nodeData.inputs?.pdfFile as string
const usage = nodeData.inputs?.usage as string const usage = nodeData.inputs?.usage as string
const metadata = nodeData.inputs?.metadata const metadata = nodeData.inputs?.metadata
const legacyBuild = nodeData.inputs?.legacyBuild as boolean
let alldocs = [] let alldocs = []
let files: string[] = [] let files: string[] = []
@ -82,7 +90,8 @@ class Pdf_DocumentLoaders implements INode {
const loader = new PDFLoader(new Blob([bf]), { const loader = new PDFLoader(new Blob([bf]), {
splitPages: false, splitPages: false,
// @ts-ignore // @ts-ignore
pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') pdfjs: () =>
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
}) })
if (textSplitter) { if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter) const docs = await loader.loadAndSplit(textSplitter)
@ -93,7 +102,10 @@ class Pdf_DocumentLoaders implements INode {
} }
} else { } else {
// @ts-ignore // @ts-ignore
const loader = new PDFLoader(new Blob([bf]), { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) const loader = new PDFLoader(new Blob([bf]), {
pdfjs: () =>
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
})
if (textSplitter) { if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter) const docs = await loader.loadAndSplit(textSplitter)
alldocs.push(...docs) alldocs.push(...docs)

View File

@ -32,15 +32,16 @@
"faiss-node": "^0.2.1", "faiss-node": "^0.2.1",
"form-data": "^4.0.0", "form-data": "^4.0.0",
"graphql": "^16.6.0", "graphql": "^16.6.0",
"html-to-text": "^9.0.5",
"langchain": "^0.0.94", "langchain": "^0.0.94",
"linkifyjs": "^4.1.1", "linkifyjs": "^4.1.1",
"mammoth": "^1.5.1", "mammoth": "^1.5.1",
"moment": "^2.29.3", "moment": "^2.29.3",
"node-fetch": "^2.6.11", "node-fetch": "^2.6.11",
"pdf-parse": "^1.1.1", "pdf-parse": "^1.1.1",
"pdfjs-dist": "^3.7.107",
"weaviate-ts-client": "^1.1.0", "weaviate-ts-client": "^1.1.0",
"ws": "^8.9.0", "ws": "^8.9.0"
"html-to-text": "^9.0.5"
}, },
"devDependencies": { "devDependencies": {
"@types/gulp": "4.0.9", "@types/gulp": "4.0.9",