use pdf-parse

This commit is contained in:
Henry 2023-04-13 19:56:48 +01:00
parent 02d8284f58
commit 930449f956
2 changed files with 5 additions and 3 deletions

View File

@@ -66,7 +66,8 @@ class Pdf_DocumentLoaders implements INode {
const blob = new Blob([bf]) const blob = new Blob([bf])
if (usage === 'perFile') { if (usage === 'perFile') {
const loader = new PDFLoader(blob, { splitPages: false }) // @ts-ignore
const loader = new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) { if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter) const docs = await loader.loadAndSplit(textSplitter)
return docs return docs
@@ -75,7 +76,8 @@ class Pdf_DocumentLoaders implements INode {
return docs return docs
} }
} else { } else {
const loader = new PDFLoader(blob) // @ts-ignore
const loader = new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
if (textSplitter) { if (textSplitter) {
const docs = await loader.loadAndSplit(textSplitter) const docs = await loader.loadAndSplit(textSplitter)
return docs return docs

View File

@@ -27,7 +27,7 @@
"langchain": "^0.0.44", "langchain": "^0.0.44",
"moment": "^2.29.3", "moment": "^2.29.3",
"node-fetch": "2", "node-fetch": "2",
"pdfjs-dist": "^3.5.141", "pdf-parse": "^1.1.1",
"ws": "^8.9.0" "ws": "^8.9.0"
}, },
"devDependencies": { "devDependencies": {