From 930449f956f24adc6c6ce24d5a739ab7d60221fd Mon Sep 17 00:00:00 2001 From: Henry Date: Thu, 13 Apr 2023 19:56:48 +0100 Subject: [PATCH] use pdf-parse --- packages/components/nodes/documentloaders/Pdf/Pdf.ts | 6 ++++-- packages/components/package.json | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/packages/components/nodes/documentloaders/Pdf/Pdf.ts b/packages/components/nodes/documentloaders/Pdf/Pdf.ts index 10e122268..936259cb3 100644 --- a/packages/components/nodes/documentloaders/Pdf/Pdf.ts +++ b/packages/components/nodes/documentloaders/Pdf/Pdf.ts @@ -66,7 +66,8 @@ class Pdf_DocumentLoaders implements INode { const blob = new Blob([bf]) if (usage === 'perFile') { - const loader = new PDFLoader(blob, { splitPages: false }) + // @ts-ignore + const loader = new PDFLoader(blob, { splitPages: false, pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) if (textSplitter) { const docs = await loader.loadAndSplit(textSplitter) return docs @@ -75,7 +76,8 @@ class Pdf_DocumentLoaders implements INode { return docs } } else { - const loader = new PDFLoader(blob) + // @ts-ignore + const loader = new PDFLoader(blob, { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') }) if (textSplitter) { const docs = await loader.loadAndSplit(textSplitter) return docs diff --git a/packages/components/package.json b/packages/components/package.json index 0a211553d..a80c434e9 100644 --- a/packages/components/package.json +++ b/packages/components/package.json @@ -27,7 +27,7 @@ "langchain": "^0.0.44", "moment": "^2.29.3", "node-fetch": "2", - "pdfjs-dist": "^3.5.141", + "pdf-parse": "^1.1.1", "ws": "^8.9.0" }, "devDependencies": {