PDF loader: add "Use Legacy Build" option
This commit is contained in:
parent
92cd760b16
commit
8f4b5ba3f6
|
|
@@ -49,6 +49,13 @@ class Pdf_DocumentLoaders implements INode {
|
|||
],
|
||||
default: 'perPage'
|
||||
},
|
||||
{
|
||||
label: 'Use Legacy Build',
|
||||
name: 'legacyBuild',
|
||||
type: 'boolean',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
label: 'Metadata',
|
||||
name: 'metadata',
|
||||
|
|
@@ -64,6 +71,7 @@ class Pdf_DocumentLoaders implements INode {
|
|||
const pdfFileBase64 = nodeData.inputs?.pdfFile as string
|
||||
const usage = nodeData.inputs?.usage as string
|
||||
const metadata = nodeData.inputs?.metadata
|
||||
const legacyBuild = nodeData.inputs?.legacyBuild as boolean
|
||||
|
||||
let alldocs = []
|
||||
let files: string[] = []
|
||||
|
|
@@ -82,7 +90,8 @@ class Pdf_DocumentLoaders implements INode {
|
|||
const loader = new PDFLoader(new Blob([bf]), {
|
||||
splitPages: false,
|
||||
// @ts-ignore
|
||||
pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
|
||||
pdfjs: () =>
|
||||
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
|
||||
})
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
|
|
@@ -93,7 +102,10 @@ class Pdf_DocumentLoaders implements INode {
|
|||
}
|
||||
} else {
|
||||
// @ts-ignore
|
||||
const loader = new PDFLoader(new Blob([bf]), { pdfjs: () => import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js') })
|
||||
const loader = new PDFLoader(new Blob([bf]), {
|
||||
pdfjs: () =>
|
||||
legacyBuild ? import('pdfjs-dist/legacy/build/pdf.js') : import('pdf-parse/lib/pdf.js/v1.10.100/build/pdf.js')
|
||||
})
|
||||
if (textSplitter) {
|
||||
const docs = await loader.loadAndSplit(textSplitter)
|
||||
alldocs.push(...docs)
|
||||
|
|
|
|||
|
|
@@ -32,15 +32,16 @@
|
|||
"faiss-node": "^0.2.1",
|
||||
"form-data": "^4.0.0",
|
||||
"graphql": "^16.6.0",
|
||||
"html-to-text": "^9.0.5",
|
||||
"langchain": "^0.0.94",
|
||||
"linkifyjs": "^4.1.1",
|
||||
"mammoth": "^1.5.1",
|
||||
"moment": "^2.29.3",
|
||||
"node-fetch": "^2.6.11",
|
||||
"pdf-parse": "^1.1.1",
|
||||
"pdfjs-dist": "^3.7.107",
|
||||
"weaviate-ts-client": "^1.1.0",
|
||||
"ws": "^8.9.0",
|
||||
"html-to-text": "^9.0.5"
|
||||
"ws": "^8.9.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/gulp": "4.0.9",
|
||||
|
|
|
|||
Loading…
Reference in New Issue