Merge pull request #1424 from FlowiseAI/feature/S3

Feature/update S3 loader
This commit is contained in:
Henry Heng 2023-12-22 02:09:46 +00:00 committed by GitHub
commit 177d1ae0e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 68 additions and 9 deletions

View File

@@ -30,7 +30,7 @@ class S3_DocumentLoaders implements INode {
constructor() { constructor() {
this.label = 'S3' this.label = 'S3'
this.name = 'S3' this.name = 'S3'
this.version = 1.0 this.version = 2.0
this.type = 'Document' this.type = 'Document'
this.icon = 's3.svg' this.icon = 's3.svg'
this.category = 'Document Loaders' this.category = 'Document Loaders'
@@ -113,12 +113,62 @@ class S3_DocumentLoaders implements INode {
optional: true optional: true
}, },
{ {
label: 'NarrativeText Only', label: 'Element Type',
name: 'narrativeTextOnly', name: 'elementType',
description: description:
'Only load documents with NarrativeText metadata from Unstructured. See how Unstructured partition data <a target="_blank" href="https://unstructured-io.github.io/unstructured/bricks/partition.html#">here</a>', 'Unstructured partition document into different types, select the types to return. If not selected, all types will be returned',
default: true, type: 'multiOptions',
type: 'boolean', options: [
{
label: 'FigureCaption',
name: 'FigureCaption'
},
{
label: 'NarrativeText',
name: 'NarrativeText'
},
{
label: 'ListItem',
name: 'ListItem'
},
{
label: 'Title',
name: 'Title'
},
{
label: 'Address',
name: 'Address'
},
{
label: 'Table',
name: 'Table'
},
{
label: 'PageBreak',
name: 'PageBreak'
},
{
label: 'Header',
name: 'Header'
},
{
label: 'Footer',
name: 'Footer'
},
{
label: 'UncategorizedText',
name: 'UncategorizedText'
},
{
label: 'Image',
name: 'Image'
},
{
label: 'Formula',
name: 'Formula'
}
],
default: [],
optional: true, optional: true,
additionalParams: true additionalParams: true
}, },
@@ -138,7 +188,7 @@ class S3_DocumentLoaders implements INode {
const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
const unstructuredAPIKey = nodeData.inputs?.unstructuredAPIKey as string const unstructuredAPIKey = nodeData.inputs?.unstructuredAPIKey as string
const metadata = nodeData.inputs?.metadata const metadata = nodeData.inputs?.metadata
const narrativeTextOnly = nodeData.inputs?.narrativeTextOnly as boolean const elementType = nodeData.inputs?.elementType as string
const credentialData = await getCredentialData(nodeData.credential ?? '', options) const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData) const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
@@ -169,6 +219,15 @@ class S3_DocumentLoaders implements INode {
} }
} }
let elementTypes: string[] = []
if (elementType) {
try {
elementTypes = JSON.parse(elementType)
} catch (e) {
elementTypes = []
}
}
loader.load = async () => { loader.load = async () => {
const tempDir = fsDefault.mkdtempSync(path.join(os.tmpdir(), 's3fileloader-')) const tempDir = fsDefault.mkdtempSync(path.join(os.tmpdir(), 's3fileloader-'))
@@ -235,10 +294,10 @@ class S3_DocumentLoaders implements INode {
} }
} }
}) })
return narrativeTextOnly ? finaldocs.filter((doc) => doc.metadata.category === 'NarrativeText') : finaldocs return elementTypes.length ? finaldocs.filter((doc) => elementTypes.includes(doc.metadata.category)) : finaldocs
} }
return narrativeTextOnly ? docs.filter((doc) => doc.metadata.category === 'NarrativeText') : docs return elementTypes.length ? docs.filter((doc) => elementTypes.includes(doc.metadata.category)) : docs
} }
} }
module.exports = { nodeClass: S3_DocumentLoaders } module.exports = { nodeClass: S3_DocumentLoaders }