Merge pull request #1424 from FlowiseAI/feature/S3

Feature/update S3 loader
This commit is contained in:
Henry Heng 2023-12-22 02:09:46 +00:00 committed by GitHub
commit 177d1ae0e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 68 additions and 9 deletions

View File

@ -30,7 +30,7 @@ class S3_DocumentLoaders implements INode {
constructor() {
this.label = 'S3'
this.name = 'S3'
this.version = 1.0
this.version = 2.0
this.type = 'Document'
this.icon = 's3.svg'
this.category = 'Document Loaders'
@ -113,12 +113,62 @@ class S3_DocumentLoaders implements INode {
optional: true
},
{
label: 'NarrativeText Only',
name: 'narrativeTextOnly',
label: 'Element Type',
name: 'elementType',
description:
'Only load documents with NarrativeText metadata from Unstructured. See how Unstructured partition data <a target="_blank" href="https://unstructured-io.github.io/unstructured/bricks/partition.html#">here</a>',
default: true,
type: 'boolean',
'Unstructured partition document into different types, select the types to return. If not selected, all types will be returned',
type: 'multiOptions',
options: [
{
label: 'FigureCaption',
name: 'FigureCaption'
},
{
label: 'NarrativeText',
name: 'NarrativeText'
},
{
label: 'ListItem',
name: 'ListItem'
},
{
label: 'Title',
name: 'Title'
},
{
label: 'Address',
name: 'Address'
},
{
label: 'Table',
name: 'Table'
},
{
label: 'PageBreak',
name: 'PageBreak'
},
{
label: 'Header',
name: 'Header'
},
{
label: 'Footer',
name: 'Footer'
},
{
label: 'UncategorizedText',
name: 'UncategorizedText'
},
{
label: 'Image',
name: 'Image'
},
{
label: 'Formula',
name: 'Formula'
}
],
default: [],
optional: true,
additionalParams: true
},
@ -138,7 +188,7 @@ class S3_DocumentLoaders implements INode {
const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
const unstructuredAPIKey = nodeData.inputs?.unstructuredAPIKey as string
const metadata = nodeData.inputs?.metadata
const narrativeTextOnly = nodeData.inputs?.narrativeTextOnly as boolean
const elementType = nodeData.inputs?.elementType as string
const credentialData = await getCredentialData(nodeData.credential ?? '', options)
const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData)
@ -169,6 +219,15 @@ class S3_DocumentLoaders implements INode {
}
}
let elementTypes: string[] = []
if (elementType) {
try {
elementTypes = JSON.parse(elementType)
} catch (e) {
elementTypes = []
}
}
loader.load = async () => {
const tempDir = fsDefault.mkdtempSync(path.join(os.tmpdir(), 's3fileloader-'))
@ -235,10 +294,10 @@ class S3_DocumentLoaders implements INode {
}
}
})
return narrativeTextOnly ? finaldocs.filter((doc) => doc.metadata.category === 'NarrativeText') : finaldocs
return elementTypes.length ? finaldocs.filter((doc) => elementTypes.includes(doc.metadata.category)) : finaldocs
}
return narrativeTextOnly ? docs.filter((doc) => doc.metadata.category === 'NarrativeText') : docs
return elementTypes.length ? docs.filter((doc) => elementTypes.includes(doc.metadata.category)) : docs
}
}
module.exports = { nodeClass: S3_DocumentLoaders }