diff --git a/packages/components/nodes/documentloaders/S3File/S3File.ts b/packages/components/nodes/documentloaders/S3File/S3File.ts index 58ffd8af7..eadb4d992 100644 --- a/packages/components/nodes/documentloaders/S3File/S3File.ts +++ b/packages/components/nodes/documentloaders/S3File/S3File.ts @@ -30,7 +30,7 @@ class S3_DocumentLoaders implements INode { constructor() { this.label = 'S3' this.name = 'S3' - this.version = 1.0 + this.version = 2.0 this.type = 'Document' this.icon = 's3.svg' this.category = 'Document Loaders' @@ -113,12 +113,62 @@ class S3_DocumentLoaders implements INode { optional: true }, { - label: 'NarrativeText Only', - name: 'narrativeTextOnly', + label: 'Element Type', + name: 'elementType', description: - 'Only load documents with NarrativeText metadata from Unstructured. See how Unstructured partition data here', - default: true, - type: 'boolean', + 'Unstructured partition document into different types, select the types to return. If not selected, all types will be returned', + type: 'multiOptions', + options: [ + { + label: 'FigureCaption', + name: 'FigureCaption' + }, + { + label: 'NarrativeText', + name: 'NarrativeText' + }, + { + label: 'ListItem', + name: 'ListItem' + }, + { + label: 'Title', + name: 'Title' + }, + { + label: 'Address', + name: 'Address' + }, + { + label: 'Table', + name: 'Table' + }, + { + label: 'PageBreak', + name: 'PageBreak' + }, + { + label: 'Header', + name: 'Header' + }, + { + label: 'Footer', + name: 'Footer' + }, + { + label: 'UncategorizedText', + name: 'UncategorizedText' + }, + { + label: 'Image', + name: 'Image' + }, + { + label: 'Formula', + name: 'Formula' + } + ], + default: [], optional: true, additionalParams: true }, @@ -138,7 +188,7 @@ class S3_DocumentLoaders implements INode { const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string const unstructuredAPIKey = nodeData.inputs?.unstructuredAPIKey as string const metadata = nodeData.inputs?.metadata - const narrativeTextOnly = nodeData.inputs?.narrativeTextOnly as boolean + const elementType = nodeData.inputs?.elementType as string const credentialData = await getCredentialData(nodeData.credential ?? '', options) const accessKeyId = getCredentialParam('awsKey', credentialData, nodeData) @@ -169,6 +219,15 @@ class S3_DocumentLoaders implements INode { } } + let elementTypes: string[] = [] + if (elementType) { + try { + elementTypes = JSON.parse(elementType) + } catch (e) { + elementTypes = [] + } + } + loader.load = async () => { const tempDir = fsDefault.mkdtempSync(path.join(os.tmpdir(), 's3fileloader-')) @@ -235,10 +294,10 @@ class S3_DocumentLoaders implements INode { } } }) - return narrativeTextOnly ? finaldocs.filter((doc) => doc.metadata.category === 'NarrativeText') : finaldocs + return elementTypes.length ? finaldocs.filter((doc) => elementTypes.includes(doc.metadata.category)) : finaldocs } - return narrativeTextOnly ? docs.filter((doc) => doc.metadata.category === 'NarrativeText') : docs + return elementTypes.length ? docs.filter((doc) => elementTypes.includes(doc.metadata.category)) : docs } } module.exports = { nodeClass: S3_DocumentLoaders }