diff --git a/packages/components/credentials/UnstructuredApi.credential.ts b/packages/components/credentials/UnstructuredApi.credential.ts
new file mode 100644
index 000000000..5c77895a1
--- /dev/null
+++ b/packages/components/credentials/UnstructuredApi.credential.ts
@@ -0,0 +1,31 @@
+import { INodeParams, INodeCredential } from '../src/Interface'
+
+/**
+ * Credential definition that stores the API key for the Unstructured API.
+ * Its `name` is the identifier the Unstructured loader nodes list in `credentialNames`.
+ */
+class UnstructuredApi implements INodeCredential {
+    label: string
+    name: string
+    version: number
+    description: string
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'Unstructured API'
+        this.name = 'unstructuredApi'
+        this.version = 1.0
+        this.description =
+            'Refer to official guide on how to get api key on Unstructured'
+        this.inputs = [
+            {
+                // Looked up by the loader nodes via getCredentialParam('unstructuredAPIKey', ...)
+                label: 'API Key',
+                name: 'unstructuredAPIKey',
+                type: 'password'
+            }
+        ]
+    }
+}
+
+module.exports = { credClass: UnstructuredApi }
diff --git a/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts b/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts
new file mode 100644
index 000000000..820aaab55
--- /dev/null
+++ b/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts
@@ -0,0 +1,103 @@
+import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
+import { UnstructuredLoader, UnstructuredLoaderOptions } from 'langchain/document_loaders/fs/unstructured'
+import { getCredentialData, getCredentialParam } from '../../../src/utils'
+
+/**
+ * Document loader node that loads documents from a file path using LangChain's
+ * `UnstructuredLoader` against the configured Unstructured API URL.
+ */
+class UnstructuredFile_DocumentLoaders implements INode {
+    label: string
+    name: string
+    version: number
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    credential: INodeParams
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'Unstructured File Loader'
+        this.name = 'unstructuredFileLoader'
+        this.version = 1.0
+        this.type = 'Document'
+        this.icon = 'unstructured.png'
+        this.category = 'Document Loaders'
+        this.description = 'Use Unstructured.io to load data from a file path'
+        this.baseClasses = [this.type]
+        // The API key credential is optional; init() only sets apiKey when one is found.
+        this.credential = {
+            label: 'Connect Credential',
+            name: 'credential',
+            type: 'credential',
+            credentialNames: ['unstructuredApi'],
+            optional: true
+        }
+        this.inputs = [
+            {
+                label: 'File Path',
+                name: 'filePath',
+                type: 'string',
+                placeholder: ''
+            },
+            {
+                label: 'Unstructured API URL',
+                name: 'unstructuredAPIUrl',
+                description:
+                    'Unstructured API URL. Read more on how to get started',
+                type: 'string',
+                default: 'http://localhost:8000/general/v0/general'
+            },
+            {
+                label: 'Metadata',
+                name: 'metadata',
+                type: 'json',
+                optional: true,
+                additionalParams: true
+            }
+            /*TODO Add Filter Options*/
+        ]
+    }
+
+    /**
+     * Loads the configured file through the Unstructured API.
+     *
+     * @param nodeData - Node configuration; reads `inputs.filePath`, `inputs.unstructuredAPIUrl`,
+     * `inputs.metadata` and `credential`.
+     * @param _ - Unused.
+     * @param options - Passed through to `getCredentialData`.
+     * @returns The loaded documents; when `metadata` is provided, each document is a copy whose
+     * metadata has the extra keys merged in (extra keys win on conflict).
+     * @throws SyntaxError if `metadata` is not an object and is not valid JSON.
+     */
+    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
+        const filePath = nodeData.inputs?.filePath as string
+        const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
+        const metadata = nodeData.inputs?.metadata
+
+        const obj: UnstructuredLoaderOptions = { apiUrl: unstructuredAPIUrl }
+
+        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+        const unstructuredAPIKey = getCredentialParam('unstructuredAPIKey', credentialData, nodeData)
+        if (unstructuredAPIKey) obj.apiKey = unstructuredAPIKey
+
+        const loader = new UnstructuredLoader(filePath, obj)
+        const docs = await loader.load()
+
+        if (!metadata) return docs
+
+        // Metadata may already be an object or still be a JSON string.
+        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+        return docs.map((doc) => ({
+            ...doc,
+            metadata: {
+                ...doc.metadata,
+                ...parsedMetadata
+            }
+        }))
+    }
+}
+
+module.exports = { nodeClass: UnstructuredFile_DocumentLoaders }
diff --git a/packages/components/nodes/documentloaders/Unstructured/UnstructuredFolder.ts b/packages/components/nodes/documentloaders/Unstructured/UnstructuredFolder.ts
new file mode 100644
index 000000000..4a52fb5a6
--- /dev/null
+++ b/packages/components/nodes/documentloaders/Unstructured/UnstructuredFolder.ts
@@ -0,0 +1,103 @@
+import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
+import { UnstructuredDirectoryLoader, UnstructuredLoaderOptions } from 'langchain/document_loaders/fs/unstructured'
+import { getCredentialData, getCredentialParam } from '../../../src/utils'
+
+/**
+ * Document loader node that loads documents from a folder using LangChain's
+ * `UnstructuredDirectoryLoader` against the configured Unstructured API URL.
+ */
+class UnstructuredFolder_DocumentLoaders implements INode {
+    label: string
+    name: string
+    version: number
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    credential: INodeParams
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'Unstructured Folder Loader'
+        this.name = 'unstructuredFolderLoader'
+        this.version = 1.0
+        this.type = 'Document'
+        this.icon = 'unstructured.png'
+        this.category = 'Document Loaders'
+        this.description = 'Use Unstructured.io to load data from a folder'
+        this.baseClasses = [this.type]
+        // The API key credential is optional; init() only sets apiKey when one is found.
+        this.credential = {
+            label: 'Connect Credential',
+            name: 'credential',
+            type: 'credential',
+            credentialNames: ['unstructuredApi'],
+            optional: true
+        }
+        this.inputs = [
+            {
+                label: 'Folder Path',
+                name: 'folderPath',
+                type: 'string',
+                placeholder: ''
+            },
+            {
+                label: 'Unstructured API URL',
+                name: 'unstructuredAPIUrl',
+                description:
+                    'Unstructured API URL. Read more on how to get started',
+                type: 'string',
+                default: 'http://localhost:8000/general/v0/general'
+            },
+            {
+                label: 'Metadata',
+                name: 'metadata',
+                type: 'json',
+                optional: true,
+                additionalParams: true
+            }
+            /*TODO Add Filter Options*/
+        ]
+    }
+
+    /**
+     * Loads the configured folder through the Unstructured API.
+     *
+     * @param nodeData - Node configuration; reads `inputs.folderPath`, `inputs.unstructuredAPIUrl`,
+     * `inputs.metadata` and `credential`.
+     * @param _ - Unused.
+     * @param options - Passed through to `getCredentialData`.
+     * @returns The loaded documents; when `metadata` is provided, each document is a copy whose
+     * metadata has the extra keys merged in (extra keys win on conflict).
+     * @throws SyntaxError if `metadata` is not an object and is not valid JSON.
+     */
+    async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
+        const folderPath = nodeData.inputs?.folderPath as string
+        const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
+        const metadata = nodeData.inputs?.metadata
+
+        const obj: UnstructuredLoaderOptions = { apiUrl: unstructuredAPIUrl }
+
+        const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+        const unstructuredAPIKey = getCredentialParam('unstructuredAPIKey', credentialData, nodeData)
+        if (unstructuredAPIKey) obj.apiKey = unstructuredAPIKey
+
+        const loader = new UnstructuredDirectoryLoader(folderPath, obj)
+        const docs = await loader.load()
+
+        if (!metadata) return docs
+
+        // Metadata may already be an object or still be a JSON string.
+        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+        return docs.map((doc) => ({
+            ...doc,
+            metadata: {
+                ...doc.metadata,
+                ...parsedMetadata
+            }
+        }))
+    }
+}
+
+module.exports = { nodeClass: UnstructuredFolder_DocumentLoaders }
diff --git a/packages/components/nodes/documentloaders/Unstructured/unstructured.png b/packages/components/nodes/documentloaders/Unstructured/unstructured.png
new file mode 100644
index 000000000..435219bf7
Binary files /dev/null and b/packages/components/nodes/documentloaders/Unstructured/unstructured.png differ