From 4a642f02d0eec2d945d15c6d2a733fb069ad9d9c Mon Sep 17 00:00:00 2001 From: Henry Heng Date: Sat, 15 Nov 2025 11:16:42 +0000 Subject: [PATCH] Chore/Remove Deprecated File Path Unstructured (#5478) * Refactor UnstructuredFile and UnstructuredFolder loaders to remove deprecated file path handling and enhance folder path validation. Ensure folder paths are sanitized and validated against path traversal attacks. * Update UnstructuredFolder.ts --- .../Unstructured/UnstructuredFile.ts | 49 +------------------ 1 file changed, 2 insertions(+), 47 deletions(-) diff --git a/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts b/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts index e1842e27c..d1a372b0c 100644 --- a/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts +++ b/packages/components/nodes/documentloaders/Unstructured/UnstructuredFile.ts @@ -4,15 +4,11 @@ import { UnstructuredLoaderOptions, UnstructuredLoaderStrategy, SkipInferTableTypes, - HiResModelName, - UnstructuredLoader as LCUnstructuredLoader + HiResModelName } from '@langchain/community/document_loaders/fs/unstructured' import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils' import { getFileFromStorage, INodeOutputsValue } from '../../../src' import { UnstructuredLoader } from './Unstructured' -import { isPathTraversal, isUnsafeFilePath } from '../../../src/validator' -import sanitize from 'sanitize-filename' -import path from 'path' class UnstructuredFile_DocumentLoaders implements INode { label: string @@ -44,17 +40,6 @@ class UnstructuredFile_DocumentLoaders implements INode { optional: true } this.inputs = [ - /** Deprecated - { - label: 'File Path', - name: 'filePath', - type: 'string', - placeholder: '', - optional: true, - warning: - 'Use the File Upload instead of File path. If file is uploaded, this path is ignored. Path will be deprecated in future releases.' 
- }, - */ { label: 'Files Upload', name: 'fileObject', @@ -455,7 +440,6 @@ class UnstructuredFile_DocumentLoaders implements INode { } async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> { - const filePath = nodeData.inputs?.filePath as string const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string const strategy = nodeData.inputs?.strategy as UnstructuredLoaderStrategy const encoding = nodeData.inputs?.encoding as string @@ -560,37 +544,8 @@ class UnstructuredFile_DocumentLoaders implements INode { docs.push(...loaderDocs) } } - } else if (filePath) { - if (!filePath || typeof filePath !== 'string') { - throw new Error('Invalid file path format') - } - - if (isPathTraversal(filePath) || isUnsafeFilePath(filePath)) { - throw new Error('Invalid path characters detected in filePath - path traversal not allowed') - } - - const parsedPath = path.parse(filePath) - const sanitizedFilename = sanitize(parsedPath.base) - - if (!sanitizedFilename || sanitizedFilename.trim() === '') { - throw new Error('Invalid filename after sanitization') - } - - const sanitizedFilePath = path.join(parsedPath.dir, sanitizedFilename) - - if (!path.isAbsolute(sanitizedFilePath)) { - throw new Error('File path must be absolute') - } - - if (sanitizedFilePath.includes('..')) { - throw new Error('Invalid file path - directory traversal not allowed') - } - - const loader = new LCUnstructuredLoader(sanitizedFilePath, obj) - const loaderDocs = await loader.load() - docs.push(...loaderDocs) } else { - throw new Error('File path or File upload is required') + throw new Error('File upload is required') } if (metadata) {