Chore/Remove Deprecated File Path Unstructured (#5478)
* Refactor UnstructuredFile and UnstructuredFolder loaders to remove deprecated file path handling and enhance folder path validation. Ensure folder paths are sanitized and validated against path traversal attacks. * Update UnstructuredFolder.ts
This commit is contained in:
parent
ceb0512e2f
commit
4a642f02d0
|
|
@ -4,15 +4,11 @@ import {
|
|||
UnstructuredLoaderOptions,
|
||||
UnstructuredLoaderStrategy,
|
||||
SkipInferTableTypes,
|
||||
HiResModelName,
|
||||
UnstructuredLoader as LCUnstructuredLoader
|
||||
HiResModelName
|
||||
} from '@langchain/community/document_loaders/fs/unstructured'
|
||||
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
|
||||
import { getFileFromStorage, INodeOutputsValue } from '../../../src'
|
||||
import { UnstructuredLoader } from './Unstructured'
|
||||
import { isPathTraversal, isUnsafeFilePath } from '../../../src/validator'
|
||||
import sanitize from 'sanitize-filename'
|
||||
import path from 'path'
|
||||
|
||||
class UnstructuredFile_DocumentLoaders implements INode {
|
||||
label: string
|
||||
|
|
@ -44,17 +40,6 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
|||
optional: true
|
||||
}
|
||||
this.inputs = [
|
||||
/** Deprecated
|
||||
{
|
||||
label: 'File Path',
|
||||
name: 'filePath',
|
||||
type: 'string',
|
||||
placeholder: '',
|
||||
optional: true,
|
||||
warning:
|
||||
'Use the File Upload instead of File path. If file is uploaded, this path is ignored. Path will be deprecated in future releases.'
|
||||
},
|
||||
*/
|
||||
{
|
||||
label: 'Files Upload',
|
||||
name: 'fileObject',
|
||||
|
|
@ -455,7 +440,6 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
|||
}
|
||||
|
||||
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||
const filePath = nodeData.inputs?.filePath as string
|
||||
const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
|
||||
const strategy = nodeData.inputs?.strategy as UnstructuredLoaderStrategy
|
||||
const encoding = nodeData.inputs?.encoding as string
|
||||
|
|
@ -560,37 +544,8 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
|||
docs.push(...loaderDocs)
|
||||
}
|
||||
}
|
||||
} else if (filePath) {
|
||||
if (!filePath || typeof filePath !== 'string') {
|
||||
throw new Error('Invalid file path format')
|
||||
}
|
||||
|
||||
if (isPathTraversal(filePath) || isUnsafeFilePath(filePath)) {
|
||||
throw new Error('Invalid path characters detected in filePath - path traversal not allowed')
|
||||
}
|
||||
|
||||
const parsedPath = path.parse(filePath)
|
||||
const sanitizedFilename = sanitize(parsedPath.base)
|
||||
|
||||
if (!sanitizedFilename || sanitizedFilename.trim() === '') {
|
||||
throw new Error('Invalid filename after sanitization')
|
||||
}
|
||||
|
||||
const sanitizedFilePath = path.join(parsedPath.dir, sanitizedFilename)
|
||||
|
||||
if (!path.isAbsolute(sanitizedFilePath)) {
|
||||
throw new Error('File path must be absolute')
|
||||
}
|
||||
|
||||
if (sanitizedFilePath.includes('..')) {
|
||||
throw new Error('Invalid file path - directory traversal not allowed')
|
||||
}
|
||||
|
||||
const loader = new LCUnstructuredLoader(sanitizedFilePath, obj)
|
||||
const loaderDocs = await loader.load()
|
||||
docs.push(...loaderDocs)
|
||||
} else {
|
||||
throw new Error('File path or File upload is required')
|
||||
throw new Error('File upload is required')
|
||||
}
|
||||
|
||||
if (metadata) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue