Chore/Remove Deprecated File Path Unstructured (#5478)
* Refactor UnstructuredFile and UnstructuredFolder loaders to remove deprecated file path handling and enhance folder path validation. Ensure folder paths are sanitized and validated against path traversal attacks.
* Update UnstructuredFolder.ts
This commit is contained in:
parent
ceb0512e2f
commit
4a642f02d0
|
|
@ -4,15 +4,11 @@ import {
|
||||||
UnstructuredLoaderOptions,
|
UnstructuredLoaderOptions,
|
||||||
UnstructuredLoaderStrategy,
|
UnstructuredLoaderStrategy,
|
||||||
SkipInferTableTypes,
|
SkipInferTableTypes,
|
||||||
HiResModelName,
|
HiResModelName
|
||||||
UnstructuredLoader as LCUnstructuredLoader
|
|
||||||
} from '@langchain/community/document_loaders/fs/unstructured'
|
} from '@langchain/community/document_loaders/fs/unstructured'
|
||||||
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
|
import { getCredentialData, getCredentialParam, handleEscapeCharacters } from '../../../src/utils'
|
||||||
import { getFileFromStorage, INodeOutputsValue } from '../../../src'
|
import { getFileFromStorage, INodeOutputsValue } from '../../../src'
|
||||||
import { UnstructuredLoader } from './Unstructured'
|
import { UnstructuredLoader } from './Unstructured'
|
||||||
import { isPathTraversal, isUnsafeFilePath } from '../../../src/validator'
|
|
||||||
import sanitize from 'sanitize-filename'
|
|
||||||
import path from 'path'
|
|
||||||
|
|
||||||
class UnstructuredFile_DocumentLoaders implements INode {
|
class UnstructuredFile_DocumentLoaders implements INode {
|
||||||
label: string
|
label: string
|
||||||
|
|
@ -44,17 +40,6 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
||||||
optional: true
|
optional: true
|
||||||
}
|
}
|
||||||
this.inputs = [
|
this.inputs = [
|
||||||
/** Deprecated
|
|
||||||
{
|
|
||||||
label: 'File Path',
|
|
||||||
name: 'filePath',
|
|
||||||
type: 'string',
|
|
||||||
placeholder: '',
|
|
||||||
optional: true,
|
|
||||||
warning:
|
|
||||||
'Use the File Upload instead of File path. If file is uploaded, this path is ignored. Path will be deprecated in future releases.'
|
|
||||||
},
|
|
||||||
*/
|
|
||||||
{
|
{
|
||||||
label: 'Files Upload',
|
label: 'Files Upload',
|
||||||
name: 'fileObject',
|
name: 'fileObject',
|
||||||
|
|
@ -455,7 +440,6 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
||||||
}
|
}
|
||||||
|
|
||||||
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
|
||||||
const filePath = nodeData.inputs?.filePath as string
|
|
||||||
const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
|
const unstructuredAPIUrl = nodeData.inputs?.unstructuredAPIUrl as string
|
||||||
const strategy = nodeData.inputs?.strategy as UnstructuredLoaderStrategy
|
const strategy = nodeData.inputs?.strategy as UnstructuredLoaderStrategy
|
||||||
const encoding = nodeData.inputs?.encoding as string
|
const encoding = nodeData.inputs?.encoding as string
|
||||||
|
|
@ -560,37 +544,8 @@ class UnstructuredFile_DocumentLoaders implements INode {
|
||||||
docs.push(...loaderDocs)
|
docs.push(...loaderDocs)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if (filePath) {
|
|
||||||
if (!filePath || typeof filePath !== 'string') {
|
|
||||||
throw new Error('Invalid file path format')
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isPathTraversal(filePath) || isUnsafeFilePath(filePath)) {
|
|
||||||
throw new Error('Invalid path characters detected in filePath - path traversal not allowed')
|
|
||||||
}
|
|
||||||
|
|
||||||
const parsedPath = path.parse(filePath)
|
|
||||||
const sanitizedFilename = sanitize(parsedPath.base)
|
|
||||||
|
|
||||||
if (!sanitizedFilename || sanitizedFilename.trim() === '') {
|
|
||||||
throw new Error('Invalid filename after sanitization')
|
|
||||||
}
|
|
||||||
|
|
||||||
const sanitizedFilePath = path.join(parsedPath.dir, sanitizedFilename)
|
|
||||||
|
|
||||||
if (!path.isAbsolute(sanitizedFilePath)) {
|
|
||||||
throw new Error('File path must be absolute')
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sanitizedFilePath.includes('..')) {
|
|
||||||
throw new Error('Invalid file path - directory traversal not allowed')
|
|
||||||
}
|
|
||||||
|
|
||||||
const loader = new LCUnstructuredLoader(sanitizedFilePath, obj)
|
|
||||||
const loaderDocs = await loader.load()
|
|
||||||
docs.push(...loaderDocs)
|
|
||||||
} else {
|
} else {
|
||||||
throw new Error('File path or File upload is required')
|
throw new Error('File upload is required')
|
||||||
}
|
}
|
||||||
|
|
||||||
if (metadata) {
|
if (metadata) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue