diff --git a/packages/components/nodes/documentloaders/Json/Json.ts b/packages/components/nodes/documentloaders/Json/Json.ts index f94138a4c..694fc26ab 100644 --- a/packages/components/nodes/documentloaders/Json/Json.ts +++ b/packages/components/nodes/documentloaders/Json/Json.ts @@ -47,7 +47,7 @@ class Json_DocumentLoaders implements INode { constructor() { this.label = 'Json File' this.name = 'jsonFile' - this.version = 3.0 + this.version = 3.1 this.type = 'Document' this.icon = 'json.svg' this.category = 'Document Loaders' @@ -66,6 +66,14 @@ class Json_DocumentLoaders implements INode { type: 'TextSplitter', optional: true }, + { + label: 'Separate by JSON Object', + name: 'separateByObject', + type: 'boolean', + description: 'If enabled and the JSON file contains an array, each object in the array will become a chunk', + optional: true, + additionalParams: true + }, { label: 'Pointers Extraction (separated by commas)', name: 'pointersName', @@ -73,7 +81,10 @@ class Json_DocumentLoaders implements INode { description: 'Ex: { "key": "value" }, Pointer Extraction = "key", "value" will be extracted as pageContent of the chunk. Use comma to separate multiple pointers', placeholder: 'key1, key2', - optional: true + optional: true, + hide: { + separateByObject: true + } }, { label: 'Additional Metadata', @@ -122,6 +133,7 @@ class Json_DocumentLoaders implements INode { const pointersName = nodeData.inputs?.pointersName as string const metadata = nodeData.inputs?.metadata const _omitMetadataKeys = nodeData.inputs?.omitMetadataKeys as string + const separateByObject = nodeData.inputs?.separateByObject as boolean const output = nodeData.outputs?.output as string let omitMetadataKeys: string[] = [] @@ -153,7 +165,7 @@ class Json_DocumentLoaders implements INode { if (!file) continue const fileData = await getFileFromStorage(file, orgId, chatflowid) const blob = new Blob([fileData]) - const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined, metadata) + const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined, metadata, separateByObject) if (textSplitter) { let splittedDocs = await loader.load() @@ -176,7 +188,7 @@ class Json_DocumentLoaders implements INode { splitDataURI.pop() const bf = Buffer.from(splitDataURI.pop() || '', 'base64') const blob = new Blob([bf]) - const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined, metadata) + const loader = new JSONLoader(blob, pointers.length != 0 ? pointers : undefined, metadata, separateByObject) if (textSplitter) { let splittedDocs = await loader.load() @@ -306,13 +318,20 @@ class TextLoader extends BaseDocumentLoader { class JSONLoader extends TextLoader { public pointers: string[] private metadataMapping: Record + private separateByObject: boolean - constructor(filePathOrBlob: string | Blob, pointers: string | string[] = [], metadataMapping: Record = {}) { + constructor( + filePathOrBlob: string | Blob, + pointers: string | string[] = [], + metadataMapping: Record = {}, + separateByObject: boolean = false + ) { super(filePathOrBlob) this.pointers = Array.isArray(pointers) ? pointers : [pointers] if (metadataMapping) { this.metadataMapping = typeof metadataMapping === 'object' ? metadataMapping : JSON.parse(metadataMapping) } + this.separateByObject = separateByObject } protected async parse(raw: string): Promise { @@ -323,14 +342,22 @@ class JSONLoader extends TextLoader { const jsonArray = Array.isArray(json) ? json : [json] for (const item of jsonArray) { - const content = this.extractContent(item) - const metadata = this.extractMetadata(item) - - for (const pageContent of content) { + if (this.separateByObject) { + const metadata = this.extractMetadata(item) + const pageContent = this.formatObjectAsKeyValue(item) documents.push({ pageContent, metadata }) + } else { + const content = this.extractContent(item) + const metadata = this.extractMetadata(item) + for (const pageContent of content) { + documents.push({ + pageContent, + metadata + }) + } } } @@ -370,6 +397,30 @@ class JSONLoader extends TextLoader { return metadata } + /** + * Formats a JSON object as readable key-value pairs + */ + private formatObjectAsKeyValue(obj: any, prefix: string = ''): string { + const lines: string[] = [] + + for (const [key, value] of Object.entries(obj)) { + const fullKey = prefix ? `${prefix}.${key}` : key + + if (value === null || value === undefined) { + lines.push(`${fullKey}: ${value}`) + } else if (Array.isArray(value)) { + lines.push(`${fullKey}: ${JSON.stringify(value)}`) + } else if (typeof value === 'object') { + // Recursively format nested objects + lines.push(this.formatObjectAsKeyValue(value, fullKey)) + } else { + lines.push(`${fullKey}: ${value}`) + } + } + + return lines.join('\n') + } + /** * If JSON pointers are specified, return all strings below any of them * and exclude all other nodes expect if they match a JSON pointer.