diff --git a/packages/components/nodes/documentloaders/Figma/Figma.ts b/packages/components/nodes/documentloaders/Figma/Figma.ts
new file mode 100644
index 000000000..388c4ee05
--- /dev/null
+++ b/packages/components/nodes/documentloaders/Figma/Figma.ts
@@ -0,0 +1,112 @@
+import { INode, INodeData, INodeParams } from '../../../src/Interface'
+import { TextSplitter } from 'langchain/text_splitter'
+import { FigmaFileLoader, FigmaLoaderParams } from 'langchain/document_loaders/web/figma'
+
+/**
+ * Document loader node that reads the content of selected nodes of a Figma file.
+ */
+class Figma_DocumentLoaders implements INode {
+    label: string
+    name: string
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'Figma'
+        this.name = 'figma'
+        this.type = 'Document'
+        this.icon = 'figma.png'
+        this.category = 'Document Loaders'
+        this.description = 'Load data from a Figma file'
+        this.baseClasses = [this.type]
+        this.inputs = [
+            {
+                label: 'Access Token',
+                name: 'accessToken',
+                type: 'password',
+                placeholder: ''
+            },
+            {
+                label: 'File Key',
+                name: 'fileKey',
+                type: 'string',
+                placeholder: 'key'
+            },
+            {
+                label: 'Node IDs',
+                name: 'nodeIds',
+                type: 'string',
+                placeholder: '0, 1, 2'
+            },
+            {
+                // NOTE(review): init() never reads this input and it is not part of the options
+                // passed to FigmaFileLoader - confirm whether it should be wired up or removed.
+                label: 'Recursive',
+                name: 'recursive',
+                type: 'boolean',
+                optional: true
+            },
+            {
+                label: 'Text Splitter',
+                name: 'textSplitter',
+                type: 'TextSplitter',
+                optional: true
+            },
+            {
+                label: 'Metadata',
+                name: 'metadata',
+                type: 'json',
+                optional: true,
+                additionalParams: true
+            }
+        ]
+    }
+
+    /**
+     * Loads the configured Figma nodes as documents.
+     *
+     * @param nodeData - node configuration; reads the `accessToken`, `fileKey` and `nodeIds` inputs plus
+     * the optional `textSplitter` and `metadata` inputs.
+     * @returns the loaded documents, split when a text splitter is connected, with the supplied
+     * metadata merged over each document's own metadata when provided.
+     */
+    async init(nodeData: INodeData): Promise<any> {
+        const accessToken = nodeData.inputs?.accessToken as string
+        const fileKey = nodeData.inputs?.fileKey as string
+        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
+        const metadata = nodeData.inputs?.metadata
+
+        // The input is a comma separated list and its placeholder ('0, 1, 2') has spaces after the
+        // commas, so trim every id and drop blank entries rather than pass ' 1' on to the loader.
+        const nodeIds = ((nodeData.inputs?.nodeIds as string) ?? '')
+            .split(',')
+            .map((id) => id.trim())
+            .filter((id) => id.length > 0)
+
+        const options: FigmaLoaderParams = {
+            accessToken,
+            nodeIds,
+            fileKey
+        }
+
+        const loader = new FigmaFileLoader(options)
+        const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()
+
+        if (!metadata) return docs
+
+        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+        return docs.map((doc) => ({
+            ...doc,
+            metadata: {
+                ...doc.metadata,
+                ...parsedMetadata
+            }
+        }))
+    }
+}
+
+module.exports = { nodeClass: Figma_DocumentLoaders }
diff --git
a/packages/components/nodes/documentloaders/Figma/figma.png b/packages/components/nodes/documentloaders/Figma/figma.png
new file mode 100644
index 000000000..72372ddff
Binary files /dev/null and b/packages/components/nodes/documentloaders/Figma/figma.png differ
diff --git a/packages/components/nodes/documentloaders/Subtitles/Subtitles.ts b/packages/components/nodes/documentloaders/Subtitles/Subtitles.ts
new file mode 100644
index 000000000..0f60e151e
--- /dev/null
+++ b/packages/components/nodes/documentloaders/Subtitles/Subtitles.ts
@@ -0,0 +1,111 @@
+import { INode, INodeData, INodeParams } from '../../../src/Interface'
+import { TextSplitter } from 'langchain/text_splitter'
+import { SRTLoader } from 'langchain/document_loaders/fs/srt'
+
+/**
+ * Decodes the base64 payload of one uploaded file string.
+ *
+ * The string is split on commas. When it starts with a `data:` header the payload is the segment
+ * right after the first comma, since that comma ends the header and base64 text never contains
+ * one; this stays correct when a later segment contains commas of its own. Any other string keeps
+ * the previous rule of decoding the second-to-last segment.
+ *
+ * @param file - uploaded file string as received from the node input.
+ * @returns the decoded bytes, empty when the string has no payload segment.
+ */
+const decodeUpload = (file: string): Buffer => {
+    const segments = file.split(',')
+    const payload = segments[0].startsWith('data:') ? segments[1] : segments[segments.length - 2]
+    return Buffer.from(payload || '', 'base64')
+}
+
+/**
+ * Document loader node that turns uploaded .srt subtitles files into documents.
+ */
+class Subtitles_DocumentLoaders implements INode {
+    label: string
+    name: string
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+
+    constructor() {
+        this.label = 'Subtitles File'
+        this.name = 'subtitlesFile'
+        this.type = 'Document'
+        this.icon = 'subtitlesFile.svg'
+        this.category = 'Document Loaders'
+        this.description = `Load data from subtitles files`
+        this.baseClasses = [this.type]
+        this.inputs = [
+            {
+                label: 'Subtitles File',
+                name: 'subtitlesFile',
+                type: 'file',
+                fileType: '.srt'
+            },
+            {
+                label: 'Text Splitter',
+                name: 'textSplitter',
+                type: 'TextSplitter',
+                optional: true
+            },
+            {
+                label: 'Metadata',
+                name: 'metadata',
+                type: 'json',
+                optional: true,
+                additionalParams: true
+            }
+        ]
+    }
+
+    /**
+     * Loads every uploaded subtitles file, in the order given.
+     *
+     * @param nodeData - node configuration; reads `subtitlesFile` (one uploaded file string, or a
+     * JSON array of them) plus the optional `textSplitter` and `metadata` inputs.
+     * @returns the documents of all files, split when a text splitter is connected, with the
+     * supplied metadata merged over each document's own metadata when provided.
+     * @throws Error when the `subtitlesFile` input is missing.
+     */
+    async init(nodeData: INodeData): Promise<any> {
+        const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
+        const subtitlesFileBase64 = nodeData.inputs?.subtitlesFile as string
+        const metadata = nodeData.inputs?.metadata
+
+        // Report a missing upload clearly instead of failing on startsWith of undefined.
+        if (typeof subtitlesFileBase64 !== 'string') {
+            throw new Error('Subtitles File input is missing')
+        }
+
+        // Several uploads arrive as a JSON array of file strings, a single upload as a bare string.
+        const files: string[] =
+            subtitlesFileBase64.startsWith('[') && subtitlesFileBase64.endsWith(']')
+                ? JSON.parse(subtitlesFileBase64)
+                : [subtitlesFileBase64]
+
+        const alldocs = []
+        for (const file of files) {
+            const loader = new SRTLoader(new Blob([decodeUpload(file)]))
+            const docs = textSplitter ? await loader.loadAndSplit(textSplitter) : await loader.load()
+            alldocs.push(...docs)
+        }
+
+        if (!metadata) return alldocs
+
+        const parsedMetadata = typeof metadata === 'object' ? metadata : JSON.parse(metadata)
+        return alldocs.map((doc) => ({
+            ...doc,
+            metadata: {
+                ...doc.metadata,
+                ...parsedMetadata
+            }
+        }))
+    }
+}
+
+module.exports = { nodeClass: Subtitles_DocumentLoaders }
diff --git a/packages/components/nodes/documentloaders/Subtitles/subtitlesFile.svg b/packages/components/nodes/documentloaders/Subtitles/subtitlesFile.svg
new file mode 100644
index 000000000..a6ee925bc
--- /dev/null
+++ b/packages/components/nodes/documentloaders/Subtitles/subtitlesFile.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts b/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts
new file mode 100644
index 000000000..d6568d078
--- /dev/null
+++ b/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts
@@ -0,0 +1,74 @@
+import { INode, INodeData, INodeParams } from '../../../src/Interface'
+import { getBaseClasses } from '../../../src/utils'
+import { RecursiveCharacterTextSplitter, RecursiveCharacterTextSplitterParams } from 'langchain/text_splitter'
+
+/**
+ * Converts a raw node input into an integer.
+ *
+ * @param value - raw input value, a string or a number.
+ * @returns the parsed integer, or undefined when the input is absent, empty or not numeric.
+ */
+const toInteger = (value: unknown): number | undefined => {
+    if (value === undefined || value === null || value === '') return undefined
+    const parsed = parseInt(String(value), 10)
+    return Number.isNaN(parsed) ? undefined : parsed
+}
+
+/**
+ * Text splitter node that builds a RecursiveCharacterTextSplitter configured for the 'latex' language.
+ */
+class LatexTextSplitter_TextSplitters implements INode {
+    label: string
+    name: string
+    description: string
+    type: string
+    icon: string
+    category: string
+    baseClasses: string[]
+    inputs: INodeParams[]
+    constructor() {
+        this.label = 'Latex Text Splitter'
+        this.name = 'latexTextSplitter'
+        this.type = 'LatexTextSplitter'
+        this.icon = 'latexTextSplitter.svg'
+        this.category = 'Text Splitters'
+        this.description = `Split documents along Latex headings, headlines, enumerations and more.`
+        this.baseClasses = [this.type, ...getBaseClasses(RecursiveCharacterTextSplitter)]
+        this.inputs = [
+            {
+                label: 'Chunk Size',
+                name: 'chunkSize',
+                type: 'number',
+                default: 1000,
+                optional: true
+            },
+            {
+                label: 'Chunk Overlap',
+                name: 'chunkOverlap',
+                type: 'number',
+                optional: true
+            }
+        ]
+    }
+
+    /**
+     * Creates the splitter from the node inputs.
+     *
+     * @param nodeData - node configuration; reads the optional `chunkSize` and `chunkOverlap` inputs.
+     * @returns a RecursiveCharacterTextSplitter for 'latex'; inputs that are absent or not numeric are
+     * left out of the options object.
+     */
+    async init(nodeData: INodeData): Promise<any> {
+        const chunkSize = toInteger(nodeData.inputs?.chunkSize)
+        const chunkOverlap = toInteger(nodeData.inputs?.chunkOverlap)
+
+        const obj = {} as RecursiveCharacterTextSplitterParams
+
+        // Compare against undefined rather than truthiness so a numeric 0 is passed through.
+        if (chunkSize !== undefined) obj.chunkSize = chunkSize
+        if (chunkOverlap !== undefined) obj.chunkOverlap = chunkOverlap
+
+        return RecursiveCharacterTextSplitter.fromLanguage('latex', obj)
+    }
+}
+module.exports = { nodeClass: LatexTextSplitter_TextSplitters }
diff --git a/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg b/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg
new file mode 100644
index 000000000..ae9d89beb
--- /dev/null
+++ b/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg
@@ -0,0 +1,6 @@
+
+
+
+
+
+
diff --git a/packages/components/package.json b/packages/components/package.json
index 4394f639c..c9ceeea94 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -32,6 +32,7 @@
         "faiss-node": "^0.2.1",
         "form-data": "^4.0.0",
         "graphql": "^16.6.0",
+        "html-to-text": "^9.0.5",
         "langchain": "^0.0.94",
         "linkifyjs": "^4.1.1",
         "mammoth": "^1.5.1",
@@ -39,10 +40,10 @@
         "node-fetch": "^2.6.11",
         "pdf-parse": "^1.1.1",
         "playwright": "^1.35.0",
         "puppeteer": "^20.7.1",
+        "srt-parser-2": "^1.2.3",
         "weaviate-ts-client": "^1.1.0",
-        "ws": "^8.9.0",
-        "html-to-text": "^9.0.5"
+        "ws": "^8.9.0"
     },
     "devDependencies": {
         "@types/gulp": "4.0.9",