From 1c4880f665cbc5fc5e14272d1c4560ceb2690a58 Mon Sep 17 00:00:00 2001 From: Henry Date: Fri, 16 Jun 2023 15:39:49 +0100 Subject: [PATCH] change latex splitter to code splitter --- .../CodeTextSplitter/CodeTextSplitter.ts | 128 ++++++++++++++++++ .../CodeTextSplitter/codeTextSplitter.svg | 8 ++ .../LatexTextSplitter/LatexTextSplitter.ts | 52 ------- .../LatexTextSplitter/latexTextSplitter.svg | 6 - 4 files changed, 136 insertions(+), 58 deletions(-) create mode 100644 packages/components/nodes/textsplitters/CodeTextSplitter/CodeTextSplitter.ts create mode 100644 packages/components/nodes/textsplitters/CodeTextSplitter/codeTextSplitter.svg delete mode 100644 packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts delete mode 100644 packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg diff --git a/packages/components/nodes/textsplitters/CodeTextSplitter/CodeTextSplitter.ts b/packages/components/nodes/textsplitters/CodeTextSplitter/CodeTextSplitter.ts new file mode 100644 index 000000000..b14655b86 --- /dev/null +++ b/packages/components/nodes/textsplitters/CodeTextSplitter/CodeTextSplitter.ts @@ -0,0 +1,128 @@ +import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { getBaseClasses } from '../../../src/utils' +import { + RecursiveCharacterTextSplitter, + RecursiveCharacterTextSplitterParams, + SupportedTextSplitterLanguage +} from 'langchain/text_splitter' + +class CodeTextSplitter_TextSplitters implements INode { + label: string + name: string + description: string + type: string + icon: string + category: string + baseClasses: string[] + inputs: INodeParams[] + constructor() { + this.label = 'Code Text Splitter' + this.name = 'codeTextSplitter' + this.type = 'CodeTextSplitter' + this.icon = 'codeTextSplitter.svg' + this.category = 'Text Splitters' + this.description = `Split documents based on language-specific syntax` + this.baseClasses = [this.type, 
...getBaseClasses(RecursiveCharacterTextSplitter)] + this.inputs = [ + { + label: 'Language', + name: 'language', + type: 'options', + options: [ + { + label: 'cpp', + name: 'cpp' + }, + { + label: 'go', + name: 'go' + }, + { + label: 'java', + name: 'java' + }, + { + label: 'js', + name: 'js' + }, + { + label: 'php', + name: 'php' + }, + { + label: 'proto', + name: 'proto' + }, + { + label: 'python', + name: 'python' + }, + { + label: 'rst', + name: 'rst' + }, + { + label: 'ruby', + name: 'ruby' + }, + { + label: 'rust', + name: 'rust' + }, + { + label: 'scala', + name: 'scala' + }, + { + label: 'swift', + name: 'swift' + }, + { + label: 'markdown', + name: 'markdown' + }, + { + label: 'latex', + name: 'latex' + }, + { + label: 'html', + name: 'html' + }, + { + label: 'sol', + name: 'sol' + } + ] + }, + { + label: 'Chunk Size', + name: 'chunkSize', + type: 'number', + default: 1000, + optional: true + }, + { + label: 'Chunk Overlap', + name: 'chunkOverlap', + type: 'number', + optional: true + } + ] + } + async init(nodeData: INodeData): Promise { + const chunkSize = nodeData.inputs?.chunkSize as string + const chunkOverlap = nodeData.inputs?.chunkOverlap as string + const language = nodeData.inputs?.language as SupportedTextSplitterLanguage + + const obj = {} as RecursiveCharacterTextSplitterParams + + if (chunkSize) obj.chunkSize = parseInt(chunkSize, 10) + if (chunkOverlap) obj.chunkOverlap = parseInt(chunkOverlap, 10) + + const splitter = RecursiveCharacterTextSplitter.fromLanguage(language, obj) + + return splitter + } +} +module.exports = { nodeClass: CodeTextSplitter_TextSplitters } diff --git a/packages/components/nodes/textsplitters/CodeTextSplitter/codeTextSplitter.svg b/packages/components/nodes/textsplitters/CodeTextSplitter/codeTextSplitter.svg new file mode 100644 index 000000000..d3b3d188a --- /dev/null +++ b/packages/components/nodes/textsplitters/CodeTextSplitter/codeTextSplitter.svg @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff 
--git a/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts b/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts deleted file mode 100644 index d6568d078..000000000 --- a/packages/components/nodes/textsplitters/LatexTextSplitter/LatexTextSplitter.ts +++ /dev/null @@ -1,52 +0,0 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' -import { getBaseClasses } from '../../../src/utils' -import { RecursiveCharacterTextSplitter, RecursiveCharacterTextSplitterParams } from 'langchain/text_splitter' - -class LatexTextSplitter_TextSplitters implements INode { - label: string - name: string - description: string - type: string - icon: string - category: string - baseClasses: string[] - inputs: INodeParams[] - constructor() { - this.label = 'Latex Text Splitter' - this.name = 'latexTextSplitter' - this.type = 'LatexTextSplitter' - this.icon = 'latexTextSplitter.svg' - this.category = 'Text Splitters' - this.description = `Split documents along Latex headings, headlines, enumerations and more.` - this.baseClasses = [this.type, ...getBaseClasses(RecursiveCharacterTextSplitter)] - this.inputs = [ - { - label: 'Chunk Size', - name: 'chunkSize', - type: 'number', - default: 1000, - optional: true - }, - { - label: 'Chunk Overlap', - name: 'chunkOverlap', - type: 'number', - optional: true - } - ] - } - async init(nodeData: INodeData): Promise { - const chunkSize = nodeData.inputs?.chunkSize as string - const chunkOverlap = nodeData.inputs?.chunkOverlap as string - - const obj = {} as RecursiveCharacterTextSplitterParams - - if (chunkSize) obj.chunkSize = parseInt(chunkSize, 10) - if (chunkOverlap) obj.chunkOverlap = parseInt(chunkOverlap, 10) - - const splitter = RecursiveCharacterTextSplitter.fromLanguage('latex', obj) - - return splitter - } -} -module.exports = { nodeClass: LatexTextSplitter_TextSplitters } diff --git a/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg 
b/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg deleted file mode 100644 index ae9d89beb..000000000 --- a/packages/components/nodes/textsplitters/LatexTextSplitter/latexTextSplitter.svg +++ /dev/null @@ -1,6 +0,0 @@ - - - - - -