import { INode, INodeData, INodeParams, PromptTemplate } from '../../../src/Interface' import { AgentExecutor } from 'langchain/agents' import { getBaseClasses } from '../../../src/utils' import { LoadPyodide, finalSystemPrompt, systemPrompt } from './core' import { LLMChain } from 'langchain/chains' import { BaseLanguageModel } from 'langchain/base_language' class CSV_Agents implements INode { label: string name: string description: string type: string icon: string category: string baseClasses: string[] inputs: INodeParams[] constructor() { this.label = 'CSV Agent' this.name = 'csvAgentLLM' this.type = 'AgentExecutor' this.category = 'Agents' this.icon = 'csvagent.png' this.description = 'Agent used to to answer queries on CSV data' this.baseClasses = [this.type, ...getBaseClasses(AgentExecutor)] this.inputs = [ { label: 'Csv File', name: 'csvFile', type: 'file', fileType: '.csv' }, { label: 'Language Model', name: 'model', type: 'BaseLanguageModel' } ] } async init(): Promise { // Not used return undefined } async run(nodeData: INodeData, input: string): Promise { const csvFileBase64 = nodeData.inputs?.csvFile as string const model = nodeData.inputs?.model as BaseLanguageModel let files: string[] = [] if (csvFileBase64.startsWith('[') && csvFileBase64.endsWith(']')) { files = JSON.parse(csvFileBase64) } else { files = [csvFileBase64] } let base64String = '' for (const file of files) { const splitDataURI = file.split(',') splitDataURI.pop() base64String = splitDataURI.pop() ?? '' } const pyodide = await LoadPyodide() // First load the csv file and get the dataframe dictionary of column types // For example using titanic.csv: {'PassengerId': 'int64', 'Survived': 'int64', 'Pclass': 'int64', 'Name': 'object', 'Sex': 'object', 'Age': 'float64', 'SibSp': 'int64', 'Parch': 'int64', 'Ticket': 'object', 'Fare': 'float64', 'Cabin': 'object', 'Embarked': 'object'} let executionResult = '' try { const code = `import pandas as pd import base64 from io import StringIO import json base64_string = "${base64String}" decoded_data = base64.b64decode(base64_string) csv_data = StringIO(decoded_data.decode('utf-8')) df = pd.read_csv(csv_data) my_dict = df.dtypes.astype(str).to_dict() print(my_dict) json.dumps(my_dict)` executionResult = await pyodide.runPythonAsync(code) } catch (error) { throw new Error(error) } console.log('executionResult= ', executionResult) // Then tell GPT to come out with ONLY python code // For example: len(df), df[df['SibSp'] > 3]['PassengerId'].count() let pythonCode = '' if (executionResult) { const chain = new LLMChain({ llm: model, prompt: PromptTemplate.fromTemplate(systemPrompt), verbose: process.env.DEBUG === 'true' ? true : false }) const inputs = { dict: executionResult, question: input } const res = await chain.call(inputs) pythonCode = res?.text } console.log('pythonCode= ', pythonCode) // Then run the code using Pyodide let finalResult = '' if (pythonCode) { try { const code = `import pandas as pd\n${pythonCode}` finalResult = await pyodide.runPythonAsync(code) } catch (error) { throw new Error(error) } } console.log('finalResult= ', finalResult) // Finally, return a complete answer if (finalResult) { const chain = new LLMChain({ llm: model, prompt: PromptTemplate.fromTemplate(finalSystemPrompt), verbose: process.env.DEBUG === 'true' ? true : false }) const inputs = { question: input, answer: finalResult } const res = await chain.call(inputs) return res?.text } return executionResult } } module.exports = { nodeClass: CSV_Agents }