add csv agent

This commit is contained in:
Henry 2023-07-22 01:16:23 +01:00
parent c8223d5273
commit 49c7ae718f
5 changed files with 192 additions and 0 deletions

View File

@ -0,0 +1,142 @@
import { INode, INodeData, INodeParams, PromptTemplate } from '../../../src/Interface'
import { AgentExecutor } from 'langchain/agents'
import { getBaseClasses } from '../../../src/utils'
import { LoadPyodide, finalSystemPrompt, systemPrompt } from './core'
import { LLMChain } from 'langchain/chains'
import { BaseLanguageModel } from 'langchain/base_language'
/**
 * Node that answers natural-language questions about an uploaded CSV file.
 *
 * `run` performs four steps:
 *  1. decode the uploaded CSV inside Pyodide and collect the dataframe's column dtypes,
 *  2. ask the language model for pandas code that answers the question,
 *  3. execute that code in Pyodide,
 *  4. ask the language model to rephrase the raw result as a complete answer.
 */
class CSV_Agents implements INode {
    label: string
    name: string
    description: string
    type: string
    icon: string
    category: string
    baseClasses: string[]
    inputs: INodeParams[]

    constructor() {
        this.label = 'CSV Agent'
        this.name = 'csvAgentLLM'
        this.type = 'AgentExecutor'
        this.category = 'Agents'
        this.icon = 'csvagent.png'
        this.description = 'Agent used to answer queries on CSV data'
        this.baseClasses = [this.type, ...getBaseClasses(AgentExecutor)]
        this.inputs = [
            {
                label: 'Csv File',
                name: 'csvFile',
                type: 'file',
                fileType: '.csv'
            },
            {
                label: 'Language Model',
                name: 'model',
                type: 'BaseLanguageModel'
            }
        ]
    }

    /** Not used by this node; always resolves to `undefined`. */
    async init(): Promise<any> {
        return undefined
    }

    /**
     * Answers `input` using the CSV file and language model configured on the node.
     *
     * @param nodeData node configuration; reads `inputs.csvFile` and `inputs.model`
     * @param input the user's question
     * @returns the model's final answer, or the JSON dictionary of column dtypes
     *          when the generated code produced no value
     * @throws Error when no usable CSV payload is supplied, or when Python execution fails
     */
    async run(nodeData: INodeData, input: string): Promise<string> {
        const csvFileBase64 = nodeData.inputs?.csvFile as string | undefined
        const model = nodeData.inputs?.model as BaseLanguageModel
        // Same switch that controls chain verbosity below.
        const debug = process.env.DEBUG === 'true'

        const base64String = CSV_Agents.extractBase64(csvFileBase64)
        const pyodide = await LoadPyodide()

        // First load the csv file and get the dataframe dictionary of column types
        // For example using titanic.csv: {'PassengerId': 'int64', 'Survived': 'int64', 'Name': 'object', ...}
        // NOTE(review): `df` is left in the Python globals of the runtime returned by LoadPyodide();
        // if that runtime is shared, concurrent runs can overwrite each other's dataframe.
        let executionResult = ''
        try {
            const code = [
                'import pandas as pd',
                'import base64',
                'from io import StringIO',
                'import json',
                // JSON.stringify yields a correctly escaped string literal, so the payload
                // can never terminate the literal and inject Python statements.
                `base64_string = ${JSON.stringify(base64String)}`,
                'decoded_data = base64.b64decode(base64_string)',
                "csv_data = StringIO(decoded_data.decode('utf-8'))",
                'df = pd.read_csv(csv_data)',
                'my_dict = df.dtypes.astype(str).to_dict()',
                'print(my_dict)',
                'json.dumps(my_dict)'
            ].join('\n')
            executionResult = await pyodide.runPythonAsync(code)
        } catch (error) {
            throw CSV_Agents.toError(error)
        }
        if (debug) console.log('executionResult= ', executionResult)
        if (!executionResult) return executionResult

        // Then tell GPT to come out with ONLY python code
        // For example: len(df), df[df['SibSp'] > 3]['PassengerId'].count()
        const codeChain = new LLMChain({
            llm: model,
            prompt: PromptTemplate.fromTemplate(systemPrompt),
            verbose: debug
        })
        const codeResponse = await codeChain.call({ dict: executionResult, question: input })
        const generated: unknown = codeResponse?.text
        const pythonCode = typeof generated === 'string' ? CSV_Agents.stripCodeFences(generated) : ''
        if (debug) console.log('pythonCode= ', pythonCode)

        // Then run the code using Pyodide
        // NOTE(security): this executes code written by the language model, which is steered by the
        // user's question and the CSV contents. Confirm the runtime is isolated enough for your deployment.
        let finalResult: unknown
        if (pythonCode) {
            try {
                finalResult = await pyodide.runPythonAsync(`import pandas as pd\n${pythonCode}`)
            } catch (error) {
                throw CSV_Agents.toError(error)
            }
        }
        if (debug) console.log('finalResult= ', finalResult)

        // Finally, return a complete answer.
        // 0 and false are legitimate answers (e.g. a count of zero), so only a missing or empty value counts as "no result".
        const hasResult = finalResult !== undefined && finalResult !== null && finalResult !== ''
        if (hasResult) {
            const answerChain = new LLMChain({
                llm: model,
                prompt: PromptTemplate.fromTemplate(finalSystemPrompt),
                verbose: debug
            })
            const answerResponse = await answerChain.call({ question: input, answer: finalResult })
            return answerResponse?.text
        }
        return executionResult
    }

    /**
     * Pulls the base64 payload out of the `csvFile` input.
     *
     * The input is either one string or a JSON array of strings; when several files are
     * supplied only the last one is used. For a `data:` URI the payload is the segment
     * right after the first comma, which stays correct when a trailing segment (e.g. a
     * file name) itself contains commas. Any other string falls back to the second-to-last
     * comma-separated segment — presumably `<header>,<base64>,<file name>`; confirm against the uploader.
     */
    private static extractBase64(csvFile: string | undefined): string {
        if (!csvFile) {
            throw new Error('CSV Agent: no CSV file was provided')
        }
        const parsed: unknown = csvFile.startsWith('[') && csvFile.endsWith(']') ? JSON.parse(csvFile) : [csvFile]
        if (!Array.isArray(parsed) || !parsed.every((item): item is string => typeof item === 'string')) {
            throw new Error('CSV Agent: the CSV file input must be a string or a JSON array of strings')
        }
        const lastFile = parsed[parsed.length - 1] ?? ''
        const segments = lastFile.split(',')
        let payload: string | undefined
        if (lastFile.startsWith('data:')) {
            payload = segments[1]
        } else {
            segments.pop()
            payload = segments.pop()
        }
        if (!payload) {
            throw new Error('CSV Agent: the uploaded CSV file is empty or not in the expected format')
        }
        return payload
    }

    /** Removes a surrounding Markdown code fence and outer whitespace from model output. */
    private static stripCodeFences(text: string): string {
        const trimmed = text.trim()
        const fenced = /^```[A-Za-z0-9_-]*[ \t]*\r?\n([\s\S]*?)\r?\n?```$/.exec(trimmed)
        return (fenced?.[1] ?? trimmed).trim()
    }

    /** Returns thrown values unchanged when they are already Errors, so the stack is preserved. */
    private static toError(error: unknown): Error {
        return error instanceof Error ? error : new Error(String(error))
    }
}
// Expose the node class under the `nodeClass` key (presumably the key the node loader looks up — verify against the loader).
module.exports = { nodeClass: CSV_Agents }

View File

@ -0,0 +1,35 @@
import type { PyodideInterface } from 'pyodide'
import * as path from 'path'
import { getUserHome } from '../../../src/utils'
// Fully initialised runtime; only assigned once every package below has been installed.
let pyodideInstance: PyodideInterface | undefined
// Initialisation currently in flight, shared by all callers so the runtime is loaded only once.
let pyodideLoading: Promise<PyodideInterface> | undefined

/**
 * Returns the process-wide Pyodide runtime, creating it on first use.
 *
 * On first use the runtime is loaded with a package cache directory under
 * `<user home>/.flowise/pyodideCacheDir`, then `micropip` is loaded and used to
 * install `pandas` and `numpy`.
 *
 * Callers that arrive while initialisation is still running await the same promise,
 * so nobody receives a runtime that is missing packages and the load is not repeated.
 * If initialisation fails, the cached promise is cleared so a later call can retry.
 *
 * @returns the initialised Pyodide runtime
 * @throws whatever loading Pyodide or installing the packages throws
 */
export async function LoadPyodide(): Promise<PyodideInterface> {
    if (pyodideInstance !== undefined) {
        return pyodideInstance
    }
    if (pyodideLoading === undefined) {
        pyodideLoading = (async (): Promise<PyodideInterface> => {
            try {
                const { loadPyodide } = await import('pyodide')
                // Kept as `any` as in the original — presumably the installed typings lack `packageCacheDir`; confirm.
                const obj: any = { packageCacheDir: path.join(getUserHome(), '.flowise', 'pyodideCacheDir') }
                const instance = await loadPyodide(obj)
                await instance.loadPackage('micropip')
                const micropip = instance.pyimport('micropip')
                await micropip.install('pandas')
                await micropip.install('numpy')
                // Mounting a host directory was left disabled by the original author:
                //let mountDir = "/mnt";
                //instance.FS.mkdir(mountDir);
                //instance.FS.mount(instance.FS.filesystems.NODEFS, { root: path.join(getUserHome(), '.flowise', 'pyodideFS') }, mountDir);
                pyodideInstance = instance
                return instance
            } catch (error) {
                // Forget the failed attempt so the next call starts a fresh load.
                pyodideLoading = undefined
                throw error
            }
        })()
    }
    return pyodideLoading
}
/**
 * Prompt template asking the model to write pandas code for a dataframe named `df`.
 * Contains two placeholders for the caller to fill: `{dict}` (column name to dtype
 * dictionary) and `{question}` (the question to answer).
 */
export const systemPrompt = [
    'You are working with a pandas dataframe in Python. The name of the dataframe is df.',
    'The columns and data types of a dataframe are given below as a Python dictionary with keys showing column names and values showing the data types.',
    '{dict}',
    'I will ask question, and you will output the Python code using pandas dataframe to answer my question. Do not provide any explanations. Do not respond with anything except the output of the code.',
    'Question: {question}',
    'Output Code:'
].join('\n')
/**
 * Prompt template asking the model to rephrase a raw answer in more detail.
 * Contains two placeholders for the caller to fill: `{question}` and `{answer}`.
 */
export const finalSystemPrompt =
    'You are given the question: {question}. You have an answer to the question: {answer}. Rephrase the answer with more details.' +
    '\n' +
    'Helpful Answer:'

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.0 KiB

View File

@ -50,6 +50,7 @@
"pdfjs-dist": "^3.7.107", "pdfjs-dist": "^3.7.107",
"playwright": "^1.35.0", "playwright": "^1.35.0",
"puppeteer": "^20.7.1", "puppeteer": "^20.7.1",
"pyodide": ">=0.21.0-alpha.2",
"redis": "^4.6.7", "redis": "^4.6.7",
"replicate": "^0.12.3", "replicate": "^0.12.3",
"srt-parser-2": "^1.2.3", "srt-parser-2": "^1.2.3",

View File

@ -377,3 +377,17 @@ export const availableDependencies = [
'typeorm', 'typeorm',
'weaviate-ts-client' 'weaviate-ts-client'
] ]
/**
 * Resolves the current user's home directory from the environment.
 *
 * Reads `USERPROFILE` when `process.platform` is `win32` and `HOME` otherwise.
 *
 * @returns the value of that environment variable, or the current working
 *          directory when the variable is not set
 */
export const getUserHome = (): string => {
    const envKey = process.platform === 'win32' ? 'USERPROFILE' : 'HOME'
    const homeDir = process.env[envKey]
    // If for some reason the variable does not exist, fall back to the current folder
    return homeDir ?? process.cwd()
}