Flowise/packages/components/evaluation/EvaluationRunner.ts

227 lines
9.7 KiB
TypeScript

import axios from 'axios'
import { v4 as uuidv4 } from 'uuid'
import { ICommonObject } from '../src'
import { getModelConfigByModelName, MODEL_TYPE } from '../src/modelLoader'
export class EvaluationRunner {
static metrics = new Map<string, string[]>()
static getCostMetrics = async (selectedProvider: string, selectedModel: string) => {
let modelConfig = await getModelConfigByModelName(MODEL_TYPE.CHAT, selectedProvider, selectedModel)
if (modelConfig) {
if (modelConfig['cost_values']) {
return modelConfig.cost_values
}
return { cost_values: modelConfig }
} else {
modelConfig = await getModelConfigByModelName(MODEL_TYPE.LLM, selectedProvider, selectedModel)
if (modelConfig) {
if (modelConfig['cost_values']) {
return modelConfig.cost_values
}
return { cost_values: modelConfig }
}
}
return undefined
}
static async getAndDeleteMetrics(id: string) {
const val = EvaluationRunner.metrics.get(id)
if (val) {
try {
//first lets get the provider and model
let selectedModel = undefined
let selectedProvider = undefined
if (val && val.length > 0) {
let modelName = ''
let providerName = ''
for (let i = 0; i < val.length; i++) {
const metric = val[i]
if (typeof metric === 'object') {
modelName = metric['model']
providerName = metric['provider']
} else {
modelName = JSON.parse(metric)['model']
providerName = JSON.parse(metric)['provider']
}
if (modelName) {
selectedModel = modelName
}
if (providerName) {
selectedProvider = providerName
}
}
}
if (selectedProvider && selectedModel) {
const modelConfig = await EvaluationRunner.getCostMetrics(selectedProvider, selectedModel)
if (modelConfig) {
val.push(JSON.stringify({ cost_values: modelConfig }))
}
}
} catch (error) {
//stay silent
}
}
EvaluationRunner.metrics.delete(id)
return val
}
static addMetrics(id: string, metric: string) {
if (EvaluationRunner.metrics.has(id)) {
EvaluationRunner.metrics.get(id)?.push(metric)
} else {
EvaluationRunner.metrics.set(id, [metric])
}
}
baseURL = ''
constructor(baseURL: string) {
this.baseURL = baseURL
}
getChatflowApiKey(chatflowId: string, apiKeys: { chatflowId: string; apiKey: string }[] = []) {
return apiKeys.find((item) => item.chatflowId === chatflowId)?.apiKey || ''
}
public async runEvaluations(data: ICommonObject) {
const chatflowIds = JSON.parse(data.chatflowId)
const returnData: ICommonObject = {}
returnData.evaluationId = data.evaluationId
returnData.runDate = new Date()
returnData.rows = []
for (let i = 0; i < data.dataset.rows.length; i++) {
returnData.rows.push({
input: data.dataset.rows[i].input,
expectedOutput: data.dataset.rows[i].output,
itemNo: data.dataset.rows[i].sequenceNo,
evaluations: [],
status: 'pending'
})
}
for (let i = 0; i < chatflowIds.length; i++) {
const chatflowId = chatflowIds[i]
await this.evaluateChatflow(chatflowId, this.getChatflowApiKey(chatflowId, data.apiKeys), data, returnData)
}
return returnData
}
async evaluateChatflow(chatflowId: string, apiKey: string, data: any, returnData: any) {
for (let i = 0; i < data.dataset.rows.length; i++) {
const item = data.dataset.rows[i]
const uuid = uuidv4()
const headers: any = {
'X-Request-ID': uuid,
'X-Flowise-Evaluation': 'true'
}
if (apiKey) {
headers['Authorization'] = `Bearer ${apiKey}`
}
let axiosConfig = {
headers: headers
}
let startTime = performance.now()
const runData: any = {}
runData.chatflowId = chatflowId
runData.startTime = startTime
const postData: any = { question: item.input, evaluationRunId: uuid, evaluation: true }
if (data.sessionId) {
postData.overrideConfig = { sessionId: data.sessionId }
}
try {
let response = await axios.post(`${this.baseURL}/api/v1/prediction/${chatflowId}`, postData, axiosConfig)
let agentFlowMetrics: any[] = []
if (response?.data?.agentFlowExecutedData) {
for (let i = 0; i < response.data.agentFlowExecutedData.length; i++) {
const agentFlowExecutedData = response.data.agentFlowExecutedData[i]
const input_tokens = agentFlowExecutedData?.data?.output?.usageMetadata?.input_tokens || 0
const output_tokens = agentFlowExecutedData?.data?.output?.usageMetadata?.output_tokens || 0
const total_tokens =
agentFlowExecutedData?.data?.output?.usageMetadata?.total_tokens || input_tokens + output_tokens
const metrics: any = {
promptTokens: input_tokens,
completionTokens: output_tokens,
totalTokens: total_tokens,
provider:
agentFlowExecutedData.data?.input?.llmModelConfig?.llmModel ||
agentFlowExecutedData.data?.input?.agentModelConfig?.agentModel,
model:
agentFlowExecutedData.data?.input?.llmModelConfig?.modelName ||
agentFlowExecutedData.data?.input?.agentModelConfig?.modelName,
nodeLabel: agentFlowExecutedData?.nodeLabel,
nodeId: agentFlowExecutedData?.nodeId
}
if (metrics.provider && metrics.model) {
const modelConfig = await EvaluationRunner.getCostMetrics(metrics.provider, metrics.model)
if (modelConfig) {
metrics.cost_values = {
input_cost: (modelConfig.cost_values.input_cost || 0) * (input_tokens / 1000),
output_cost: (modelConfig.cost_values.output_cost || 0) * (output_tokens / 1000)
}
metrics.cost_values.total_cost = metrics.cost_values.input_cost + metrics.cost_values.output_cost
}
}
agentFlowMetrics.push(metrics)
}
}
const endTime = performance.now()
const timeTaken = (endTime - startTime).toFixed(2)
if (response?.data?.metrics) {
runData.metrics = response.data.metrics
runData.metrics.push({
apiLatency: timeTaken
})
} else {
runData.metrics = [
{
apiLatency: timeTaken
}
]
}
if (agentFlowMetrics.length > 0) {
runData.nested_metrics = agentFlowMetrics
}
runData.status = 'complete'
let resultText = ''
if (response.data.text) resultText = response.data.text
else if (response.data.json) resultText = '```json\n' + JSON.stringify(response.data.json, null, 2)
else resultText = JSON.stringify(response.data, null, 2)
runData.actualOutput = resultText
runData.latency = timeTaken
runData.error = ''
} catch (error: any) {
runData.status = 'error'
runData.actualOutput = ''
runData.error = error?.response?.data?.message
? error.response.data.message
: error?.message
? error.message
: 'Unknown error'
try {
if (runData.error.indexOf('-') > -1) {
// if there is a dash, remove all content before
runData.error = 'Error: ' + runData.error.substr(runData.error.indexOf('-') + 1).trim()
}
} catch (error) {
//stay silent
}
const endTime = performance.now()
const timeTaken = (endTime - startTime).toFixed(2)
runData.metrics = [
{
apiLatency: timeTaken
}
]
runData.latency = timeTaken
}
runData.uuid = uuid
returnData.rows[i].evaluations.push(runData)
}
return returnData
}
}