Compare commits

...

2 Commits

Author SHA1 Message Date
Henry 2435b2b48c remove console log 2025-07-18 19:17:21 +01:00
Henry f73a305584 refactor: Implement SecureZodSchemaParser for safe Zod schema handling and add FilterParser for Supabase filters
* Replaced direct Zod schema evaluation with SecureZodSchemaParser in StructuredOutputParserAdvanced and CustomTool.
* Introduced FilterParser to safely handle Supabase filter strings, preventing arbitrary code execution.
* Added new filterParser.ts file to encapsulate filter parsing logic.
* Updated Supabase vector store to utilize the new FilterParser for RPC filters.
* Created secureZodParser.ts for secure parsing of Zod schemas.
2025-07-18 19:06:48 +01:00
5 changed files with 536 additions and 12 deletions

View File

@ -2,8 +2,8 @@ import { getBaseClasses, INode, INodeData, INodeParams } from '../../../src'
import { BaseOutputParser } from '@langchain/core/output_parsers' import { BaseOutputParser } from '@langchain/core/output_parsers'
import { StructuredOutputParser as LangchainStructuredOutputParser } from 'langchain/output_parsers' import { StructuredOutputParser as LangchainStructuredOutputParser } from 'langchain/output_parsers'
import { CATEGORY } from '../OutputParserHelpers' import { CATEGORY } from '../OutputParserHelpers'
import { z } from 'zod'
import { jsonrepair } from 'jsonrepair' import { jsonrepair } from 'jsonrepair'
import { SecureZodSchemaParser } from '../../../src/secureZodParser'
class AdvancedStructuredOutputParser implements INode { class AdvancedStructuredOutputParser implements INode {
label: string label: string
@ -57,10 +57,8 @@ class AdvancedStructuredOutputParser implements INode {
const schemaString = nodeData.inputs?.exampleJson as string const schemaString = nodeData.inputs?.exampleJson as string
const autoFix = nodeData.inputs?.autofixParser as boolean const autoFix = nodeData.inputs?.autofixParser as boolean
const zodSchemaFunction = new Function('z', `return ${schemaString}`)
const zodSchema = zodSchemaFunction(z)
try { try {
const zodSchema = SecureZodSchemaParser.parseZodSchema(schemaString)
const structuredOutputParser = LangchainStructuredOutputParser.fromZodSchema(zodSchema) const structuredOutputParser = LangchainStructuredOutputParser.fromZodSchema(zodSchema)
const baseParse = structuredOutputParser.parse const baseParse = structuredOutputParser.parse

View File

@ -3,6 +3,7 @@ import { convertSchemaToZod, getBaseClasses, getVars } from '../../../src/utils'
import { DynamicStructuredTool } from './core' import { DynamicStructuredTool } from './core'
import { z } from 'zod' import { z } from 'zod'
import { DataSource } from 'typeorm' import { DataSource } from 'typeorm'
import { SecureZodSchemaParser } from '../../../src/secureZodParser'
class CustomTool_Tools implements INode { class CustomTool_Tools implements INode {
label: string label: string
@ -119,8 +120,7 @@ class CustomTool_Tools implements INode {
if (customToolName) obj.name = customToolName if (customToolName) obj.name = customToolName
if (customToolDesc) obj.description = customToolDesc if (customToolDesc) obj.description = customToolDesc
if (customToolSchema) { if (customToolSchema) {
const zodSchemaFunction = new Function('z', `return ${customToolSchema}`) obj.schema = SecureZodSchemaParser.parseZodSchema(customToolSchema) as z.ZodObject<ICommonObject, 'strip', z.ZodTypeAny>
obj.schema = zodSchemaFunction(z)
} }
const variables = await getVars(appDataSource, databaseEntities, nodeData, options) const variables = await getVars(appDataSource, databaseEntities, nodeData, options)

View File

@ -3,11 +3,12 @@ import { v4 as uuidv4 } from 'uuid'
import { createClient } from '@supabase/supabase-js' import { createClient } from '@supabase/supabase-js'
import { Document } from '@langchain/core/documents' import { Document } from '@langchain/core/documents'
import { Embeddings } from '@langchain/core/embeddings' import { Embeddings } from '@langchain/core/embeddings'
import { SupabaseVectorStore, SupabaseLibArgs, SupabaseFilterRPCCall } from '@langchain/community/vectorstores/supabase' import { SupabaseVectorStore, SupabaseLibArgs } from '@langchain/community/vectorstores/supabase'
import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface' import { ICommonObject, INode, INodeData, INodeOutputsValue, INodeParams, IndexingResult } from '../../../src/Interface'
import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils' import { getBaseClasses, getCredentialData, getCredentialParam } from '../../../src/utils'
import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils' import { addMMRInputParams, resolveVectorStoreOrRetriever } from '../VectorStoreUtils'
import { index } from '../../../src/indexing' import { index } from '../../../src/indexing'
import { FilterParser } from './filterParser'
class Supabase_VectorStores implements INode { class Supabase_VectorStores implements INode {
label: string label: string
@ -233,11 +234,7 @@ class Supabase_VectorStores implements INode {
} }
if (supabaseRPCFilter) { if (supabaseRPCFilter) {
const funcString = `return rpc.${supabaseRPCFilter};` obj.filter = FilterParser.parseFilterString(supabaseRPCFilter)
const funcFilter = new Function('rpc', funcString)
obj.filter = (rpc: SupabaseFilterRPCCall) => {
return funcFilter(rpc)
}
} }
const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, obj) const vectorStore = await SupabaseVectorStore.fromExistingIndex(embeddings, obj)

View File

@ -0,0 +1,203 @@
/**
 * Parses Supabase RPC filter strings into callable filter functions without
 * ever evaluating user input as code.
 *
 * The accepted input is a chain of whitelisted method calls, for example
 * `filter("metadata->a::int", "gt", 5).limit(10)`. The string is tokenised by
 * a small quote-aware scanner, so characters such as `//`, `)`, `.` or `,`
 * inside a string literal are treated as data and never as syntax.
 */
export class FilterParser {
    /** Methods that may be invoked on the RPC object. */
    private static readonly ALLOWED_METHODS: readonly string[] = ['filter', 'order', 'limit', 'range', 'single', 'maybeSingle']

    /** Operators accepted as the second argument of `filter(...)`. */
    private static readonly ALLOWED_OPERATORS: readonly string[] = [
        'eq',
        'neq',
        'gt',
        'gte',
        'lt',
        'lte',
        'like',
        'ilike',
        'is',
        'in',
        'cs',
        'cd',
        'sl',
        'sr',
        'nxl',
        'nxr',
        'adj',
        'ov',
        'fts',
        'plfts',
        'phfts',
        'wfts'
    ]

    /** Substrings that are rejected in bare (unquoted, non-literal) arguments. */
    private static readonly DANGEROUS_TOKENS: readonly string[] = ['require', 'process', 'eval', 'Function']

    /** Escape sequences that map to a different character; any other escaped character stands for itself. */
    private static readonly ESCAPES: Readonly<Record<string, string>> = { n: '\n', r: '\r', t: '\t' }

    // Patterns are hoisted so the scanners do not rebuild them per character.
    private static readonly NUMBER_PATTERN = /^-?\d+(\.\d+)?$/
    private static readonly WORD_CHAR = /\w/
    private static readonly WHITESPACE = /\s/

    /**
     * Safely parse a Supabase RPC filter string into a function
     * @param filterString The filter string (e.g., 'filter("metadata->a::int", "gt", 5).filter("metadata->c::int", "gt", 7)')
     * @returns A function that applies the parsed call chain to an RPC object and returns the final result
     * @throws Error if the filter string is malformed, uses a method or operator that is not whitelisted,
     * or contains content that cannot be parsed as a call chain
     */
    static parseFilterString(filterString: string): (rpc: any) => any {
        try {
            if (typeof filterString !== 'string') {
                throw new Error('Filter must be a string')
            }
            // Clean and validate the filter string
            const cleanedFilter = this.cleanFilterString(filterString)
            // Parse the filter chain
            const filterChain = this.parseFilterChain(cleanedFilter)
            // Build the safe filter function
            return this.buildFilterFunction(filterChain)
        } catch (error) {
            throw new Error(`Failed to parse Supabase filter: ${this.describeError(error)}`)
        }
    }

    /** Extracts a printable message from a caught value of unknown type. */
    private static describeError(error: unknown): string {
        return error instanceof Error ? error.message : String(error)
    }

    /**
     * Removes comments, collapses whitespace and drops one trailing semicolon.
     * String literals are copied verbatim so that values such as
     * "https://example.com" or "a  b" are not altered.
     */
    private static cleanFilterString(filter: string): string {
        let cleaned = ''
        let quote = ''
        let i = 0
        while (i < filter.length) {
            const char = filter[i]
            if (quote) {
                cleaned += char
                if (char === '\\' && i + 1 < filter.length) {
                    // Keep the escaped character together with its backslash
                    cleaned += filter[i + 1]
                    i += 2
                    continue
                }
                if (char === quote) quote = ''
                i++
                continue
            }
            if (char === '"' || char === "'") {
                quote = char
                cleaned += char
                i++
                continue
            }
            if (char === '/' && filter[i + 1] === '/') {
                // Line comment: skip up to (not including) the line break
                while (i < filter.length && filter[i] !== '\n' && filter[i] !== '\r') i++
                continue
            }
            if (char === '/' && filter[i + 1] === '*') {
                const end = filter.indexOf('*/', i + 2)
                if (end !== -1) {
                    i = end + 2
                    continue
                }
                // Unterminated block comment: fall through and keep the text as-is
            }
            if (this.WHITESPACE.test(char)) {
                if (!cleaned.endsWith(' ')) cleaned += ' '
                i++
                continue
            }
            cleaned += char
            i++
        }
        cleaned = cleaned.trim()
        // Remove trailing semicolon if present
        if (cleaned.endsWith(';')) {
            cleaned = cleaned.slice(0, -1).trim()
        }
        return cleaned
    }

    /**
     * Splits the cleaned filter into `{ method, args }` steps.
     * The whole input must be consumed: anything that is not part of a
     * `name(args)` chain raises an error instead of being silently ignored.
     */
    private static parseFilterChain(filter: string): Array<{ method: string; args: any[] }> {
        const chain: Array<{ method: string; args: any[] }> = []
        const length = filter.length
        let pos = 0
        const skipWhitespace = (): void => {
            while (pos < length && this.WHITESPACE.test(filter[pos])) pos++
        }

        skipWhitespace()
        // A single leading dot (".filter(...)") is tolerated
        if (filter[pos] === '.') pos++
        skipWhitespace()

        while (pos < length) {
            const nameStart = pos
            while (pos < length && this.WORD_CHAR.test(filter[pos])) pos++
            const method = filter.slice(nameStart, pos)
            if (!method) {
                throw new Error(`Unexpected character '${filter[pos]}' at position ${pos}`)
            }
            skipWhitespace()
            if (filter[pos] !== '(') {
                throw new Error(`Expected '(' after '${method}'`)
            }
            // Validate method name
            if (!this.ALLOWED_METHODS.includes(method)) {
                throw new Error(`Disallowed method: ${method}`)
            }
            const close = this.findClosingParen(filter, pos)
            if (close === -1) {
                throw new Error(`Unbalanced parentheses in call to ${method}`)
            }
            // Parse arguments safely
            const args = this.parseArguments(filter.slice(pos + 1, close))
            // Additional validation for filter method
            if (method === 'filter' && args.length >= 2) {
                const operator = args[1]
                if (typeof operator === 'string' && !this.ALLOWED_OPERATORS.includes(operator)) {
                    throw new Error(`Disallowed filter operator: ${operator}`)
                }
            }
            chain.push({ method, args })

            pos = close + 1
            skipWhitespace()
            if (pos < length) {
                if (filter[pos] !== '.') {
                    throw new Error(`Unexpected content after ${method}(...): '${filter.slice(pos)}'`)
                }
                pos++
                skipWhitespace()
            }
        }

        if (chain.length === 0) {
            throw new Error('No valid filter methods found')
        }
        return chain
    }

    /**
     * Returns the index of the `)` matching the `(` at `openIndex`, ignoring
     * brackets inside string literals, or -1 when the brackets do not balance.
     */
    private static findClosingParen(text: string, openIndex: number): number {
        const closers: string[] = []
        let quote = ''
        for (let i = openIndex; i < text.length; i++) {
            const char = text[i]
            if (quote) {
                if (char === '\\') i++
                else if (char === quote) quote = ''
            } else if (char === '"' || char === "'") {
                quote = char
            } else if (char === '(') {
                closers.push(')')
            } else if (char === '[') {
                closers.push(']')
            } else if (char === '{') {
                closers.push('}')
            } else if (char === ')' || char === ']' || char === '}') {
                if (closers.pop() !== char) return -1
                if (closers.length === 0) return i
            }
        }
        return -1
    }

    /**
     * Splits a comma-separated argument list at top level (outside quotes and
     * brackets) and converts each piece with {@link parseArgument}.
     */
    private static parseArguments(argsString: string): any[] {
        if (!argsString.trim()) {
            return []
        }
        const args: any[] = []
        let current = ''
        let quote = ''
        let depth = 0
        for (let i = 0; i < argsString.length; i++) {
            const char = argsString[i]
            if (quote) {
                current += char
                if (char === '\\' && i + 1 < argsString.length) {
                    // An escaped character can never terminate the string
                    current += argsString[++i]
                } else if (char === quote) {
                    quote = ''
                }
                continue
            }
            if (char === '"' || char === "'") {
                quote = char
                current += char
            } else if (char === ',' && depth === 0) {
                args.push(this.parseArgument(current))
                current = ''
            } else {
                if (char === '(' || char === '[' || char === '{') depth++
                else if (char === ')' || char === ']' || char === '}') depth--
                current += char
            }
        }
        if (current.trim()) {
            args.push(this.parseArgument(current))
        }
        return args
    }

    /**
     * Converts one argument token into a value: quoted string, number,
     * boolean, null or array. Anything else is passed through as a plain
     * string after a check for obviously dangerous identifiers.
     */
    private static parseArgument(arg: string): any {
        arg = arg.trim()
        // Handle strings
        const literal = this.parseStringLiteral(arg)
        if (literal !== undefined) {
            return literal
        }
        // Handle numbers
        if (this.NUMBER_PATTERN.test(arg)) {
            return parseFloat(arg)
        }
        // Handle booleans and null
        if (arg === 'true') return true
        if (arg === 'false') return false
        if (arg === 'null') return null
        // Handle arrays: reuse the quote-aware splitter so "a,b" stays one element
        if (arg.length >= 2 && arg.startsWith('[') && arg.endsWith(']')) {
            return this.parseArguments(arg.slice(1, -1))
        }
        // For everything else, treat as string (but validate it doesn't contain dangerous tokens)
        if (this.DANGEROUS_TOKENS.some((token) => arg.includes(token))) {
            throw new Error(`Potentially dangerous argument: ${arg}`)
        }
        return arg
    }

    /**
     * Decodes a single- or double-quoted literal, resolving backslash escapes.
     * Returns undefined when `arg` is not exactly one well-formed literal.
     */
    private static parseStringLiteral(arg: string): string | undefined {
        const quote = arg[0]
        if (arg.length < 2 || (quote !== '"' && quote !== "'") || arg[arg.length - 1] !== quote) {
            return undefined
        }
        const last = arg.length - 1
        let value = ''
        for (let i = 1; i < last; i++) {
            const char = arg[i]
            if (char === quote) return undefined // literal ended early: more than one token
            if (char === '\\') {
                if (i + 1 >= last) return undefined // the closing quote itself is escaped
                const next = arg[++i]
                value += this.ESCAPES[next] ?? next
            } else {
                value += char
            }
        }
        return value
    }

    /**
     * Builds the function that replays the parsed chain on an RPC object,
     * feeding each call's return value into the next call.
     */
    private static buildFilterFunction(chain: Array<{ method: string; args: any[] }>): (rpc: any) => any {
        return (rpc: any) => {
            let result = rpc
            for (const { method, args } of chain) {
                if (result == null || typeof result[method] !== 'function') {
                    throw new Error(`Method ${method} is not available on the RPC object`)
                }
                try {
                    result = result[method](...args)
                } catch (error) {
                    throw new Error(`Failed to call ${method}: ${this.describeError(error)}`)
                }
            }
            return result
        }
    }
}

View File

@ -0,0 +1,326 @@
import { z } from 'zod'
/** One chained call after the base type, e.g. `.max(500)`. */
interface ParsedZodModifier {
    name: string
    args: unknown[]
}

/** Intermediate, data-only description of one Zod type expression. */
interface ParsedZodType {
    /** Base type name, e.g. `string`, `enum`, `array`, `object`. */
    base: string
    /** Chained modifiers in source order. */
    modifiers: ParsedZodModifier[]
    /** Literal arguments of the base call (used by `enum`). */
    baseArgs?: unknown[]
    /** Properties of a nested `z.object({...})`. */
    shape?: Record<string, ParsedZodType>
    /** Element type of `z.array(...)`. */
    element?: ParsedZodType
}

/**
 * This parser safely handles Zod schema strings without allowing arbitrary code execution.
 *
 * The schema text is tokenised by quote- and bracket-aware scanners and turned
 * into a plain description first; Zod objects are then built from that
 * description using a fixed set of supported types and modifiers. No part of
 * the input is ever evaluated.
 */
export class SecureZodSchemaParser {
    /** Escape sequences that map to a different character; any other escaped character stands for itself. */
    private static readonly ESCAPES: Readonly<Record<string, string>> = { n: '\n', r: '\r', t: '\t' }

    // Patterns are hoisted so the scanners do not rebuild them per token.
    private static readonly NUMBER_PATTERN = /^-?\d+(\.\d+)?$/
    private static readonly CALL_PATTERN = /^(\w+)\s*(\([\s\S]*\))?$/
    private static readonly WHITESPACE = /\s/

    /**
     * Safely parse a Zod schema string into a Zod schema object
     * @param schemaString The Zod schema as a string (e.g., "z.object({name: z.string()})")
     * @returns A Zod schema object
     * @throws Error if the schema is invalid or uses an unsupported base type
     */
    static parseZodSchema(schemaString: string): z.ZodTypeAny {
        try {
            if (typeof schemaString !== 'string') {
                throw new Error('Schema must be a string')
            }
            // Remove comments and normalize whitespace
            const cleanedSchema = this.cleanSchemaString(schemaString)
            // Parse the schema structure
            const parsed = this.parseSchemaStructure(cleanedSchema)
            // Build the Zod schema securely
            return this.buildZodSchema(parsed)
        } catch (error) {
            throw new Error(`Failed to parse Zod schema: ${this.describeError(error)}`)
        }
    }

    /** Extracts a printable message from a caught value of unknown type. */
    private static describeError(error: unknown): string {
        return error instanceof Error ? error.message : String(error)
    }

    /**
     * Removes line and block comments and collapses whitespace.
     * String literals are copied verbatim so that descriptions containing
     * `//` (URLs) or repeated spaces are preserved.
     */
    private static cleanSchemaString(schema: string): string {
        let cleaned = ''
        let quote = ''
        let i = 0
        while (i < schema.length) {
            const char = schema[i]
            if (quote) {
                cleaned += char
                if (char === '\\' && i + 1 < schema.length) {
                    // Keep the escaped character together with its backslash
                    cleaned += schema[i + 1]
                    i += 2
                    continue
                }
                if (char === quote) quote = ''
                i++
                continue
            }
            if (char === '"' || char === "'") {
                quote = char
                cleaned += char
                i++
                continue
            }
            if (char === '/' && schema[i + 1] === '/') {
                // Line comment: skip up to (not including) the line break
                while (i < schema.length && schema[i] !== '\n' && schema[i] !== '\r') i++
                continue
            }
            if (char === '/' && schema[i + 1] === '*') {
                const end = schema.indexOf('*/', i + 2)
                if (end !== -1) {
                    i = end + 2
                    continue
                }
                // Unterminated block comment: fall through and keep the text as-is
            }
            if (this.WHITESPACE.test(char)) {
                if (!cleaned.endsWith(' ')) cleaned += ' '
                i++
                continue
            }
            cleaned += char
            i++
        }
        return cleaned.trim()
    }

    /**
     * Parses the top-level `z.object({...})` expression into a property map.
     * Anything following the closing parenthesis of `z.object(...)` is ignored.
     */
    private static parseSchemaStructure(schema: string): Record<string, ParsedZodType> {
        // This is a simplified parser that handles common Zod patterns safely
        // It does NOT use eval/Function and only handles predefined safe patterns
        if (!schema.startsWith('z.object(')) {
            throw new Error('Schema must start with z.object()')
        }
        const open = 'z.object'.length
        const close = this.findClosing(schema, open)
        if (close === -1) {
            throw new Error('Invalid z.object() syntax')
        }
        return this.parseObjectLiteral(schema.slice(open, close + 1))
    }

    /**
     * Parses the argument text of an object call, i.e. `({ key: type, ... })`
     * including the surrounding parentheses.
     */
    private static parseObjectLiteral(args: string | undefined): Record<string, ParsedZodType> {
        const inner = args ? args.slice(1, -1).trim() : ''
        if (inner.length < 2 || !inner.startsWith('{') || !inner.endsWith('}')) {
            throw new Error('Invalid z.object() syntax')
        }
        return this.parseObjectProperties(inner.slice(1, -1))
    }

    /** Parses the text between the braces of an object literal into a property map. */
    private static parseObjectProperties(content: string): Record<string, ParsedZodType> {
        const properties: Record<string, ParsedZodType> = {}
        // Split by comma, but handle nested structures
        for (const prop of this.splitTopLevel(content, ',')) {
            const [key, value] = this.parseProperty(prop)
            if (key && value) {
                properties[key] = value
            }
        }
        return properties
    }

    /**
     * Splits one `key: type` entry. Entries without a colon yield `[null, null]`
     * and are skipped by the caller.
     */
    private static parseProperty(prop: string): [string | null, ParsedZodType | null] {
        // Search outside quotes so a quoted key may itself contain ':'
        const colonIndex = this.indexOfTopLevel(prop, ':')
        if (colonIndex === -1) return [null, null]
        const key = prop.substring(0, colonIndex).trim().replace(/['"]/g, '')
        const value = prop.substring(colonIndex + 1).trim()
        return [key, this.parseZodType(value)]
    }

    /**
     * Parses one type expression such as `z.string().max(500).optional()`.
     * The chain is split on dots outside quotes and brackets, so dots inside
     * descriptions or decimal arguments do not break the expression apart.
     */
    private static parseZodType(typeStr: string): ParsedZodType {
        const type: ParsedZodType = { base: '', modifiers: [] }
        const parts = this.splitTopLevel(typeStr, '.')
        // First part should be 'z'
        const root = parts[0] ?? ''
        if (root !== 'z') {
            throw new Error(`Expected 'z' but got '${root}'`)
        }
        for (let i = 1; i < parts.length; i++) {
            if (i === 1) {
                // Second part is the base type
                const { name, args } = this.parseCall(parts[i], 'base type')
                type.base = name
                if (name === 'object') {
                    type.shape = this.parseObjectLiteral(args)
                } else if (name === 'array') {
                    const inner = args ? args.slice(1, -1).trim() : ''
                    if (!inner) {
                        throw new Error('array requires an element type')
                    }
                    type.element = this.parseZodType(inner)
                } else if (args) {
                    // Parse arguments for base type (e.g., enum values)
                    type.baseArgs = this.parseArguments(args)
                }
            } else {
                // Subsequent parts are modifiers
                const { name, args } = this.parseCall(parts[i], 'modifier')
                type.modifiers.push({ name, args: args ? this.parseArguments(args) : [] })
            }
        }
        return type
    }

    /**
     * Splits `name(args)` into its name and the raw argument text (with
     * parentheses). `args` is undefined when no call parentheses are present.
     */
    private static parseCall(part: string, kind: 'base type' | 'modifier'): { name: string; args?: string } {
        const match = this.CALL_PATTERN.exec(part)
        const args = match ? match[2] : undefined
        // The argument text must be exactly one balanced parenthesised group
        if (!match || (args !== undefined && this.findClosing(args, 0) !== args.length - 1)) {
            throw new Error(`Invalid ${kind}: ${part}`)
        }
        return { name: match[1], args }
    }

    /**
     * Parses a parenthesised, comma-separated list of literals.
     * @param argsStr Argument text including the outer parentheses
     */
    private static parseArguments(argsStr: string): any[] {
        // Remove outer parentheses
        const inner = argsStr.slice(1, -1).trim()
        if (!inner) return []
        return this.splitTopLevel(inner, ',').map((arg) => this.parseLiteral(arg))
    }

    /** Converts one literal token: array, number, quoted string, or the raw text otherwise. */
    private static parseLiteral(arg: string): unknown {
        if (arg.length >= 2 && arg.startsWith('[') && arg.endsWith(']')) {
            return this.parseArrayContent(arg.slice(1, -1))
        }
        if (this.NUMBER_PATTERN.test(arg)) {
            return Number(arg)
        }
        const text = this.parseStringLiteral(arg)
        return text !== undefined ? text : arg
    }

    /** Parses the text between `[` and `]` into a list of strings (used for enum values). */
    private static parseArrayContent(content: string): string[] {
        return this.splitTopLevel(content, ',').map((item) => {
            const text = this.parseStringLiteral(item)
            // Not a clean literal: fall back to stripping one quote on each side
            return text !== undefined ? text : item.replace(/^["']|["']$/g, '')
        })
    }

    /**
     * Decodes a single- or double-quoted literal, resolving backslash escapes.
     * Returns undefined when `arg` is not exactly one well-formed literal.
     */
    private static parseStringLiteral(arg: string): string | undefined {
        const quote = arg[0]
        if (arg.length < 2 || (quote !== '"' && quote !== "'") || arg[arg.length - 1] !== quote) {
            return undefined
        }
        const last = arg.length - 1
        let value = ''
        for (let i = 1; i < last; i++) {
            const char = arg[i]
            if (char === quote) return undefined // literal ended early: more than one token
            if (char === '\\') {
                if (i + 1 >= last) return undefined // the closing quote itself is escaped
                const next = arg[++i]
                value += this.ESCAPES[next] ?? next
            } else {
                value += char
            }
        }
        return value
    }

    /**
     * Finds the first occurrence of `target` at or after `from` that is outside
     * string literals and outside any bracket pair. `from` must itself be a
     * top-level position. Returns -1 when there is none.
     */
    private static indexOfTopLevel(text: string, target: string, from = 0): number {
        let depth = 0
        let quote = ''
        for (let i = from; i < text.length; i++) {
            const char = text[i]
            if (quote) {
                if (char === '\\') i++
                else if (char === quote) quote = ''
            } else if (char === '"' || char === "'") {
                quote = char
            } else if (char === target && depth === 0) {
                return i
            } else if (char === '(' || char === '[' || char === '{') {
                depth++
            } else if (char === ')' || char === ']' || char === '}') {
                depth--
            }
        }
        return -1
    }

    /**
     * Splits `text` on top-level occurrences of `separator`. Pieces are
     * trimmed; an empty trailing piece is dropped.
     */
    private static splitTopLevel(text: string, separator: string): string[] {
        const parts: string[] = []
        let start = 0
        let at = this.indexOfTopLevel(text, separator, start)
        while (at !== -1) {
            parts.push(text.slice(start, at).trim())
            start = at + 1
            at = this.indexOfTopLevel(text, separator, start)
        }
        const tail = text.slice(start).trim()
        if (tail) {
            parts.push(tail)
        }
        return parts
    }

    /**
     * Returns the index of the bracket closing the one at `openIndex`, ignoring
     * brackets inside string literals, or -1 when the brackets do not balance.
     */
    private static findClosing(text: string, openIndex: number): number {
        const closers: string[] = []
        let quote = ''
        for (let i = openIndex; i < text.length; i++) {
            const char = text[i]
            if (quote) {
                if (char === '\\') i++
                else if (char === quote) quote = ''
            } else if (char === '"' || char === "'") {
                quote = char
            } else if (char === '(') {
                closers.push(')')
            } else if (char === '[') {
                closers.push(']')
            } else if (char === '{') {
                closers.push('}')
            } else if (char === ')' || char === ']' || char === '}') {
                if (closers.pop() !== char) return -1
                if (closers.length === 0) return i
            }
        }
        return -1
    }

    /** Builds a `z.object` from a parsed property map. */
    private static buildZodSchema(parsed: Record<string, ParsedZodType>): z.ZodObject<any> {
        const schemaObj: Record<string, z.ZodTypeAny> = {}
        for (const [key, typeInfo] of Object.entries(parsed)) {
            schemaObj[key] = this.buildZodType(typeInfo)
        }
        return z.object(schemaObj)
    }

    /**
     * Builds one Zod type from its parsed description.
     * @throws Error when the base type is not one of the supported types
     */
    private static buildZodType(typeInfo: ParsedZodType): z.ZodTypeAny {
        let zodType: z.ZodTypeAny
        // Build base type
        switch (typeInfo.base) {
            case 'string':
                zodType = z.string()
                break
            case 'number':
                zodType = z.number()
                break
            case 'boolean':
                zodType = z.boolean()
                break
            case 'date':
                zodType = z.date()
                break
            case 'object':
                zodType = this.buildZodSchema(typeInfo.shape ?? {})
                break
            case 'array':
                if (!typeInfo.element) {
                    throw new Error('array requires an element type')
                }
                zodType = z.array(this.buildZodType(typeInfo.element))
                break
            case 'enum': {
                const values = typeInfo.baseArgs?.[0]
                if (!Array.isArray(values)) {
                    throw new Error('enum requires array of values')
                }
                zodType = z.enum(values as [string, ...string[]])
                break
            }
            default:
                throw new Error(`Unsupported base type: ${typeInfo.base}`)
        }
        // Apply modifiers
        for (const modifier of typeInfo.modifiers) {
            const firstArg = modifier.args[0]
            switch (modifier.name) {
                case 'int':
                    if (zodType._def?.typeName === 'ZodNumber') {
                        zodType = (zodType as z.ZodNumber).int()
                    }
                    break
                case 'max':
                case 'min':
                    // Bounds only make sense with a numeric limit
                    if (typeof firstArg === 'number') {
                        zodType = this.applyBound(zodType, modifier.name, firstArg)
                    }
                    break
                case 'optional':
                    zodType = zodType.optional()
                    break
                case 'array':
                    zodType = z.array(zodType)
                    break
                case 'describe':
                    if (firstArg) {
                        zodType = zodType.describe(String(firstArg))
                    }
                    break
                default:
                    // Ignore unknown modifiers for compatibility
                    break
            }
        }
        return zodType
    }

    /**
     * Applies `.min(limit)` / `.max(limit)` to string, number and array types.
     * Other types are returned unchanged.
     */
    private static applyBound(zodType: z.ZodTypeAny, bound: 'min' | 'max', limit: number): z.ZodTypeAny {
        switch (zodType._def?.typeName) {
            case 'ZodString': {
                const target = zodType as z.ZodString
                return bound === 'max' ? target.max(limit) : target.min(limit)
            }
            case 'ZodNumber': {
                const target = zodType as z.ZodNumber
                return bound === 'max' ? target.max(limit) : target.min(limit)
            }
            case 'ZodArray': {
                const target = zodType as z.ZodArray<any>
                return bound === 'max' ? target.max(limit) : target.min(limit)
            }
            default:
                return zodType
        }
    }
}