Merge branch 'main' into chore/DockerFile-Non-Root

# Conflicts:
#	Dockerfile
This commit is contained in:
Henry 2025-11-15 18:08:13 +00:00
commit 5999d360ae
5 changed files with 170 additions and 14 deletions

View File

@ -10,6 +10,7 @@ import { test } from 'linkifyjs'
import { omit } from 'lodash' import { omit } from 'lodash'
import { handleEscapeCharacters, INodeOutputsValue, webCrawl, xmlScrape } from '../../../src' import { handleEscapeCharacters, INodeOutputsValue, webCrawl, xmlScrape } from '../../../src'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { isSafeBrowserExecutable } from '../../../src/validator'
class Playwright_DocumentLoaders implements INode { class Playwright_DocumentLoaders implements INode {
label: string label: string
@ -190,11 +191,17 @@ class Playwright_DocumentLoaders implements INode {
async function playwrightLoader(url: string): Promise<Document[] | undefined> { async function playwrightLoader(url: string): Promise<Document[] | undefined> {
try { try {
let docs = [] let docs = []
const executablePath = process.env.PLAYWRIGHT_EXECUTABLE_PATH
if (!isSafeBrowserExecutable(executablePath)) {
throw new Error(`Invalid or unsafe browser executable path: ${executablePath || 'undefined'}. `)
}
const config: PlaywrightWebBaseLoaderOptions = { const config: PlaywrightWebBaseLoaderOptions = {
launchOptions: { launchOptions: {
args: ['--no-sandbox'], args: ['--no-sandbox'],
headless: true, headless: true,
executablePath: process.env.PLAYWRIGHT_EXECUTABLE_FILE_PATH executablePath: executablePath
} }
} }
if (waitUntilGoToOption) { if (waitUntilGoToOption) {

View File

@ -6,6 +6,7 @@ import { omit } from 'lodash'
import { PuppeteerLifeCycleEvent } from 'puppeteer' import { PuppeteerLifeCycleEvent } from 'puppeteer'
import { handleEscapeCharacters, INodeOutputsValue, webCrawl, xmlScrape } from '../../../src' import { handleEscapeCharacters, INodeOutputsValue, webCrawl, xmlScrape } from '../../../src'
import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface'
import { isSafeBrowserExecutable } from '../../../src/validator'
class Puppeteer_DocumentLoaders implements INode { class Puppeteer_DocumentLoaders implements INode {
label: string label: string
@ -181,11 +182,17 @@ class Puppeteer_DocumentLoaders implements INode {
async function puppeteerLoader(url: string): Promise<Document[] | undefined> { async function puppeteerLoader(url: string): Promise<Document[] | undefined> {
try { try {
let docs: Document[] = [] let docs: Document[] = []
const executablePath = process.env.PUPPETEER_EXECUTABLE_PATH
if (!isSafeBrowserExecutable(executablePath)) {
throw new Error(`Invalid or unsafe browser executable path: ${executablePath || 'undefined'}. `)
}
const config: PuppeteerWebBaseLoaderOptions = { const config: PuppeteerWebBaseLoaderOptions = {
launchOptions: { launchOptions: {
args: ['--no-sandbox'], args: ['--no-sandbox'],
headless: 'new', headless: 'new',
executablePath: process.env.PUPPETEER_EXECUTABLE_FILE_PATH executablePath: executablePath
} }
} }
if (waitUntilGoToOption) { if (waitUntilGoToOption) {

View File

@ -1,8 +1,8 @@
import { Serializable } from '@langchain/core/load/serializable' import { Serializable } from '@langchain/core/load/serializable'
import { NodeFileStore } from 'langchain/stores/file/node'
import { isUnsafeFilePath, isWithinWorkspace } from './validator'
import * as path from 'path'
import * as fs from 'fs' import * as fs from 'fs'
import { NodeFileStore } from 'langchain/stores/file/node'
import * as path from 'path'
import { isSensitiveSystemPath, isUnsafeFilePath, isWithinWorkspace } from './validator'
/** /**
* Security configuration for file operations * Security configuration for file operations
@ -65,28 +65,50 @@ export class SecureFileStore extends Serializable {
throw new Error(`Workspace directory does not exist: ${this.config.workspacePath}`) throw new Error(`Workspace directory does not exist: ${this.config.workspacePath}`)
} }
// Validate that workspace path is not a sensitive system directory
// This prevents setting workspace to /usr/bin, /etc, etc. which would allow access to system files
if (isSensitiveSystemPath(path.normalize(this.config.workspacePath))) {
throw new Error(`Workspace path cannot be set to sensitive system directory: ${this.config.workspacePath}`)
}
// Initialize the underlying NodeFileStore with workspace path // Initialize the underlying NodeFileStore with workspace path
this.nodeFileStore = new NodeFileStore(this.config.workspacePath) this.nodeFileStore = new NodeFileStore(this.config.workspacePath)
} }
/** /**
* Validates a file path against security policies * Validates a file path against security policies
* @param filePath The raw user-provided file path (relative to workspace)
* @param resolvedPath The resolved absolute path (for extension validation)
*/ */
private validateFilePath(filePath: string): void { private validateFilePath(filePath: string, resolvedPath: string): void {
// Check for unsafe path patterns // Validate the raw user input for unsafe patterns (path traversal, absolute paths, etc.)
// This must be done on the raw input, not the resolved path, because isUnsafeFilePath
// is designed to detect absolute paths in user input
if (isUnsafeFilePath(filePath)) { if (isUnsafeFilePath(filePath)) {
throw new Error(`Unsafe file path detected: ${filePath}`) throw new Error(`Unsafe file path detected: ${filePath}`)
} }
// Enforce workspace boundaries if enabled // Enforce workspace boundaries if enabled (this handles path resolution internally)
if (this.config.enforceWorkspaceBoundaries) { if (this.config.enforceWorkspaceBoundaries) {
if (!isWithinWorkspace(filePath, this.config.workspacePath)) { if (!isWithinWorkspace(filePath, this.config.workspacePath)) {
throw new Error(`File path outside workspace boundaries: ${filePath}`) throw new Error(`File path outside workspace boundaries: ${filePath}`)
} }
} }
// Check file extension // Prevent access to Flowise internal files (any path containing .flowise)
const ext = path.extname(filePath).toLowerCase() const normalizedResolved = path.normalize(resolvedPath)
if (normalizedResolved.includes('.flowise')) {
throw new Error(`Access to Flowise internal files denied: ${filePath}`)
}
// Validate that the resolved path does not access sensitive system directories
// This prevents access to system files even if workspace is set to a system directory
if (isSensitiveSystemPath(normalizedResolved)) {
throw new Error(`Access to sensitive system directory denied: ${filePath}`)
}
// Check file extension on the resolved path to get the actual extension
const ext = path.extname(resolvedPath).toLowerCase()
// Check blocked extensions // Check blocked extensions
if (this.config.blockedExtensions.includes(ext)) { if (this.config.blockedExtensions.includes(ext)) {
@ -113,7 +135,10 @@ export class SecureFileStore extends Serializable {
* Reads a file with security validation * Reads a file with security validation
*/ */
async readFile(filePath: string): Promise<string> { async readFile(filePath: string): Promise<string> {
this.validateFilePath(filePath) // Resolve the full path for extension validation
const resolvedPath = path.resolve(this.config.workspacePath, filePath)
// Validate the raw user input (not the resolved path) to avoid false positives
this.validateFilePath(filePath, resolvedPath)
try { try {
return await this.nodeFileStore.readFile(filePath) return await this.nodeFileStore.readFile(filePath)
@ -127,12 +152,16 @@ export class SecureFileStore extends Serializable {
* Writes a file with security validation * Writes a file with security validation
*/ */
async writeFile(filePath: string, contents: string): Promise<void> { async writeFile(filePath: string, contents: string): Promise<void> {
this.validateFilePath(filePath)
this.validateFileSize(contents) this.validateFileSize(contents)
// Resolve the full path for extension validation and directory creation
const resolvedPath = path.resolve(this.config.workspacePath, filePath)
// Validate the raw user input (not the resolved path) to avoid false positives
this.validateFilePath(filePath, resolvedPath)
try { try {
// Ensure the directory exists // Ensure the directory exists
const dir = path.dirname(path.resolve(this.config.workspacePath, filePath)) const dir = path.dirname(resolvedPath)
if (!fs.existsSync(dir)) { if (!fs.existsSync(dir)) {
fs.mkdirSync(dir, { recursive: true }) fs.mkdirSync(dir, { recursive: true })
} }

View File

@ -70,6 +70,35 @@ export const isUnsafeFilePath = (filePath: string): boolean => {
return dangerousPatterns.some((pattern) => pattern.test(filePath)) return dangerousPatterns.some((pattern) => pattern.test(filePath))
} }
/**
* Validates if a resolved path accesses sensitive system directories
* Uses pattern-based detection to identify known sensitive system directories
* at root level or one level deep, while allowing legitimate paths like /usr/src
* @param {string} resolvedPath The resolved absolute path to validate
* @returns {boolean} True if path accesses sensitive system directory, false otherwise
*/
export const isSensitiveSystemPath = (resolvedPath: string): boolean => {
if (!resolvedPath || typeof resolvedPath !== 'string') {
return false
}
// Pattern-based detection for known sensitive system directories:
// Blocks obvious system directories while allowing legitimate paths like /usr/src, /usr/local/src, /opt, etc.
// 1. At root level (e.g., /etc, /sys, /bin, /sbin) - one segment after root
// 2. One level deep (e.g., /etc/passwd, /sys/kernel, /var/log) - two segments total
// 3. Specific sensitive subdirectories (e.g., /var/log, /var/run) - two segments with specific parent
// 4. System binary directories (e.g., /usr/bin, /usr/sbin, /usr/local/bin) - prevents overwriting system executables
const sensitiveSystemPatterns = [
/^[/\\](etc|sys|proc|dev|boot|root|bin|sbin)([/\\]|$)/i, // Root level: /etc, /sys, /proc, /bin, /sbin, etc.
/^[/\\](etc|sys|proc|dev|boot|root|bin|sbin)[/\\][^/\\]*$/i, // One level deep: /etc/passwd, /sys/kernel, /bin/sh, etc.
/^[/\\]var[/\\](log|run|lib|spool|mail)([/\\]|$)/i, // Sensitive /var subdirectories: /var/log, /var/run, etc.
/^[/\\]usr[/\\](bin|sbin)([/\\]|$)/i, // System binary directories: /usr/bin, /usr/sbin
/^[/\\]usr[/\\]local[/\\](bin|sbin)([/\\]|$)/i // Local system binaries: /usr/local/bin, /usr/local/sbin
]
return sensitiveSystemPatterns.some((pattern) => pattern.test(resolvedPath))
}
/** /**
* Validates if a file path is within the allowed workspace boundaries * Validates if a file path is within the allowed workspace boundaries
* @param {string} filePath The file path to validate * @param {string} filePath The file path to validate
@ -102,3 +131,87 @@ export const isWithinWorkspace = (filePath: string, workspacePath: string): bool
return false return false
} }
} }
/**
* Validates if a browser executable path is safe to use
* Prevents arbitrary code execution through environment variable manipulation
* @param {string} executablePath The browser executable path to validate
* @returns {boolean} True if path is safe, false otherwise
*/
export const isSafeBrowserExecutable = (executablePath: string | undefined): boolean => {
if (!executablePath) {
return true // If not specified, let browser library use its default
}
if (typeof executablePath !== 'string' || executablePath.trim() === '') {
return false
}
const path = require('path')
const fs = require('fs')
try {
// Normalize the path
const normalizedPath = path.normalize(executablePath)
// Must be an absolute path
if (!path.isAbsolute(normalizedPath)) {
return false
}
// Allowed browser executable locations (system-managed only)
const allowedPaths = [
// Linux/Unix Chromium/Chrome paths
'/usr/bin/chromium',
'/usr/bin/chromium-browser',
'/usr/bin/google-chrome',
'/usr/bin/google-chrome-stable',
'/usr/bin/chrome',
'/snap/bin/chromium',
// macOS Chrome/Chromium paths
'/Applications/Google Chrome.app/Contents/MacOS/Google Chrome',
'/Applications/Chromium.app/Contents/MacOS/Chromium',
// Windows Chrome/Chromium paths (normalized with forward slashes)
'C:/Program Files/Google/Chrome/Application/chrome.exe',
'C:/Program Files (x86)/Google/Chrome/Application/chrome.exe',
'C:/Program Files/Chromium/Application/chrome.exe',
// Firefox paths
'/usr/bin/firefox',
'/Applications/Firefox.app/Contents/MacOS/firefox',
'C:/Program Files/Mozilla Firefox/firefox.exe',
'C:/Program Files (x86)/Mozilla Firefox/firefox.exe'
]
// Normalize allowed paths for comparison (handle Windows backslashes)
const normalizedAllowedPaths = allowedPaths.map((p) => path.normalize(p))
// Check if the path exactly matches one of the allowed paths
const isAllowedPath = normalizedAllowedPaths.some((allowedPath) => normalizedPath.toLowerCase() === allowedPath.toLowerCase())
if (!isAllowedPath) {
return false
}
// Additional security: Verify file exists and is executable (where applicable)
// This prevents using a path before malicious file is written
try {
if (fs.existsSync(normalizedPath)) {
const stats = fs.statSync(normalizedPath)
// On Unix-like systems, check if file is executable
if (process.platform !== 'win32') {
// Check if file has execute permissions (using bitwise AND)
// 0o111 checks for execute permission for user, group, or others
return (stats.mode & 0o111) !== 0
}
return stats.isFile()
}
// If file doesn't exist, reject it (prevents race conditions)
return false
} catch {
return false
}
} catch (error) {
// If any error occurs during validation, deny access
return false
}
}