- Added `secureFetch` and `checkDenyList` functions from the new `httpSecurity` module to harden web crawling and link fetching.

- Updated `crawl`, `webCrawl`, `xmlScrape`, and `getAllLinks` to route requests through these checks, so target URLs are validated against the deny list before any outbound fetch is made.
Henry 2025-09-12 16:27:28 +01:00
parent 4987a2880d
commit 73500ad09c
3 changed files with 9 additions and 3 deletions
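
The `httpSecurity` module itself is not shown in this diff. Below is a minimal sketch of what the two helpers might look like, assuming `checkDenyList` throws when a URL's host is explicitly denied or resolves to a private address, and `secureFetch` runs that check before delegating to the global `fetch`. The deny rules, error messages, and helper names inside the sketch are illustrative, not the actual implementation:

import dns from 'node:dns/promises'

// Illustrative deny rules; the real module may read these from configuration.
const DENIED_HOSTS = ['localhost', '169.254.169.254']
const PRIVATE_RANGES = [/^127\./, /^10\./, /^192\.168\./, /^172\.(1[6-9]|2\d|3[01])\./]

export async function checkDenyList(stringURL: string): Promise<void> {
    const { hostname } = new URL(stringURL)
    if (DENIED_HOSTS.includes(hostname)) {
        throw new Error(`Access to ${hostname} is not allowed`)
    }
    // Resolve the hostname so DNS names pointing at internal IPs are also caught.
    const { address } = await dns.lookup(hostname)
    if (PRIVATE_RANGES.some((range) => range.test(address))) {
        throw new Error(`Access to ${address} is not allowed`)
    }
}

export async function secureFetch(url: string, init?: RequestInit): Promise<Response> {
    await checkDenyList(url)
    // Note: a hardened implementation would also re-validate redirect targets.
    return fetch(url, init)
}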


@@ -13,3 +13,4 @@ export * from '../evaluation/EvaluationRunner'
 export * from './followUpPrompts'
 export * from './validator'
 export * from './agentflowv2Generator'
+export * from './httpSecurity'


@@ -18,6 +18,7 @@ import { TextSplitter } from 'langchain/text_splitter'
 import { DocumentLoader } from 'langchain/document_loaders/base'
 import { NodeVM } from '@flowiseai/nodevm'
 import { Sandbox } from '@e2b/code-interpreter'
+import { secureFetch, checkDenyList } from './httpSecurity'
 import JSON5 from 'json5'

 export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$' //return true if string consists only numbers OR expression {{}}
@@ -422,7 +423,7 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
     if (process.env.DEBUG === 'true') console.info(`actively crawling ${currentURL}`)
     try {
-        const resp = await fetch(currentURL)
+        const resp = await secureFetch(currentURL)
         if (resp.status > 399) {
             if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
@@ -453,6 +454,8 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
  * @returns {Promise<string[]>}
  */
 export async function webCrawl(stringURL: string, limit: number): Promise<string[]> {
+    await checkDenyList(stringURL)
     const URLObj = new URL(stringURL)
     const modifyURL = stringURL.slice(-1) === '/' ? stringURL.slice(0, -1) : stringURL
     return await crawl(URLObj.protocol + '//' + URLObj.hostname, modifyURL, [], limit)
@@ -476,7 +479,7 @@ export async function xmlScrape(currentURL: string, limit: number): Promise<string[]> {
     let urls: string[] = []
     if (process.env.DEBUG === 'true') console.info(`actively scarping ${currentURL}`)
     try {
-        const resp = await fetch(currentURL)
+        const resp = await secureFetch(currentURL)
         if (resp.status > 399) {
             if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)


@@ -1,4 +1,4 @@
-import { webCrawl, xmlScrape } from 'flowise-components'
+import { webCrawl, xmlScrape, checkDenyList } from 'flowise-components'
 import { StatusCodes } from 'http-status-codes'
 import { InternalFlowiseError } from '../../errors/internalFlowiseError'
 import { getErrorMessage } from '../../errors/utils'
@@ -6,6 +6,8 @@ import { getErrorMessage } from '../../errors/utils'
 const getAllLinks = async (requestUrl: string, relativeLinksMethod: string, queryLimit: string): Promise<any> => {
     try {
         const url = decodeURIComponent(requestUrl)
+        await checkDenyList(url)
         if (!relativeLinksMethod) {
             throw new InternalFlowiseError(
                 StatusCodes.INTERNAL_SERVER_ERROR,
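
With these changes applied, a request targeting a denied host should fail fast, before any network call is made. A hypothetical usage sketch follows; which hosts are actually rejected depends on the deny rules in the real `httpSecurity` module:

import { webCrawl } from 'flowise-components'

async function demo() {
    // Allowed hosts behave as before.
    const pages = await webCrawl('https://example.com/', 10)
    console.log(`crawled ${pages.length} page(s)`)

    // Assuming internal/metadata addresses are on the deny list, this rejects
    // inside checkDenyList before any HTTP request is issued.
    try {
        await webCrawl('http://169.254.169.254/latest/meta-data/', 10)
    } catch (err) {
        console.error('blocked:', err)
    }
}

demo().catch(console.error)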