From 73500ad09cbb0a97f561f73f391e52356371cec7 Mon Sep 17 00:00:00 2001
From: Henry
Date: Fri, 12 Sep 2025 16:27:28 +0100
Subject: [PATCH] - Added `secureFetch` and `checkDenyList` functions from
 `httpSecurity` to enhance security in web crawling and link fetching
 processes. - Updated relevant functions to utilize these new security
 measures, ensuring safer data handling.

---
 packages/components/src/index.ts                  | 1 +
 packages/components/src/utils.ts                  | 7 +++++--
 packages/server/src/services/fetch-links/index.ts | 4 +++-
 3 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/packages/components/src/index.ts b/packages/components/src/index.ts
index f2dc56404..6b9f4cced 100644
--- a/packages/components/src/index.ts
+++ b/packages/components/src/index.ts
@@ -13,3 +13,4 @@ export * from '../evaluation/EvaluationRunner'
 export * from './followUpPrompts'
 export * from './validator'
 export * from './agentflowv2Generator'
+export * from './httpSecurity'
diff --git a/packages/components/src/utils.ts b/packages/components/src/utils.ts
index 01e60e82e..7b0c4a25d 100644
--- a/packages/components/src/utils.ts
+++ b/packages/components/src/utils.ts
@@ -18,6 +18,7 @@ import { TextSplitter } from 'langchain/text_splitter'
 import { DocumentLoader } from 'langchain/document_loaders/base'
 import { NodeVM } from '@flowiseai/nodevm'
 import { Sandbox } from '@e2b/code-interpreter'
+import { secureFetch, checkDenyList } from './httpSecurity'
 import JSON5 from 'json5'
 
 export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$' //return true if string consists only numbers OR expression {{}}
@@ -422,7 +423,7 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
 
     if (process.env.DEBUG === 'true') console.info(`actively crawling ${currentURL}`)
     try {
-        const resp = await fetch(currentURL)
+        const resp = await secureFetch(currentURL)
 
         if (resp.status > 399) {
             if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
@@ -453,6 +454,8 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
  * @returns {Promise<string[]>}
  */
 export async function webCrawl(stringURL: string, limit: number): Promise<string[]> {
+    await checkDenyList(stringURL)
+
     const URLObj = new URL(stringURL)
     const modifyURL = stringURL.slice(-1) === '/' ? stringURL.slice(0, -1) : stringURL
     return await crawl(URLObj.protocol + '//' + URLObj.hostname, modifyURL, [], limit)
@@ -476,7 +479,7 @@ export async function xmlScrape(currentURL: string, limit: number): Promise
     let urls: string[] = []
     if (process.env.DEBUG === 'true') console.info(`actively scarping ${currentURL}`)
     try {
-        const resp = await fetch(currentURL)
+        const resp = await secureFetch(currentURL)
 
         if (resp.status > 399) {
             if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
diff --git a/packages/server/src/services/fetch-links/index.ts b/packages/server/src/services/fetch-links/index.ts
index 53c6e94e9..cf5551512 100644
--- a/packages/server/src/services/fetch-links/index.ts
+++ b/packages/server/src/services/fetch-links/index.ts
@@ -1,4 +1,4 @@
-import { webCrawl, xmlScrape } from 'flowise-components'
+import { webCrawl, xmlScrape, checkDenyList } from 'flowise-components'
 import { StatusCodes } from 'http-status-codes'
 import { InternalFlowiseError } from '../../errors/internalFlowiseError'
 import { getErrorMessage } from '../../errors/utils'
@@ -6,6 +6,8 @@ import { getErrorMessage } from '../../errors/utils'
 const getAllLinks = async (requestUrl: string, relativeLinksMethod: string, queryLimit: string): Promise<any> => {
     try {
         const url = decodeURIComponent(requestUrl)
+        await checkDenyList(url)
+
         if (!relativeLinksMethod) {
             throw new InternalFlowiseError(
                 StatusCodes.INTERNAL_SERVER_ERROR,
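
Note: the new `packages/components/src/httpSecurity.ts` module is referenced by this patch, but its contents are not part of the diff, so the behaviour of `secureFetch` and `checkDenyList` is not shown above. For orientation only, a minimal SSRF-style guard of that shape could look like the sketch below. Everything in it is an illustrative assumption, not the actual Flowise implementation: the environment variable name `HTTP_DENY_LIST`, the private-range checks, and the helper `isPrivateAddress` are all hypothetical.

// Hypothetical sketch only -- not the httpSecurity.ts shipped by this patch.
import { lookup } from 'node:dns/promises'
import { isIP } from 'node:net'

// Assumed comma-separated deny list, e.g. HTTP_DENY_LIST="internal.example.com,169.254.169.254"
const denyList = (process.env.HTTP_DENY_LIST ?? '')
    .split(',')
    .map((entry) => entry.trim().toLowerCase())
    .filter(Boolean)

// True for loopback, link-local and RFC 1918 IPv4 ranges plus common private IPv6 prefixes.
const isPrivateAddress = (address: string): boolean => {
    if (isIP(address) === 4) {
        const [a, b] = address.split('.').map(Number)
        return (
            a === 0 ||
            a === 10 ||
            a === 127 ||
            (a === 169 && b === 254) ||
            (a === 172 && b >= 16 && b <= 31) ||
            (a === 192 && b === 168)
        )
    }
    const lower = address.toLowerCase()
    return lower === '::1' || lower.startsWith('fe80:') || lower.startsWith('fc') || lower.startsWith('fd')
}

// Throws if the URL's host is deny-listed or resolves to a private/loopback address.
export const checkDenyList = async (stringURL: string): Promise<void> => {
    const url = new URL(stringURL)
    const hostname = url.hostname.replace(/^\[|\]$/g, '').toLowerCase()

    if (denyList.includes(hostname)) {
        throw new Error(`Access to ${hostname} is blocked by the deny list`)
    }

    // Resolve the hostname so a deny-listed or private IP cannot be reached via DNS.
    const address = isIP(hostname) ? hostname : (await lookup(hostname)).address
    if (denyList.includes(address.toLowerCase()) || isPrivateAddress(address)) {
        throw new Error(`Access to ${hostname} (${address}) is blocked`)
    }
}

// fetch wrapper that applies the deny-list check before every outbound request.
export const secureFetch = async (url: string, init?: RequestInit): Promise<Response> => {
    await checkDenyList(url)
    return fetch(url, init)
}

In the patch itself, `webCrawl` and the server-side `getAllLinks` validate the caller-supplied URL with `checkDenyList` before any work starts, while `crawl` and `xmlScrape` route every outbound request through `secureFetch`, so both the entry URL and each discovered link pass through the same guard.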