- Imported the `secureFetch` and `checkDenyList` functions from `httpSecurity` to harden the web crawling and link fetching code paths, and re-exported `httpSecurity` so that `checkDenyList` can be imported from `flowise-components`.
- Updated `crawl` and `xmlScrape` to request pages through `secureFetch` instead of `fetch`, and updated `webCrawl` and `getAllLinks` to run `checkDenyList` on the requested URL first.
This commit is contained in:
parent
4987a2880d
commit
73500ad09c
|
|
@ -13,3 +13,4 @@ export * from '../evaluation/EvaluationRunner'
|
|||
export * from './followUpPrompts'
|
||||
export * from './validator'
|
||||
export * from './agentflowv2Generator'
|
||||
export * from './httpSecurity'
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ import { TextSplitter } from 'langchain/text_splitter'
|
|||
import { DocumentLoader } from 'langchain/document_loaders/base'
|
||||
import { NodeVM } from '@flowiseai/nodevm'
|
||||
import { Sandbox } from '@e2b/code-interpreter'
|
||||
import { secureFetch, checkDenyList } from './httpSecurity'
|
||||
import JSON5 from 'json5'
|
||||
|
||||
export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$' //return true if string consists only numbers OR expression {{}}
|
||||
|
|
@ -422,7 +423,7 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
|
|||
|
||||
if (process.env.DEBUG === 'true') console.info(`actively crawling ${currentURL}`)
|
||||
try {
|
||||
const resp = await fetch(currentURL)
|
||||
const resp = await secureFetch(currentURL)
|
||||
|
||||
if (resp.status > 399) {
|
||||
if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
|
||||
|
|
@ -453,6 +454,8 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
|
|||
* @returns {Promise<string[]>}
|
||||
*/
|
||||
export async function webCrawl(stringURL: string, limit: number): Promise<string[]> {
|
||||
await checkDenyList(stringURL)
|
||||
|
||||
const URLObj = new URL(stringURL)
|
||||
const modifyURL = stringURL.slice(-1) === '/' ? stringURL.slice(0, -1) : stringURL
|
||||
return await crawl(URLObj.protocol + '//' + URLObj.hostname, modifyURL, [], limit)
|
||||
|
|
@ -476,7 +479,7 @@ export async function xmlScrape(currentURL: string, limit: number): Promise<stri
|
|||
let urls: string[] = []
|
||||
if (process.env.DEBUG === 'true') console.info(`actively scarping ${currentURL}`)
|
||||
try {
|
||||
const resp = await fetch(currentURL)
|
||||
const resp = await secureFetch(currentURL)
|
||||
|
||||
if (resp.status > 399) {
|
||||
if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
import { webCrawl, xmlScrape } from 'flowise-components'
|
||||
import { webCrawl, xmlScrape, checkDenyList } from 'flowise-components'
|
||||
import { StatusCodes } from 'http-status-codes'
|
||||
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
|
||||
import { getErrorMessage } from '../../errors/utils'
|
||||
|
|
@ -6,6 +6,8 @@ import { getErrorMessage } from '../../errors/utils'
|
|||
const getAllLinks = async (requestUrl: string, relativeLinksMethod: string, queryLimit: string): Promise<any> => {
|
||||
try {
|
||||
const url = decodeURIComponent(requestUrl)
|
||||
await checkDenyList(url)
|
||||
|
||||
if (!relativeLinksMethod) {
|
||||
throw new InternalFlowiseError(
|
||||
StatusCodes.INTERNAL_SERVER_ERROR,
|
||||
|
|
|
|||
Loading…
Reference in New Issue