- Added `secureFetch` and `checkDenyList` functions from `httpSecurity` to enhance security in web crawling and link fetching processes.
- Updated `crawl` and `xmlScrape` to fetch pages through `secureFetch` instead of `fetch`, and `webCrawl` and `getAllLinks` to validate the requested URL with `checkDenyList` before doing any work.
This commit is contained in:
parent
4987a2880d
commit
73500ad09c
|
|
@ -13,3 +13,4 @@ export * from '../evaluation/EvaluationRunner'
|
||||||
export * from './followUpPrompts'
|
export * from './followUpPrompts'
|
||||||
export * from './validator'
|
export * from './validator'
|
||||||
export * from './agentflowv2Generator'
|
export * from './agentflowv2Generator'
|
||||||
|
export * from './httpSecurity'
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,7 @@ import { TextSplitter } from 'langchain/text_splitter'
|
||||||
import { DocumentLoader } from 'langchain/document_loaders/base'
|
import { DocumentLoader } from 'langchain/document_loaders/base'
|
||||||
import { NodeVM } from '@flowiseai/nodevm'
|
import { NodeVM } from '@flowiseai/nodevm'
|
||||||
import { Sandbox } from '@e2b/code-interpreter'
|
import { Sandbox } from '@e2b/code-interpreter'
|
||||||
|
import { secureFetch, checkDenyList } from './httpSecurity'
|
||||||
import JSON5 from 'json5'
|
import JSON5 from 'json5'
|
||||||
|
|
||||||
export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$' //return true if string consists only numbers OR expression {{}}
|
export const numberOrExpressionRegex = '^(\\d+\\.?\\d*|{{.*}})$' //return true if string consists only numbers OR expression {{}}
|
||||||
|
|
@ -422,7 +423,7 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
|
||||||
|
|
||||||
if (process.env.DEBUG === 'true') console.info(`actively crawling ${currentURL}`)
|
if (process.env.DEBUG === 'true') console.info(`actively crawling ${currentURL}`)
|
||||||
try {
|
try {
|
||||||
const resp = await fetch(currentURL)
|
const resp = await secureFetch(currentURL)
|
||||||
|
|
||||||
if (resp.status > 399) {
|
if (resp.status > 399) {
|
||||||
if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
|
if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
|
||||||
|
|
@ -453,6 +454,8 @@ async function crawl(baseURL: string, currentURL: string, pages: string[], limit
|
||||||
* @returns {Promise<string[]>}
|
* @returns {Promise<string[]>}
|
||||||
*/
|
*/
|
||||||
export async function webCrawl(stringURL: string, limit: number): Promise<string[]> {
|
export async function webCrawl(stringURL: string, limit: number): Promise<string[]> {
|
||||||
|
await checkDenyList(stringURL)
|
||||||
|
|
||||||
const URLObj = new URL(stringURL)
|
const URLObj = new URL(stringURL)
|
||||||
const modifyURL = stringURL.slice(-1) === '/' ? stringURL.slice(0, -1) : stringURL
|
const modifyURL = stringURL.slice(-1) === '/' ? stringURL.slice(0, -1) : stringURL
|
||||||
return await crawl(URLObj.protocol + '//' + URLObj.hostname, modifyURL, [], limit)
|
return await crawl(URLObj.protocol + '//' + URLObj.hostname, modifyURL, [], limit)
|
||||||
|
|
@ -476,7 +479,7 @@ export async function xmlScrape(currentURL: string, limit: number): Promise<stri
|
||||||
let urls: string[] = []
|
let urls: string[] = []
|
||||||
if (process.env.DEBUG === 'true') console.info(`actively scarping ${currentURL}`)
|
if (process.env.DEBUG === 'true') console.info(`actively scarping ${currentURL}`)
|
||||||
try {
|
try {
|
||||||
const resp = await fetch(currentURL)
|
const resp = await secureFetch(currentURL)
|
||||||
|
|
||||||
if (resp.status > 399) {
|
if (resp.status > 399) {
|
||||||
if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
|
if (process.env.DEBUG === 'true') console.error(`error in fetch with status code: ${resp.status}, on page: ${currentURL}`)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
import { webCrawl, xmlScrape } from 'flowise-components'
|
import { webCrawl, xmlScrape, checkDenyList } from 'flowise-components'
|
||||||
import { StatusCodes } from 'http-status-codes'
|
import { StatusCodes } from 'http-status-codes'
|
||||||
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
|
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
|
||||||
import { getErrorMessage } from '../../errors/utils'
|
import { getErrorMessage } from '../../errors/utils'
|
||||||
|
|
@ -6,6 +6,8 @@ import { getErrorMessage } from '../../errors/utils'
|
||||||
const getAllLinks = async (requestUrl: string, relativeLinksMethod: string, queryLimit: string): Promise<any> => {
|
const getAllLinks = async (requestUrl: string, relativeLinksMethod: string, queryLimit: string): Promise<any> => {
|
||||||
try {
|
try {
|
||||||
const url = decodeURIComponent(requestUrl)
|
const url = decodeURIComponent(requestUrl)
|
||||||
|
await checkDenyList(url)
|
||||||
|
|
||||||
if (!relativeLinksMethod) {
|
if (!relativeLinksMethod) {
|
||||||
throw new InternalFlowiseError(
|
throw new InternalFlowiseError(
|
||||||
StatusCodes.INTERNAL_SERVER_ERROR,
|
StatusCodes.INTERNAL_SERVER_ERROR,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue