diff --git a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts index 2f0bd8b64..3eba0ece4 100644 --- a/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts +++ b/packages/components/nodes/documentloaders/Cheerio/Cheerio.ts @@ -1,4 +1,4 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { CheerioWebBaseLoader, WebBaseLoaderParams } from 'langchain/document_loaders/web/cheerio' import { test } from 'linkifyjs' @@ -87,7 +87,7 @@ class Cheerio_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const metadata = nodeData.inputs?.metadata const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string @@ -119,13 +119,13 @@ class Cheerio_DocumentLoaders implements INode { } return docs } catch (err) { - if (process.env.DEBUG === 'true') console.error(`error in CheerioWebBaseLoader: ${err.message}, on page: ${url}`) + if (process.env.DEBUG === 'true') options.logger.error(`error in CheerioWebBaseLoader: ${err.message}, on page: ${url}`) } } let docs = [] if (relativeLinksMethod) { - if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`) + if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) if (!limit) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = @@ -134,14 +134,15 @@ class Cheerio_DocumentLoaders implements INode { : relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit) - if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`) + if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`) if (!pages || pages.length === 0) throw new Error('No relative links found') for (const page of pages) { docs.push(...(await cheerioLoader(page))) } - if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`) + if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`) } else if (selectedLinks && selectedLinks.length > 0) { - if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) + if (process.env.DEBUG === 'true') + options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) for (const page of selectedLinks) { docs.push(...(await cheerioLoader(page))) } diff --git a/packages/components/nodes/documentloaders/Playwright/Playwright.ts b/packages/components/nodes/documentloaders/Playwright/Playwright.ts index cb27f1c4d..2de166ce4 100644 --- a/packages/components/nodes/documentloaders/Playwright/Playwright.ts +++ b/packages/components/nodes/documentloaders/Playwright/Playwright.ts @@ -1,4 +1,4 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { Browser, Page, PlaywrightWebBaseLoader, PlaywrightWebBaseLoaderOptions } from 'langchain/document_loaders/web/playwright' import { test } from 'linkifyjs' @@ -115,7 +115,7 @@ class Playwright_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const metadata = nodeData.inputs?.metadata const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string @@ -160,13 +160,13 @@ class Playwright_DocumentLoaders implements INode { } return docs } catch (err) { - if (process.env.DEBUG === 'true') console.error(`error in PlaywrightWebBaseLoader: ${err.message}, on page: ${url}`) + if (process.env.DEBUG === 'true') options.logger.error(`error in PlaywrightWebBaseLoader: ${err.message}, on page: ${url}`) } } let docs = [] if (relativeLinksMethod) { - if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`) + if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) if (!limit) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = @@ -175,14 +175,15 @@ class Playwright_DocumentLoaders implements INode { : relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit) - if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`) + if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`) if (!pages || pages.length === 0) throw new Error('No relative links found') for (const page of pages) { docs.push(...(await playwrightLoader(page))) } - if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`) + if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`) } else if (selectedLinks && selectedLinks.length > 0) { - if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) + if (process.env.DEBUG === 'true') + options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) for (const page of selectedLinks) { docs.push(...(await playwrightLoader(page))) } diff --git a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts index fe7d4f8a8..3d28f3107 100644 --- a/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts +++ b/packages/components/nodes/documentloaders/Puppeteer/Puppeteer.ts @@ -1,4 +1,4 @@ -import { INode, INodeData, INodeParams } from '../../../src/Interface' +import { ICommonObject, INode, INodeData, INodeParams } from '../../../src/Interface' import { TextSplitter } from 'langchain/text_splitter' import { Browser, Page, PuppeteerWebBaseLoader, PuppeteerWebBaseLoaderOptions } from 'langchain/document_loaders/web/puppeteer' import { test } from 'linkifyjs' @@ -116,7 +116,7 @@ class Puppeteer_DocumentLoaders implements INode { ] } - async init(nodeData: INodeData): Promise { + async init(nodeData: INodeData, _: string, options: ICommonObject): Promise { const textSplitter = nodeData.inputs?.textSplitter as TextSplitter const metadata = nodeData.inputs?.metadata const relativeLinksMethod = nodeData.inputs?.relativeLinksMethod as string @@ -161,13 +161,13 @@ class Puppeteer_DocumentLoaders implements INode { } return docs } catch (err) { - if (process.env.DEBUG === 'true') console.error(`error in PuppeteerWebBaseLoader: ${err.message}, on page: ${url}`) + if (process.env.DEBUG === 'true') options.logger.error(`error in PuppeteerWebBaseLoader: ${err.message}, on page: ${url}`) } } let docs = [] if (relativeLinksMethod) { - if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`) + if (process.env.DEBUG === 'true') options.logger.info(`Start ${relativeLinksMethod}`) if (!limit) limit = 10 else if (limit < 0) throw new Error('Limit cannot be less than 0') const pages: string[] = @@ -176,14 +176,15 @@ class Puppeteer_DocumentLoaders implements INode { : relativeLinksMethod === 'webCrawl' ? await webCrawl(url, limit) : await xmlScrape(url, limit) - if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`) + if (process.env.DEBUG === 'true') options.logger.info(`pages: ${JSON.stringify(pages)}, length: ${pages.length}`) if (!pages || pages.length === 0) throw new Error('No relative links found') for (const page of pages) { docs.push(...(await puppeteerLoader(page))) } - if (process.env.DEBUG === 'true') console.info(`Finish ${relativeLinksMethod}`) + if (process.env.DEBUG === 'true') options.logger.info(`Finish ${relativeLinksMethod}`) } else if (selectedLinks && selectedLinks.length > 0) { - if (process.env.DEBUG === 'true') console.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) + if (process.env.DEBUG === 'true') + options.logger.info(`pages: ${JSON.stringify(selectedLinks)}, length: ${selectedLinks.length}`) for (const page of selectedLinks) { docs.push(...(await puppeteerLoader(page))) }