diff --git a/packages/components/credentials/ApifyApi.credential.ts b/packages/components/credentials/ApifyApi.credential.ts
new file mode 100644
index 000000000..c961fd385
--- /dev/null
+++ b/packages/components/credentials/ApifyApi.credential.ts
@@ -0,0 +1,27 @@
+import { INodeParams, INodeCredential } from '../src/Interface'
+
+class ApifyApiCredential implements INodeCredential {
+ label: string
+ name: string
+ version: number
+ description: string
+ inputs: INodeParams[]
+
+ constructor() {
+ this.label = 'Apify API'
+ this.name = 'apifyApi'
+ this.version = 1.0
+ this.description =
+ 'You can find the Apify API token on your Apify account page.'
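+ // Password-type input keeps the token masked in the Flowise UI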
+ this.inputs = [
+ {
+ label: 'Apify API Token',
+ name: 'apifyApiToken',
+ type: 'password'
+ }
+ ]
+ }
+}
+
+module.exports = { credClass: ApifyApiCredential }
diff --git a/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/ApifyWebsiteContentCrawler.ts b/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/ApifyWebsiteContentCrawler.ts
new file mode 100644
index 000000000..a5e6a6e03
--- /dev/null
+++ b/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/ApifyWebsiteContentCrawler.ts
@@ -0,0 +1,147 @@
+import { INode, INodeData, INodeParams, ICommonObject } from '../../../src/Interface'
+import { getCredentialData, getCredentialParam } from '../../../src/utils'
+import { TextSplitter } from 'langchain/text_splitter'
+import { ApifyDatasetLoader } from 'langchain/document_loaders/web/apify_dataset'
+import { Document } from 'langchain/document'
+
+class ApifyWebsiteContentCrawler_DocumentLoaders implements INode {
+ label: string
+ name: string
+ description: string
+ type: string
+ icon: string
+ version: number
+ category: string
+ baseClasses: string[]
+ inputs: INodeParams[]
+ credential: INodeParams
+
+ constructor() {
+ this.label = 'Apify Website Content Crawler'
+ this.name = 'apifyWebsiteContentCrawler'
+ this.type = 'Document'
+ this.icon = 'apify-symbol-transparent.svg'
+ this.version = 1.0
+ this.category = 'Document Loaders'
+ this.description = 'Load data from Apify Website Content Crawler'
+ this.baseClasses = [this.type]
+ this.inputs = [
+ {
+ label: 'Start URLs',
+ name: 'urls',
+ type: 'string',
+ description: 'One or more URLs of pages where the crawler will start, separated by commas.',
+ placeholder: 'https://js.langchain.com/docs/'
+ },
+ {
+ label: 'Crawler type',
+ type: 'options',
+ name: 'crawlerType',
+ options: [
+ {
+ label: 'Headless web browser (Chrome+Playwright)',
+ name: 'playwright:chrome'
+ },
+ {
+ label: 'Stealthy web browser (Firefox+Playwright)',
+ name: 'playwright:firefox'
+ },
+ {
+ label: 'Raw HTTP client (Cheerio)',
+ name: 'cheerio'
+ },
+ {
+ label: 'Raw HTTP client with JavaScript execution (JSDOM) [experimental]',
+ name: 'jsdom'
+ }
+ ],
+ description:
+ 'Select the crawling engine; see the documentation for additional information.',
+ default: 'playwright:firefox'
+ },
+ {
+ label: 'Max crawling depth',
+ name: 'maxCrawlDepth',
+ type: 'number',
+ optional: true,
+ default: 1
+ },
+ {
+ label: 'Max crawl pages',
+ name: 'maxCrawlPages',
+ type: 'number',
+ optional: true,
+ default: 3
+ },
+ {
+ label: 'Additional input',
+ name: 'additionalInput',
+ type: 'json',
+ default: JSON.stringify({}),
+ description:
+ 'For additional input options for the crawler, see the documentation.',
+ optional: true
+ },
+ {
+ label: 'Text Splitter',
+ name: 'textSplitter',
+ type: 'TextSplitter',
+ optional: true
+ }
+ ]
+ this.credential = {
+ label: 'Connect Apify API',
+ name: 'credential',
+ type: 'credential',
+ credentialNames: ['apifyApi']
+ }
+ }
+
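+ // Entry point called by Flowise when the flow executes this node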
+ async init(nodeData: INodeData, _: string, options: ICommonObject): Promise<any> {
+ const textSplitter = nodeData.inputs?.textSplitter as TextSplitter
+
+ // Get input options and merge with additional input
+ const urls = nodeData.inputs?.urls as string
+ const crawlerType = nodeData.inputs?.crawlerType as string
+ const maxCrawlDepth = nodeData.inputs?.maxCrawlDepth as string
+ const maxCrawlPages = nodeData.inputs?.maxCrawlPages as string
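+ // Additional input may arrive already parsed (object) or as a raw JSON
+ // string from the UI, so normalize it to an object before merging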
+ const additionalInput =
+ typeof nodeData.inputs?.additionalInput === 'object'
+ ? nodeData.inputs?.additionalInput
+ : JSON.parse(nodeData.inputs?.additionalInput as string)
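+ // Build the actor input from the node fields; spreading additionalInput
+ // last lets it override the explicit values above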
+ const input = {
+ startUrls: urls.split(',').map((url) => ({ url: url.trim() })),
+ crawlerType,
+ maxCrawlDepth: parseInt(maxCrawlDepth, 10),
+ maxCrawlPages: parseInt(maxCrawlPages, 10),
+ ...additionalInput
+ }
+
+ // Get Apify API token from credential data
+ const credentialData = await getCredentialData(nodeData.credential ?? '', options)
+ const apifyApiToken = getCredentialParam('apifyApiToken', credentialData, nodeData)
+
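+ // Run the Website Content Crawler actor on the Apify platform, wait for
+ // it to finish, and map each page in the resulting dataset to a Document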
+ const loader = await ApifyDatasetLoader.fromActorCall('apify/website-content-crawler', input, {
+ datasetMappingFunction: (item) =>
+ new Document({
+ pageContent: (item.text || '') as string,
+ metadata: { source: item.url }
+ }),
+ clientOptions: {
+ token: apifyApiToken
+ }
+ })
+
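+ // Apply the optional text splitter to the loaded documents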
+ return textSplitter ? loader.loadAndSplit(textSplitter) : loader.load()
+ }
+}
+
+module.exports = { nodeClass: ApifyWebsiteContentCrawler_DocumentLoaders }
diff --git a/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/apify-symbol-transparent.svg b/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/apify-symbol-transparent.svg
new file mode 100644
index 000000000..423a3328d
--- /dev/null
+++ b/packages/components/nodes/documentloaders/ApifyWebsiteContentCrawler/apify-symbol-transparent.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/packages/components/package.json b/packages/components/package.json
index 7f55010d7..6a9fc437f 100644
--- a/packages/components/package.json
+++ b/packages/components/package.json
@@ -26,6 +26,7 @@
"@qdrant/js-client-rest": "^1.2.2",
"@supabase/supabase-js": "^2.29.0",
"@types/js-yaml": "^4.0.5",
+ "apify-client": "^2.7.1",
"@types/jsdom": "^21.1.1",
"axios": "^0.27.2",
"cheerio": "^1.0.0-rc.12",