add description and modify default limit to 10 if empty

This commit is contained in:
chungyau97 2023-07-09 10:32:05 +08:00
parent 251a55972c
commit d53522a0a8
3 changed files with 30 additions and 21 deletions

View File

@ -38,28 +38,31 @@ class Cheerio_DocumentLoaders implements INode {
label: 'Get Relative Links Method',
name: 'relativeLinksMethod',
type: 'options',
description: 'Select a method to retrieve relative links',
options: [
{
label: 'Web Crawl',
name: 'webCrawl'
name: 'webCrawl',
description: 'Crawl relative links from HTML URL'
},
{
label: 'Scrape XML Sitemap',
name: 'scrapeXMLSitemap'
name: 'scrapeXMLSitemap',
description: 'Scrape relative links from XML sitemap URL'
}
],
optional: true,
additionalParams: true
},
{
label: 'Crawl/Scrape Links Limit',
label: 'Get Relative Links Limit',
name: 'limit',
type: 'number',
default: 10,
optional: true,
additionalParams: true,
description: 'Set 0 to crawl/scrape all relative links',
warning: `Scraping all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc) `
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
warning: `Retrieving all links might take a long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
},
{
label: 'Metadata',
@ -101,7 +104,7 @@ class Cheerio_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
if (!limit) throw new Error('Please set a limit to crawl/scrape')
if (!limit) limit = '10'
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))

View File

@ -38,28 +38,31 @@ class Playwright_DocumentLoaders implements INode {
label: 'Get Relative Links Method',
name: 'relativeLinksMethod',
type: 'options',
description: 'Select a method to retrieve relative links',
options: [
{
label: 'Web Crawl',
name: 'webCrawl'
name: 'webCrawl',
description: 'Crawl relative links from HTML URL'
},
{
label: 'Scrape XML Sitemap',
name: 'scrapeXMLSitemap'
name: 'scrapeXMLSitemap',
description: 'Scrape relative links from XML sitemap URL'
}
],
optional: true,
additionalParams: true
},
{
label: 'Crawl/Scrape Links Limit',
label: 'Get Relative Links Limit',
name: 'limit',
type: 'number',
default: 10,
optional: true,
additionalParams: true,
description: 'Set 0 to crawl/scrape all relative links',
warning: `Scraping all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc) `
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
warning: `Retrieving all links might take a long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
},
{
label: 'Metadata',
@ -101,7 +104,7 @@ class Playwright_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
if (!limit) throw new Error('Please set a limit to crawl/scrape')
if (!limit) limit = '10'
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))

View File

@ -38,28 +38,31 @@ class Puppeteer_DocumentLoaders implements INode {
label: 'Get Relative Links Method',
name: 'relativeLinksMethod',
type: 'options',
description: 'Select a method to retrieve relative links',
options: [
{
label: 'Web Crawl',
name: 'webCrawl'
name: 'webCrawl',
description: 'Crawl relative links from HTML URL'
},
{
label: 'Scrape XML Sitemap',
name: 'scrapeXMLSitemap'
name: 'scrapeXMLSitemap',
description: 'Scrape relative links from XML sitemap URL'
}
],
optional: true,
additionalParams: true
},
{
label: 'Crawl/Scrape Links Limit',
label: 'Get Relative Links Limit',
name: 'limit',
type: 'number',
default: 10,
optional: true,
additionalParams: true,
description: 'Set 0 to crawl/scrape all relative links',
warning: `Scraping all links might take long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc) `
description:
'Only used when "Get Relative Links Method" is selected. Set 0 to retrieve all relative links, default limit is 10.',
warning: `Retrieving all links might take a long time, and all links will be upserted again if the flow's state changed (eg: different URL, chunk size, etc)`
},
{
label: 'Metadata',
@ -106,7 +109,7 @@ class Puppeteer_DocumentLoaders implements INode {
let docs = []
if (relativeLinksMethod) {
if (process.env.DEBUG === 'true') console.info(`Start ${relativeLinksMethod}`)
if (!limit) throw new Error('Please set a limit to crawl/scrape')
if (!limit) limit = '10'
else if (parseInt(limit) < 0) throw new Error('Limit cannot be less than 0')
const pages: string[] =
relativeLinksMethod === 'webCrawl' ? await webCrawl(url, parseInt(limit)) : await xmlScrape(url, parseInt(limit))