Replace regex-based URL validation with linkifyjs
This commit is contained in:
parent
2cf266d9f9
commit
7460dd3a72
|
|
@ -1,7 +1,7 @@
|
|||
import { INode, INodeData, INodeParams } from '../../../src/Interface'
|
||||
import { TextSplitter } from 'langchain/text_splitter'
|
||||
import { CheerioWebBaseLoader } from 'langchain/document_loaders/web/cheerio'
|
||||
|
||||
import { test } from 'linkifyjs'
|
||||
class Cheerio_DocumentLoaders implements INode {
|
||||
label: string
|
||||
name: string
|
||||
|
|
@ -47,18 +47,12 @@ class Cheerio_DocumentLoaders implements INode {
|
|||
const metadata = nodeData.inputs?.metadata
|
||||
|
||||
let url = nodeData.inputs?.url as string
|
||||
url = url.trim()
|
||||
if (!test(url)) {
|
||||
throw new Error('Invalid URL')
|
||||
}
|
||||
|
||||
var urlPattern = new RegExp(
|
||||
'^(https?:\\/\\/)?' + // validate protocol
|
||||
'((([a-z\\d]([a-z\\d-]*[a-z\\d])*)\\.)+[a-z]{2,}|' + // validate domain name
|
||||
'((\\d{1,3}\\.){3}\\d{1,3}))' + // validate OR ip (v4) address
|
||||
'(\\:\\d+)?(\\/[-a-z\\d%_.~+]*)*' + // validate port and path
|
||||
'(\\?[;&a-z\\d%_.~+=-]*)?' + // validate query string
|
||||
'(\\#[-a-z\\d_]*)?$',
|
||||
'i'
|
||||
) // validate fragment locator
|
||||
|
||||
const loader = new CheerioWebBaseLoader(urlPattern.test(url.trim()) ? url.trim() : '')
|
||||
const loader = new CheerioWebBaseLoader(url)
|
||||
let docs = []
|
||||
|
||||
if (textSplitter) {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@
|
|||
"form-data": "^4.0.0",
|
||||
"graphql": "^16.6.0",
|
||||
"langchain": "^0.0.73",
|
||||
"linkifyjs": "^4.1.1",
|
||||
"mammoth": "^1.5.1",
|
||||
"moment": "^2.29.3",
|
||||
"node-fetch": "2",
|
||||
|
|
|
|||
Loading…
Reference in New Issue