Feature/Add ability to create new doc store on upsert (#3965)
add ability to create new doc store on upsert, update firecrawl properties
This commit is contained in:
parent
a49177f7fb
commit
20a797d2e0
|
|
@ -679,6 +679,11 @@ paths:
|
|||
type: string
|
||||
format: binary
|
||||
description: Files to be uploaded
|
||||
docId:
|
||||
type: string
|
||||
nullable: true
|
||||
example: '603a7b51-ae7c-4b0a-8865-e454ed2f6766'
|
||||
description: Document ID to use existing configuration
|
||||
loader:
|
||||
type: string
|
||||
nullable: true
|
||||
|
|
@ -704,6 +709,32 @@ paths:
|
|||
nullable: true
|
||||
example: '{"name":"postgresRecordManager"}'
|
||||
description: Record Manager configurations
|
||||
metadata:
|
||||
type: object
|
||||
nullable: true
|
||||
description: Metadata associated with the document
|
||||
example: { 'foo': 'bar' }
|
||||
replaceExisting:
|
||||
type: boolean
|
||||
nullable: true
|
||||
description: Whether to replace existing document loader with the new upserted chunks. However, this does not delete the existing embeddings in the vector store
|
||||
createNewDocStore:
|
||||
type: boolean
|
||||
nullable: true
|
||||
description: Whether to create a new document store
|
||||
docStore:
|
||||
type: object
|
||||
nullable: true
|
||||
description: Only when createNewDocStore is true, pass in the new document store configuration
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
example: plainText
|
||||
description: Name of the new document store to be created
|
||||
description:
|
||||
type: string
|
||||
example: plainText
|
||||
description: Description of the new document store to be created
|
||||
required:
|
||||
- files
|
||||
required: true
|
||||
|
|
@ -2350,16 +2381,37 @@ components:
|
|||
docId:
|
||||
type: string
|
||||
format: uuid
|
||||
nullable: true
|
||||
description: Document ID within the store. If provided, existing configuration from the document will be used for the new document
|
||||
metadata:
|
||||
type: object
|
||||
nullable: true
|
||||
description: Metadata associated with the document
|
||||
example: { 'foo': 'bar' }
|
||||
replaceExisting:
|
||||
type: boolean
|
||||
nullable: true
|
||||
description: Whether to replace existing document loader with the new upserted chunks. However, this does not delete the existing embeddings in the vector store
|
||||
createNewDocStore:
|
||||
type: boolean
|
||||
nullable: true
|
||||
description: Whether to create a new document store
|
||||
docStore:
|
||||
type: object
|
||||
nullable: true
|
||||
description: Only when createNewDocStore is true, pass in the new document store configuration
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
example: plainText
|
||||
description: Name of the new document store to be created
|
||||
description:
|
||||
type: string
|
||||
example: plainText
|
||||
description: Description of the new document store to be created
|
||||
loader:
|
||||
type: object
|
||||
nullable: true
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
|
|
@ -2370,6 +2422,7 @@ components:
|
|||
description: Configuration for the loader
|
||||
splitter:
|
||||
type: object
|
||||
nullable: true
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
|
|
@ -2380,6 +2433,7 @@ components:
|
|||
description: Configuration for the text splitter
|
||||
embedding:
|
||||
type: object
|
||||
nullable: true
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
|
|
@ -2390,6 +2444,7 @@ components:
|
|||
description: Configuration for the embedding generator
|
||||
vectorStore:
|
||||
type: object
|
||||
nullable: true
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
|
|
@ -2400,6 +2455,7 @@ components:
|
|||
description: Configuration for the vector store
|
||||
recordManager:
|
||||
type: object
|
||||
nullable: true
|
||||
properties:
|
||||
name:
|
||||
type: string
|
||||
|
|
|
|||
|
|
@ -266,7 +266,7 @@ class FireCrawl_DocumentLoaders implements INode {
|
|||
this.name = 'fireCrawl'
|
||||
this.type = 'Document'
|
||||
this.icon = 'firecrawl.png'
|
||||
this.version = 2.0
|
||||
this.version = 2.1
|
||||
this.category = 'Document Loaders'
|
||||
this.description = 'Load data from URL using FireCrawl'
|
||||
this.baseClasses = [this.type]
|
||||
|
|
@ -307,6 +307,42 @@ class FireCrawl_DocumentLoaders implements INode {
|
|||
}
|
||||
],
|
||||
default: 'crawl'
|
||||
},
|
||||
{
|
||||
// maxCrawlPages
|
||||
label: 'Max Crawl Pages',
|
||||
name: 'maxCrawlPages',
|
||||
type: 'string',
|
||||
description: 'Maximum number of pages to crawl',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
// generateImgAltText
|
||||
label: 'Generate Image Alt Text',
|
||||
name: 'generateImgAltText',
|
||||
type: 'boolean',
|
||||
description: 'Generate alt text for images',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
// returnOnlyUrls
|
||||
label: 'Return Only URLs',
|
||||
name: 'returnOnlyUrls',
|
||||
type: 'boolean',
|
||||
description: 'Return only URLs of the crawled pages',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
},
|
||||
{
|
||||
// onlyMainContent
|
||||
label: 'Only Main Content',
|
||||
name: 'onlyMainContent',
|
||||
type: 'boolean',
|
||||
description: 'Extract only the main content of the page',
|
||||
optional: true,
|
||||
additionalParams: true
|
||||
}
|
||||
// ... (other input parameters)
|
||||
]
|
||||
|
|
|
|||
|
|
@ -76,6 +76,8 @@ export interface IDocumentStoreUpsertData {
|
|||
docId: string
|
||||
metadata?: string | object
|
||||
replaceExisting?: boolean
|
||||
createNewDocStore?: boolean
|
||||
docStore?: IDocumentStore
|
||||
loader?: {
|
||||
name: string
|
||||
config: ICommonObject
|
||||
|
|
|
|||
|
|
@ -32,7 +32,8 @@ import {
|
|||
INodeData,
|
||||
MODE,
|
||||
IOverrideConfig,
|
||||
IExecutePreviewLoader
|
||||
IExecutePreviewLoader,
|
||||
DocumentStoreDTO
|
||||
} from '../../Interface'
|
||||
import { DocumentStoreFileChunk } from '../../database/entities/DocumentStoreFileChunk'
|
||||
import { v4 as uuidv4 } from 'uuid'
|
||||
|
|
@ -1464,6 +1465,7 @@ const upsertDocStore = async (
|
|||
}
|
||||
}
|
||||
const replaceExisting = data.replaceExisting ?? false
|
||||
const createNewDocStore = data.createNewDocStore ?? false
|
||||
const newLoader = typeof data.loader === 'string' ? JSON.parse(data.loader) : data.loader
|
||||
const newSplitter = typeof data.splitter === 'string' ? JSON.parse(data.splitter) : data.splitter
|
||||
const newVectorStore = typeof data.vectorStore === 'string' ? JSON.parse(data.vectorStore) : data.vectorStore
|
||||
|
|
@ -1533,6 +1535,15 @@ const upsertDocStore = async (
|
|||
recordManagerConfig = JSON.parse(entity.recordManagerConfig || '{}')?.config
|
||||
}
|
||||
|
||||
if (createNewDocStore) {
|
||||
const docStoreBody = typeof data.docStore === 'string' ? JSON.parse(data.docStore) : data.docStore
|
||||
const newDocumentStore = docStoreBody ?? { name: `Document Store ${Date.now().toString()}` }
|
||||
const docStore = DocumentStoreDTO.toEntity(newDocumentStore)
|
||||
const documentStore = appDataSource.getRepository(DocumentStore).create(docStore)
|
||||
const dbResponse = await appDataSource.getRepository(DocumentStore).save(documentStore)
|
||||
storeId = dbResponse.id
|
||||
}
|
||||
|
||||
// Step 2: Replace with new values
|
||||
loaderName = newLoader?.name ? getComponentLabelFromName(newLoader?.name) : loaderName
|
||||
loaderId = newLoader?.name || loaderId
|
||||
|
|
@ -1687,6 +1698,7 @@ const upsertDocStore = async (
|
|||
isVectorStoreInsert: true
|
||||
})
|
||||
res.docId = newDocId
|
||||
if (createNewDocStore) res.storeId = storeId
|
||||
|
||||
return res
|
||||
} catch (error) {
|
||||
|
|
|
|||
|
|
@ -41,11 +41,13 @@ body_data = {
|
|||
"docId": "${dialogProps.loaderId}",
|
||||
"metadata": {}, # Add additional metadata to the document chunks
|
||||
"replaceExisting": True, # Replace existing document with the new upserted chunks
|
||||
"createNewDocStore": False, # Create a new document store
|
||||
"splitter": json.dumps({"config":{"chunkSize":20000}}) # Override existing configuration
|
||||
# "loader": "",
|
||||
# "vectorStore": "",
|
||||
# "embedding": "",
|
||||
# "recordManager": "",
|
||||
# "docStore": ""
|
||||
}
|
||||
|
||||
headers = {
|
||||
|
|
@ -71,11 +73,14 @@ formData.append("splitter", JSON.stringify({"config":{"chunkSize":20000}}));
|
|||
formData.append("metadata", "{}");
|
||||
// Replace existing document with the new upserted chunks
|
||||
formData.append("replaceExisting", "true");
|
||||
// Create a new document store
|
||||
formData.append("createNewDocStore", "false");
|
||||
// Override existing configuration
|
||||
// formData.append("loader", "");
|
||||
// formData.append("embedding", "");
|
||||
// formData.append("vectorStore", "");
|
||||
// formData.append("recordManager", "");
|
||||
// formData.append("docStore", "");
|
||||
|
||||
async function query(formData) {
|
||||
const response = await fetch(
|
||||
|
|
@ -105,11 +110,13 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
|
|||
-F 'splitter={"config":{"chunkSize":20000}}' \\
|
||||
-F "metadata={}" \\
|
||||
-F "replaceExisting=true" \\
|
||||
-F "createNewDocStore=false" \\
|
||||
# Override existing configuration:
|
||||
# -F "loader=" \\
|
||||
# -F "embedding=" \\
|
||||
# -F "vectorStore=" \\
|
||||
# -F "recordManager="
|
||||
# -F "recordManager=" \\
|
||||
# -F "docStore="
|
||||
\`\`\`
|
||||
`
|
||||
}
|
||||
|
|
@ -135,6 +142,7 @@ output = query({
|
|||
"docId": "${dialogProps.loaderId}",
|
||||
"metadata": "{}", # Add additional metadata to the document chunks
|
||||
"replaceExisting": True, # Replace existing document with the new upserted chunks
|
||||
"createNewDocStore": False, # Create a new document store
|
||||
# Override existing configuration
|
||||
"loader": {
|
||||
"config": {
|
||||
|
|
@ -149,6 +157,7 @@ output = query({
|
|||
# embedding: {},
|
||||
# vectorStore: {},
|
||||
# recordManager: {},
|
||||
# docStore: {}
|
||||
})
|
||||
print(output)
|
||||
\`\`\`
|
||||
|
|
@ -174,6 +183,7 @@ query({
|
|||
"docId": "${dialogProps.loaderId}",
|
||||
"metadata": "{}", // Add additional metadata to the document chunks
|
||||
"replaceExisting": true, // Replace existing document with the new upserted chunks
|
||||
"createNewDocStore": false, // Create a new document store
|
||||
// Override existing configuration
|
||||
"loader": {
|
||||
"config": {
|
||||
|
|
@ -188,6 +198,7 @@ query({
|
|||
// embedding: {},
|
||||
// vectorStore: {},
|
||||
// recordManager: {},
|
||||
// docStore: {}
|
||||
}).then((response) => {
|
||||
console.log(response);
|
||||
});
|
||||
|
|
@ -201,6 +212,7 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
|
|||
"docId": "${dialogProps.loaderId}",
|
||||
"metadata": "{}",
|
||||
"replaceExisting": true,
|
||||
"createNewDocStore": false,
|
||||
"loader": {
|
||||
"config": {
|
||||
"text": "This is a new text"
|
||||
|
|
@ -215,6 +227,7 @@ curl -X POST http://localhost:3000/api/v1/document-store/upsert/${dialogProps.st
|
|||
// "embedding": {},
|
||||
// "vectorStore": {},
|
||||
// "recordManager": {}
|
||||
// "docStore": {}
|
||||
}'
|
||||
|
||||
\`\`\`
|
||||
|
|
|
|||
Loading…
Reference in New Issue