// Flowise/packages/server/src/Interface.DocumentStore.ts
//
// 294 lines
// 8.3 KiB
// TypeScript

import { ICommonObject } from 'flowise-components'
import { DocumentStore } from './database/entities/DocumentStore'
import { DataSource } from 'typeorm'
import { IComponentNodes } from './Interface'
import { Telemetry } from './utils/telemetry'
import { CachePool } from './CachePool'
/**
 * Status values shared by document stores, their loaders and loader files
 * (see the `status` fields of the interfaces in this file).
 *
 * Note that the member `EMPTY_SYNC` serializes to the string 'EMPTY';
 * every other member serializes to its own name.
 */
export enum DocumentStoreStatus {
// Assigned by DocumentStoreDTO.toEntity: a newly created store is empty and in sync
EMPTY_SYNC = 'EMPTY',
SYNC = 'SYNC',
SYNCING = 'SYNCING',
// Reported by DocumentStoreDTO.fromEntity when at least one loader is not in the SYNC state
STALE = 'STALE',
NEW = 'NEW',
UPSERTING = 'UPSERTING',
UPSERTED = 'UPSERTED'
}
/**
 * Document store record with its structured fields still serialized.
 * `loaders`, `whereUsed` and the three `*Config` fields hold JSON text;
 * DocumentStoreDTO.fromEntity in this file parses the same-named entity fields with JSON.parse.
 */
export interface IDocumentStore {
id: string
name: string
description: string
loaders: string // JSON string
whereUsed: string // JSON string
updatedDate: Date
createdDate: Date
status: DocumentStoreStatus
// The three config fields below accept null in place of a JSON string
vectorStoreConfig: string | null // JSON string
embeddingConfig: string | null // JSON string
recordManagerConfig: string | null // JSON string
}
/**
 * A single chunk of document content.
 * NOTE(review): `docId` and `storeId` presumably reference the owning loader/document and
 * document store, and `metadata` is presumably a JSON string — confirm against the entity.
 */
export interface IDocumentStoreFileChunk {
id: string
// NOTE(review): presumably the chunk's ordinal within its document — confirm
chunkNo: number
docId: string
storeId: string
// Text content of the chunk
pageContent: string
metadata: string
}
/**
 * Payload describing one page of chunks together with summary information.
 * NOTE(review): `count` and `characters` look like totals for the whole result set rather
 * than for the current page — confirm against the service that builds this response.
 */
export interface IDocumentStoreFileChunkPagedResponse {
chunks: IDocumentStoreFileChunk[]
count: number
characters: number
// Optional; presumably the loader the chunks came from — confirm
file?: IDocumentStoreLoader
currentPage: number
storeName: string
description: string
docId: string
}
/**
 * Description of one document loader attached to a store. Every field is optional.
 */
export interface IDocumentStoreLoader {
id?: string
// Loader type key; addLoaderSource switches on it (e.g. 'pdfFile', 'apiLoader', 'cheerioWebScraper')
loaderId?: string
loaderName?: string
// addLoaderSource reads this as an object (e.g. loaderConfig.url, loaderConfig[loaderId])
loaderConfig?: any // JSON string
splitterId?: string
splitterName?: string
splitterConfig?: any // JSON string
// totalChunks and totalChars are summed across loaders by DocumentStoreDTO.fromEntity
totalChunks?: number
totalChars?: number
// Any value other than SYNC makes DocumentStoreDTO.fromEntity report the store as STALE
status?: DocumentStoreStatus
storeId?: string
files?: IDocumentStoreLoaderFile[]
// Display string computed by addLoaderSource and assigned in DocumentStoreDTO.fromEntity
source?: string
credential?: string
}
/**
 * Loader description extended with optional preview-related fields.
 * NOTE(review): the exact meaning of `rehydrated` is not visible in this file — confirm with
 * the code that consumes this interface.
 */
export interface IDocumentStoreLoaderForPreview extends IDocumentStoreLoader {
rehydrated?: boolean
preview?: boolean
// NOTE(review): presumably the number of chunks to include in a preview — confirm
previewChunkCount?: number
}
/**
 * Data describing the upsert of a single document. Only `docId` is required.
 * Each of `loader`, `splitter`, `vectorStore`, `embedding` and `recordManager` is an optional
 * `{ name, config }` pair.
 */
export interface IDocumentStoreUpsertData {
docId: string
// Accepted either as a string or as an object
metadata?: string | object
replaceExisting?: boolean
loader?: {
name: string
config: ICommonObject
}
splitter?: {
name: string
config: ICommonObject
}
vectorStore?: {
name: string
config: ICommonObject
}
embedding?: {
name: string
config: ICommonObject
}
recordManager?: {
name: string
config: ICommonObject
}
}
/**
 * Wrapper around a list of per-document upsert descriptions.
 * NOTE(review): presumably the request body of the store "refresh" operation — confirm with the caller.
 */
export interface IDocumentStoreRefreshData {
items: IDocumentStoreUpsertData[]
}
/**
 * A file associated with a loader (see IDocumentStoreLoader.files and DocumentStoreDTO.files).
 */
export interface IDocumentStoreLoaderFile {
id: string
name: string
// NOTE(review): presumably the leading part of the file's MIME type — confirm
mimePrefix: string
// NOTE(review): unit not visible in this file, presumably bytes — confirm
size: number
status: DocumentStoreStatus
uploaded: Date
}
/**
 * Id/name pair identifying something that uses a document store.
 * DocumentStoreDTO.whereUsed is an array of these, parsed from the entity's JSON string.
 */
export interface IDocumentStoreWhereUsed {
id: string
name: string
}
/**
 * Server-side dependencies shared by the IExecute* argument interfaces in this file,
 * which extend this interface.
 */
export interface IUpsertQueueAppServer {
// TypeORM data source
appDataSource: DataSource
componentNodes: IComponentNodes
telemetry: Telemetry
// Optional, unlike the other dependencies
cachePool?: CachePool
}
/**
 * Arguments for executing a document store upsert: the shared server dependencies plus the
 * target store, the items to upsert and any uploaded files.
 */
export interface IExecuteDocStoreUpsert extends IUpsertQueueAppServer {
storeId: string
totalItems: IDocumentStoreUpsertData[]
// Relies on the ambient Express.Multer namespace (not imported in this file)
files: Express.Multer.File[]
// NOTE(review): presumably true when invoked through the refresh API (see IDocumentStoreRefreshData) — confirm
isRefreshAPI: boolean
}
/**
 * Arguments for executing a loader preview.
 * `telemetry` is removed from the inherited shape via Omit and re-declared as optional,
 * so callers of the preview path do not have to supply it.
 */
export interface IExecutePreviewLoader extends Omit<IUpsertQueueAppServer, 'telemetry'> {
data: IDocumentStoreLoaderForPreview
isPreviewOnly: boolean
telemetry?: Telemetry
}
/**
 * Arguments for executing the processing of a loader: the shared server dependencies plus
 * the loader description and its id.
 */
export interface IExecuteProcessLoader extends IUpsertQueueAppServer {
data: IDocumentStoreLoaderForPreview
docLoaderId: string
// NOTE(review): presumably skips the vector store upsert step when true — confirm with the consumer
isProcessWithoutUpsert: boolean
}
/**
 * Arguments for executing a vector store insert: the shared server dependencies plus a
 * free-form data object and two mode flags.
 * NOTE(review): the semantics of `isStrictSave` and `isVectorStoreInsert` are not visible in
 * this file — confirm with the consumer.
 */
export interface IExecuteVectorStoreInsert extends IUpsertQueueAppServer {
data: ICommonObject
isStrictSave: boolean
isVectorStoreInsert: boolean
}
/**
 * Extracts human-readable file name(s) from a loader's stored file value.
 *
 * Recognised shapes:
 *  - `FILE-STORAGE::name.ext`                  a single stored file name
 *  - `FILE-STORAGE::["a.ext","b.ext"]`         a JSON array of stored file names
 *  - `data:...;base64,...,filename:name.ext`   a data URI with the file name appended
 *  - `["data:...,filename:a.ext", ...]`        a JSON array of such data URIs
 *
 * @param fileBase64 - raw value read from the loader configuration
 * @returns the file name, or several names joined with ', '; an empty string when no name
 *          can be determined (including when the value is missing or not a string)
 */
const getFileName = (fileBase64: string): string => {
    // The value comes from loosely typed loader config, so it can be missing at runtime
    // despite the signature; fail soft instead of throwing on `.startsWith`.
    if (typeof fileBase64 !== 'string' || fileBase64 === '') return ''

    const storagePrefix = 'FILE-STORAGE::'
    const nameMarker = ',filename:'

    // Returns the parsed array, or undefined when `raw` is not a JSON array.
    // A plain file name may legitimately contain brackets (e.g. "report[1].pdf"),
    // so a failed parse is not an error.
    const parseJsonArray = (raw: string): unknown[] | undefined => {
        const candidate = raw.trim()
        if (!candidate.startsWith('[') || !candidate.endsWith(']')) return undefined
        try {
            const parsed: unknown = JSON.parse(candidate)
            return Array.isArray(parsed) ? parsed : undefined
        } catch {
            return undefined
        }
    }

    // Pulls the file name out of a single data URI.
    const nameFromDataURI = (dataURI: string): string => {
        // Preferred: everything after the ",filename:" marker, so names containing ':' or ',' survive intact.
        const markerAt = dataURI.indexOf(nameMarker)
        if (markerAt !== -1) return dataURI.substring(markerAt + nameMarker.length)
        // Fallback for other "key:value" suffixes: text after the first ':' of the last comma-separated segment.
        const lastSegment = dataURI.substring(dataURI.lastIndexOf(',') + 1)
        const colonAt = lastSegment.indexOf(':')
        return colonAt === -1 ? '' : lastSegment.substring(colonAt + 1)
    }

    if (fileBase64.startsWith(storagePrefix)) {
        const names = fileBase64.substring(storagePrefix.length)
        const list = parseJsonArray(names)
        return list ? list.join(', ') : names
    }

    const dataURIs = parseJsonArray(fileBase64)
    if (dataURIs) {
        // Non-string entries contribute an empty name rather than crashing the whole lookup.
        return dataURIs.map((uri) => (typeof uri === 'string' ? nameFromDataURI(uri) : '')).join(', ')
    }
    return nameFromDataURI(fileBase64)
}
/**
 * Builds the display "source" string of a document loader from its configuration.
 *
 * - File loaders (`pdfFile`, `jsonFile`, `csvFile`, `file`, `jsonlinesFile`, `txtFile`): the value
 *   stored under `loaderConfig[loaderId]`, either reduced to file name(s) via getFileName or
 *   with the 'FILE-STORAGE::' prefix removed.
 * - `apiLoader`: "<url> (<method>)".
 * - Web scrapers (`cheerioWebScraper`, `playwrightWebScraper`, `puppeteerWebScraper`): the url.
 * - `unstructuredFileLoader`: the file object (handled like a file loader) or else the file path.
 * - Anything else, or any missing value: 'None'.
 *
 * @param loader - loader description; `loaderId` and `loaderConfig` are the only fields read
 * @param isGetFileNameOnly - when true, file values are reduced to their file name(s)
 * @returns the source string, 'None' when it cannot be determined
 */
export const addLoaderSource = (loader: IDocumentStoreLoader, isGetFileNameOnly = false): string => {
    const noSource = 'None'
    const storagePrefix = 'FILE-STORAGE::'
    // loaderConfig is optional and untyped; normalise a missing config to an empty object.
    const config = loader.loaderConfig ?? {}

    // Turns a stored file value into its display form. Missing or non-string values yield
    // 'None' instead of throwing inside getFileName / String.prototype.replace.
    const describeFile = (value: unknown): string => {
        if (typeof value !== 'string' || value === '') return noSource
        const described = isGetFileNameOnly ? getFileName(value) : value.replace(storagePrefix, '')
        return described || noSource
    }

    switch (loader.loaderId) {
        case 'pdfFile':
        case 'jsonFile':
        case 'csvFile':
        case 'file':
        case 'jsonlinesFile':
        case 'txtFile':
            return describeFile(config[loader.loaderId])
        case 'apiLoader': {
            // Avoid rendering "undefined (undefined)" when the config is incomplete.
            if (!config.url) return noSource
            return config.method ? `${config.url} (${config.method})` : `${config.url}`
        }
        case 'cheerioWebScraper':
        case 'playwrightWebScraper':
        case 'puppeteerWebScraper':
            return config.url || noSource
        case 'unstructuredFileLoader':
            return config.fileObject ? describeFile(config.fileObject) : config.filePath || noSource
        default:
            return noSource
    }
}
/**
 * Data transfer object for a document store: the entity's JSON-string columns are parsed
 * into structured values and per-loader totals are aggregated.
 */
export class DocumentStoreDTO {
    id: string
    name: string
    description: string
    files: IDocumentStoreLoaderFile[]
    whereUsed: IDocumentStoreWhereUsed[]
    createdDate: Date
    updatedDate: Date
    status: DocumentStoreStatus
    chunkOverlap: number
    splitter: string
    // Sum of the loaders' totalChunks, computed in fromEntity
    totalChunks: number
    // Sum of the loaders' totalChars, computed in fromEntity
    totalChars: number
    chunkSize: number
    loaders: IDocumentStoreLoader[]
    // Parsed JSON of the corresponding entity columns; shape not constrained here
    vectorStoreConfig: any
    embeddingConfig: any
    recordManagerConfig: any

    constructor() {}

    /**
     * Converts a DocumentStore entity into a DTO.
     *
     * All own properties of the entity are copied first, then the JSON-string columns
     * (`whereUsed`, `vectorStoreConfig`, `embeddingConfig`, `recordManagerConfig`, `loaders`)
     * are replaced by their parsed values. Each loader gets its `source` computed, the loader
     * totals are summed, and the store is reported as STALE if any loader is not in SYNC.
     *
     * @param entity - the persisted document store
     * @returns the populated DTO; `loaders` and `whereUsed` are always arrays
     * @throws SyntaxError if one of the JSON-string columns holds invalid JSON
     */
    static fromEntity(entity: DocumentStore): DocumentStoreDTO {
        const documentStoreDTO = new DocumentStoreDTO()
        Object.assign(documentStoreDTO, entity)
        documentStoreDTO.id = entity.id
        documentStoreDTO.name = entity.name
        documentStoreDTO.description = entity.description
        documentStoreDTO.status = entity.status
        documentStoreDTO.totalChars = 0
        documentStoreDTO.totalChunks = 0

        documentStoreDTO.whereUsed = entity.whereUsed ? JSON.parse(entity.whereUsed) : []

        // Empty/null config columns keep the value copied by Object.assign above.
        if (entity.vectorStoreConfig) {
            documentStoreDTO.vectorStoreConfig = JSON.parse(entity.vectorStoreConfig)
        }
        if (entity.embeddingConfig) {
            documentStoreDTO.embeddingConfig = JSON.parse(entity.embeddingConfig)
        }
        if (entity.recordManagerConfig) {
            documentStoreDTO.recordManagerConfig = JSON.parse(entity.recordManagerConfig)
        }

        // Always expose an array: Object.assign copied the raw column value, which must not
        // leak through when the column is empty or does not contain a JSON array.
        const parsedLoaders: unknown = entity.loaders ? JSON.parse(entity.loaders) : []
        const loaders: IDocumentStoreLoader[] = Array.isArray(parsedLoaders) ? parsedLoaders : []
        documentStoreDTO.loaders = loaders

        for (const loader of loaders) {
            documentStoreDTO.totalChars += loader.totalChars || 0
            documentStoreDTO.totalChunks += loader.totalChunks || 0
            loader.source = addLoaderSource(loader)
            // A single loader that is not in sync marks the whole store as stale.
            if (loader.status !== DocumentStoreStatus.SYNC) {
                documentStoreDTO.status = DocumentStoreStatus.STALE
            }
        }
        return documentStoreDTO
    }

    /**
     * Converts a list of entities with fromEntity, preserving order.
     *
     * @param entities - persisted document stores
     * @returns one DTO per entity
     */
    static fromEntities(entities: DocumentStore[]): DocumentStoreDTO[] {
        // Reference the class explicitly so the method still works when detached from it.
        return entities.map((entity) => DocumentStoreDTO.fromEntity(entity))
    }

    /**
     * Creates a new DocumentStore entity from a request body.
     *
     * Every property of `body` is copied onto the entity; `loaders`, `whereUsed` and `status`
     * are then reset regardless of what the body contained.
     * NOTE(review): because all body properties are copied, callers should pass only trusted or
     * already-validated fields — confirm upstream validation.
     *
     * @param body - source of the entity's initial field values
     * @returns the new, not yet persisted entity
     */
    static toEntity(body: any): DocumentStore {
        const docStore = new DocumentStore()
        Object.assign(docStore, body)
        docStore.loaders = '[]'
        docStore.whereUsed = '[]'
        // when a new document store is created, it is empty and in sync
        docStore.status = DocumentStoreStatus.EMPTY_SYNC
        return docStore
    }
}