Flowise/packages/components/src/storageUtils.ts

import {
    DeleteObjectsCommand,
    GetObjectCommand,
    ListObjectsCommand,
    ListObjectsV2Command,
    PutObjectCommand,
    S3Client,
    S3ClientConfig
} from '@aws-sdk/client-s3'
import { Storage } from '@google-cloud/storage'
import fs from 'fs'
import { Readable } from 'node:stream'
import path from 'path'
import sanitize from 'sanitize-filename'
import { getUserHome } from './utils'
import { isPathTraversal, isValidUUID } from './validator'
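
/**
 * Calculate the total size (in bytes) of all files under a local directory, recursing into subdirectories
 */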
const dirSize = async (directoryPath: string) => {
    let totalSize = 0

    async function calculateSize(itemPath: string) {
        const stats = await fs.promises.stat(itemPath)

        if (stats.isFile()) {
            totalSize += stats.size
        } else if (stats.isDirectory()) {
            const files = await fs.promises.readdir(itemPath)
            for (const file of files) {
                await calculateSize(path.join(itemPath, file))
            }
        }
    }

    await calculateSize(directoryPath)
    return totalSize
}
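
/**
 * Store a base64 data URI as a file under <orgId>/<chatflowid> in the configured storage
 * (local, S3 or GCS). Pushes the stored filename into fileNames and returns the
 * FILE-STORAGE:: reference along with the org's total storage size in MB.
 */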
export const addBase64FilesToStorage = async (
    fileBase64: string,
    chatflowid: string,
    fileNames: string[],
    orgId: string
): Promise<{ path: string; totalSize: number }> => {
    // Validate chatflowid
    if (!chatflowid || !isValidUUID(chatflowid)) {
        throw new Error('Invalid chatflowId format - must be a valid UUID')
    }

    // Check for path traversal attempts
    if (isPathTraversal(chatflowid)) {
        throw new Error('Invalid path characters detected in chatflowId')
    }

    const storageType = getStorageType()
    if (storageType === 's3') {
        const { s3Client, Bucket } = getS3Config()

        const splitDataURI = fileBase64.split(',')
        const filename = splitDataURI.pop()?.split(':')[1] ?? ''
        const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
        const mime = splitDataURI[0].split(':')[1].split(';')[0]
        const sanitizedFilename = _sanitizeFilename(filename)

        const Key = orgId + '/' + chatflowid + '/' + sanitizedFilename
        const putObjCmd = new PutObjectCommand({
            Bucket,
            Key,
            ContentEncoding: 'base64', // required for binary data
            ContentType: mime,
            Body: bf
        })
        await s3Client.send(putObjCmd)
        fileNames.push(sanitizedFilename)

        const totalSize = await getS3StorageSize(orgId)

        return { path: 'FILE-STORAGE::' + JSON.stringify(fileNames), totalSize: totalSize / 1024 / 1024 }
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()

        const splitDataURI = fileBase64.split(',')
        const filename = splitDataURI.pop()?.split(':')[1] ?? ''
        const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
        const mime = splitDataURI[0].split(':')[1].split(';')[0]
        const sanitizedFilename = _sanitizeFilename(filename)

        const normalizedChatflowid = chatflowid.replace(/\\/g, '/')
        const normalizedFilename = sanitizedFilename.replace(/\\/g, '/')
        const filePath = `${normalizedChatflowid}/${normalizedFilename}`
        const file = bucket.file(filePath)
        await new Promise<void>((resolve, reject) => {
            file.createWriteStream({ contentType: mime, metadata: { contentEncoding: 'base64' } })
                .on('error', (err) => reject(err))
                .on('finish', () => resolve())
                .end(bf)
        })
        fileNames.push(sanitizedFilename)

        const totalSize = await getGCSStorageSize(orgId)

        return { path: 'FILE-STORAGE::' + JSON.stringify(fileNames), totalSize: totalSize / 1024 / 1024 }
    } else {
        const dir = path.join(getStoragePath(), orgId, chatflowid)
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true })
        }

        const splitDataURI = fileBase64.split(',')
        const filename = splitDataURI.pop()?.split(':')[1] ?? ''
        const bf = Buffer.from(splitDataURI.pop() || '', 'base64')
        const sanitizedFilename = _sanitizeFilename(filename)

        const filePath = path.join(dir, sanitizedFilename)
        fs.writeFileSync(filePath, bf)
        fileNames.push(sanitizedFilename)

        const totalSize = await dirSize(path.join(getStoragePath(), orgId))

        return { path: 'FILE-STORAGE::' + JSON.stringify(fileNames), totalSize: totalSize / 1024 / 1024 }
    }
}
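
/**
 * Store a single buffer under the given path segments and record its filename in fileNames.
 * Returns the FILE-STORAGE:: reference for all collected filenames plus the total storage size in MB.
 */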
export const addArrayFilesToStorage = async (
    mime: string,
    bf: Buffer,
    fileName: string,
    fileNames: string[],
    ...paths: string[]
): Promise<{ path: string; totalSize: number }> => {
    const storageType = getStorageType()
    const sanitizedFilename = _sanitizeFilename(fileName)

    if (storageType === 's3') {
        const { s3Client, Bucket } = getS3Config()

        let Key = paths.reduce((acc, cur) => acc + '/' + cur, '') + '/' + sanitizedFilename
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }

        const putObjCmd = new PutObjectCommand({
            Bucket,
            Key,
            ContentEncoding: 'base64', // required for binary data
            ContentType: mime,
            Body: bf
        })
        await s3Client.send(putObjCmd)
        fileNames.push(sanitizedFilename)

        const totalSize = await getS3StorageSize(paths[0])

        return { path: 'FILE-STORAGE::' + JSON.stringify(fileNames), totalSize: totalSize / 1024 / 1024 }
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        const normalizedPaths = paths.map((p) => p.replace(/\\/g, '/'))
        const normalizedFilename = sanitizedFilename.replace(/\\/g, '/')
        const filePath = [...normalizedPaths, normalizedFilename].join('/')
        const file = bucket.file(filePath)
        await new Promise<void>((resolve, reject) => {
            file.createWriteStream()
                .on('error', (err) => reject(err))
                .on('finish', () => resolve())
                .end(bf)
        })
        fileNames.push(sanitizedFilename)

        const totalSize = await getGCSStorageSize(paths[0])

        return { path: 'FILE-STORAGE::' + JSON.stringify(fileNames), totalSize: totalSize / 1024 / 1024 }
    } else {
        const dir = path.join(getStoragePath(), ...paths.map(_sanitizeFilename))
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true })
        }

        const filePath = path.join(dir, sanitizedFilename)
        fs.writeFileSync(filePath, bf)
        fileNames.push(sanitizedFilename)

        const totalSize = await dirSize(path.join(getStoragePath(), paths[0]))

        return { path: 'FILE-STORAGE::' + JSON.stringify(fileNames), totalSize: totalSize / 1024 / 1024 }
    }
}
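
/**
 * Store a single buffer under the given path segments and return a FILE-STORAGE:: reference
 * to that one file, along with the total storage size in MB.
 */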
export const addSingleFileToStorage = async (
    mime: string,
    bf: Buffer,
    fileName: string,
    ...paths: string[]
): Promise<{ path: string; totalSize: number }> => {
    const storageType = getStorageType()
    const sanitizedFilename = _sanitizeFilename(fileName)

    if (storageType === 's3') {
        const { s3Client, Bucket } = getS3Config()

        let Key = paths.reduce((acc, cur) => acc + '/' + cur, '') + '/' + sanitizedFilename
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }

        const putObjCmd = new PutObjectCommand({
            Bucket,
            Key,
            ContentEncoding: 'base64', // required for binary data
            ContentType: mime,
            Body: bf
        })
        await s3Client.send(putObjCmd)

        const totalSize = await getS3StorageSize(paths[0])

        return { path: 'FILE-STORAGE::' + sanitizedFilename, totalSize: totalSize / 1024 / 1024 }
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        const normalizedPaths = paths.map((p) => p.replace(/\\/g, '/'))
        const normalizedFilename = sanitizedFilename.replace(/\\/g, '/')
        const filePath = [...normalizedPaths, normalizedFilename].join('/')
        const file = bucket.file(filePath)
        await new Promise<void>((resolve, reject) => {
            file.createWriteStream({ contentType: mime, metadata: { contentEncoding: 'base64' } })
                .on('error', (err) => reject(err))
                .on('finish', () => resolve())
                .end(bf)
        })

        const totalSize = await getGCSStorageSize(paths[0])

        return { path: 'FILE-STORAGE::' + sanitizedFilename, totalSize: totalSize / 1024 / 1024 }
    } else {
        const dir = path.join(getStoragePath(), ...paths.map(_sanitizeFilename))
        if (!fs.existsSync(dir)) {
            fs.mkdirSync(dir, { recursive: true })
        }

        const filePath = path.join(dir, sanitizedFilename)
        fs.writeFileSync(filePath, bf)

        const totalSize = await dirSize(path.join(getStoragePath(), paths[0]))

        return { path: 'FILE-STORAGE::' + sanitizedFilename, totalSize: totalSize / 1024 / 1024 }
    }
}
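
/**
 * Read an uploaded file by its full path (local) or object key (S3/GCS) and return its contents as a Buffer
 */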
export const getFileFromUpload = async (filePath: string): Promise<Buffer> => {
    const storageType = getStorageType()
    if (storageType === 's3') {
        const { s3Client, Bucket } = getS3Config()

        let Key = filePath
        // remove the first '/' if it exists
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }

        const getParams = {
            Bucket,
            Key
        }

        const response = await s3Client.send(new GetObjectCommand(getParams))
        const body = response.Body
        if (body instanceof Readable) {
            const streamToString = await body.transformToString('base64')
            if (streamToString) {
                return Buffer.from(streamToString, 'base64')
            }
        }
        // @ts-ignore
        const buffer = Buffer.concat(response.Body.toArray())
        return buffer
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        const file = bucket.file(filePath)
        const [buffer] = await file.download()
        return buffer
    } else {
        return fs.readFileSync(filePath)
    }
}
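
/**
 * Read a stored file addressed by path segments (typically orgId/chatflowId/...) and return a Buffer.
 * If the file is not found, falls back to the legacy location without the first path segment (the orgId)
 * and migrates it to the new location before returning it.
 */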
export const getFileFromStorage = async (file: string, ...paths: string[]): Promise<Buffer> => {
    const storageType = getStorageType()
    const sanitizedFilename = _sanitizeFilename(file)

    if (storageType === 's3') {
        const { s3Client, Bucket } = getS3Config()

        let Key = paths.reduce((acc, cur) => acc + '/' + cur, '') + '/' + sanitizedFilename
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }

        try {
            const getParams = {
                Bucket,
                Key
            }

            const response = await s3Client.send(new GetObjectCommand(getParams))
            const body = response.Body
            if (body instanceof Readable) {
                const streamToString = await body.transformToString('base64')
                if (streamToString) {
                    return Buffer.from(streamToString, 'base64')
                }
            }
            // @ts-ignore
            const buffer = Buffer.concat(response.Body.toArray())
            return buffer
        } catch (error) {
            // Fallback: Check if file exists without the first path element (likely orgId)
            if (paths.length > 1) {
                const fallbackPaths = paths.slice(1)
                let fallbackKey = fallbackPaths.reduce((acc, cur) => acc + '/' + cur, '') + '/' + sanitizedFilename
                if (fallbackKey.startsWith('/')) {
                    fallbackKey = fallbackKey.substring(1)
                }

                try {
                    const fallbackParams = {
                        Bucket,
                        Key: fallbackKey
                    }
                    const fallbackResponse = await s3Client.send(new GetObjectCommand(fallbackParams))
                    const fallbackBody = fallbackResponse.Body

                    // Get the file content
                    let fileContent: Buffer
                    if (fallbackBody instanceof Readable) {
                        const streamToString = await fallbackBody.transformToString('base64')
                        if (streamToString) {
                            fileContent = Buffer.from(streamToString, 'base64')
                        } else {
                            // @ts-ignore
                            fileContent = Buffer.concat(fallbackBody.toArray())
                        }
                    } else {
                        // @ts-ignore
                        fileContent = Buffer.concat(fallbackBody.toArray())
                    }

                    // Move to correct location with orgId
                    const putObjCmd = new PutObjectCommand({
                        Bucket,
                        Key,
                        Body: fileContent
                    })
                    await s3Client.send(putObjCmd)

                    // Delete the old file
                    await s3Client.send(
                        new DeleteObjectsCommand({
                            Bucket,
                            Delete: {
                                Objects: [{ Key: fallbackKey }],
                                Quiet: false
                            }
                        })
                    )

                    // Check if the directory is empty and delete recursively if needed
                    if (fallbackPaths.length > 0) {
                        await _cleanEmptyS3Folders(s3Client, Bucket, fallbackPaths[0])
                    }

                    return fileContent
                } catch (fallbackError) {
                    // Throw the original error since the fallback also failed
                    throw error
                }
            } else {
                throw error
            }
        }
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        const normalizedPaths = paths.map((p) => p.replace(/\\/g, '/'))
        const normalizedFilename = sanitizedFilename.replace(/\\/g, '/')
        const filePath = [...normalizedPaths, normalizedFilename].join('/')

        try {
            const file = bucket.file(filePath)
            const [buffer] = await file.download()
            return buffer
        } catch (error) {
            // Fallback: Check if file exists without the first path element (likely orgId)
            if (normalizedPaths.length > 1) {
                const fallbackPaths = normalizedPaths.slice(1)
                const fallbackPath = [...fallbackPaths, normalizedFilename].join('/')

                try {
                    const fallbackFile = bucket.file(fallbackPath)
                    const [buffer] = await fallbackFile.download()

                    // Move to correct location with orgId
                    const file = bucket.file(filePath)
                    await new Promise<void>((resolve, reject) => {
                        file.createWriteStream()
                            .on('error', (err) => reject(err))
                            .on('finish', () => resolve())
                            .end(buffer)
                    })

                    // Delete the old file
                    await fallbackFile.delete()

                    // Check if the directory is empty and delete recursively if needed
                    if (fallbackPaths.length > 0) {
                        await _cleanEmptyGCSFolders(bucket, fallbackPaths[0])
                    }

                    return buffer
                } catch (fallbackError) {
                    // Throw the original error since the fallback also failed
                    throw error
                }
            } else {
                throw error
            }
        }
    } else {
        try {
            const fileInStorage = path.join(getStoragePath(), ...paths.map(_sanitizeFilename), sanitizedFilename)
            return fs.readFileSync(fileInStorage)
        } catch (error) {
            // Fallback: Check if file exists without the first path element (likely orgId)
            if (paths.length > 1) {
                const fallbackPaths = paths.slice(1)
                const fallbackPath = path.join(getStoragePath(), ...fallbackPaths.map(_sanitizeFilename), sanitizedFilename)

                if (fs.existsSync(fallbackPath)) {
                    // Create directory if it doesn't exist
                    const targetPath = path.join(getStoragePath(), ...paths.map(_sanitizeFilename), sanitizedFilename)
                    const dir = path.dirname(targetPath)
                    if (!fs.existsSync(dir)) {
                        fs.mkdirSync(dir, { recursive: true })
                    }

                    // Copy file to correct location with orgId
                    fs.copyFileSync(fallbackPath, targetPath)

                    // Delete the old file
                    fs.unlinkSync(fallbackPath)

                    // Clean up empty directories recursively
                    if (fallbackPaths.length > 0) {
                        _cleanEmptyLocalFolders(path.join(getStoragePath(), ...fallbackPaths.map(_sanitizeFilename).slice(0, -1)))
                    }

                    return fs.readFileSync(targetPath)
                } else {
                    throw error
                }
            } else {
                throw error
            }
        }
    }
}
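
/**
 * List the files stored under the given path segments.
 * Note: the S3 branch reports sizes in bytes, while the local listing from getFilePaths reports sizes in MB.
 */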
export const getFilesListFromStorage = async (...paths: string[]): Promise<Array<{ name: string; path: string; size: number }>> => {
    const storageType = getStorageType()
    if (storageType === 's3') {
        const { s3Client, Bucket } = getS3Config()

        let Key = paths.reduce((acc, cur) => acc + '/' + cur, '')
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }

        const listCommand = new ListObjectsV2Command({
            Bucket,
            Prefix: Key
        })
        const list = await s3Client.send(listCommand)

        if (list.Contents && list.Contents.length > 0) {
            return list.Contents.map((item) => ({
                name: item.Key?.split('/').pop() || '',
                path: item.Key ?? '',
                size: item.Size || 0
            }))
        } else {
            return []
        }
    } else {
        const directory = path.join(getStoragePath(), ...paths)
        const filesList = getFilePaths(directory)
        return filesList
    }
}

interface FileInfo {
    name: string
    path: string
    size: number
}
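
/**
 * Recursively walk a local directory and collect file info; sizes are reported in MB
 */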
function getFilePaths(dir: string): FileInfo[] {
    let results: FileInfo[] = []

    function readDirectory(directory: string) {
        try {
            if (!fs.existsSync(directory)) {
                console.warn(`Directory does not exist: ${directory}`)
                return
            }

            const list = fs.readdirSync(directory)
            list.forEach((file) => {
                const filePath = path.join(directory, file)
                try {
                    const stat = fs.statSync(filePath)
                    if (stat && stat.isDirectory()) {
                        readDirectory(filePath)
                    } else {
                        const sizeInMB = stat.size / (1024 * 1024)
                        results.push({ name: file, path: filePath, size: sizeInMB })
                    }
                } catch (error) {
                    console.error(`Error processing file ${filePath}:`, error)
                }
            })
        } catch (error) {
            console.error(`Error reading directory ${directory}:`, error)
        }
    }

    readDirectory(dir)
    return results
}

/**
 * Prepare storage path
 */
export const getStoragePath = (): string => {
    const storagePath = process.env.BLOB_STORAGE_PATH
        ? path.join(process.env.BLOB_STORAGE_PATH)
        : path.join(getUserHome(), '.flowise', 'storage')
    if (!fs.existsSync(storagePath)) {
        fs.mkdirSync(storagePath, { recursive: true })
    }
    return storagePath
}

/**
 * Get the storage type - local, s3 or gcs
 */
export const getStorageType = (): string => {
    return process.env.STORAGE_TYPE ? process.env.STORAGE_TYPE : 'local'
}
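
/**
 * Delete everything stored under the given path segments and return the remaining
 * storage size (in MB) for the first segment (the orgId)
 */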
export const removeFilesFromStorage = async (...paths: string[]) => {
    const storageType = getStorageType()
    if (storageType === 's3') {
        let Key = paths.reduce((acc, cur) => acc + '/' + cur, '')
        // remove the first '/' if it exists
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }
        await _deleteS3Folder(Key)

        // check folder size after deleting all the files
        const totalSize = await getS3StorageSize(paths[0])
        return { totalSize: totalSize / 1024 / 1024 }
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        const normalizedPath = paths.map((p) => p.replace(/\\/g, '/')).join('/')
        await bucket.deleteFiles({ prefix: `${normalizedPath}/` })

        const totalSize = await getGCSStorageSize(paths[0])
        return { totalSize: totalSize / 1024 / 1024 }
    } else {
        const directory = path.join(getStoragePath(), ...paths.map(_sanitizeFilename))
        await _deleteLocalFolderRecursive(directory)

        const totalSize = await dirSize(path.join(getStoragePath(), paths[0]))
        return { totalSize: totalSize / 1024 / 1024 }
    }
}
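
/**
 * Delete a single uploaded file, addressed by its full path (local) or object key (S3/GCS)
 */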
export const removeSpecificFileFromUpload = async (filePath: string) => {
    const storageType = getStorageType()
    if (storageType === 's3') {
        let Key = filePath
        // remove the first '/' if it exists
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }
        await _deleteS3Folder(Key)
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        await bucket.file(filePath).delete()
    } else {
        fs.unlinkSync(filePath)
    }
}
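
/**
 * Delete one stored file addressed by path segments (the last segment is the filename)
 * and return the remaining storage size in MB
 */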
export const removeSpecificFileFromStorage = async (...paths: string[]) => {
    const storageType = getStorageType()
    if (storageType === 's3') {
        let Key = paths.reduce((acc, cur) => acc + '/' + cur, '')
        // remove the first '/' if it exists
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }
        await _deleteS3Folder(Key)

        // check folder size after deleting all the files
        const totalSize = await getS3StorageSize(paths[0])
        return { totalSize: totalSize / 1024 / 1024 }
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        const fileName = paths.pop()
        if (fileName) {
            const sanitizedFilename = _sanitizeFilename(fileName)
            paths.push(sanitizedFilename)
        }
        const normalizedPath = paths.map((p) => p.replace(/\\/g, '/')).join('/')
        await bucket.file(normalizedPath).delete()

        const totalSize = await getGCSStorageSize(paths[0])
        return { totalSize: totalSize / 1024 / 1024 }
    } else {
        const fileName = paths.pop()
        if (fileName) {
            const sanitizedFilename = _sanitizeFilename(fileName)
            paths.push(sanitizedFilename)
        }
        const file = path.join(getStoragePath(), ...paths.map(_sanitizeFilename))
        // check if file exists, if not skip delete
        // this might happen when user tries to delete a document loader but the attached file is already deleted
        const stat = fs.statSync(file, { throwIfNoEntry: false })
        if (stat && stat.isFile()) {
            fs.unlinkSync(file)
        }

        const totalSize = await dirSize(path.join(getStoragePath(), paths[0]))
        return { totalSize: totalSize / 1024 / 1024 }
    }
}
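
/**
 * Delete a whole folder (for local storage the parent chatflow folder is removed in one call)
 * and return the remaining storage size in MB
 */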
export const removeFolderFromStorage = async (...paths: string[]) => {
    const storageType = getStorageType()
    if (storageType === 's3') {
        let Key = paths.reduce((acc, cur) => acc + '/' + cur, '')
        // remove the first '/' if it exists
        if (Key.startsWith('/')) {
            Key = Key.substring(1)
        }
        await _deleteS3Folder(Key)

        // check folder size after deleting all the files
        const totalSize = await getS3StorageSize(paths[0])
        return { totalSize: totalSize / 1024 / 1024 }
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        const normalizedPath = paths.map((p) => p.replace(/\\/g, '/')).join('/')
        await bucket.deleteFiles({ prefix: `${normalizedPath}/` })

        const totalSize = await getGCSStorageSize(paths[0])
        return { totalSize: totalSize / 1024 / 1024 }
    } else {
        const directory = path.join(getStoragePath(), ...paths.map(_sanitizeFilename))
        await _deleteLocalFolderRecursive(directory, true)

        const totalSize = await dirSize(path.join(getStoragePath(), paths[0]))
        return { totalSize: totalSize / 1024 / 1024 }
    }
}
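
/**
 * Recursively delete a local file or directory. When deleteParentChatflowFolder is set,
 * the directory is removed in a single recursive call instead of walking its contents first.
 */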
const _deleteLocalFolderRecursive = async (directory: string, deleteParentChatflowFolder?: boolean) => {
    try {
        // Check if the path exists
        await fs.promises.access(directory)

        if (deleteParentChatflowFolder) {
            await fs.promises.rmdir(directory, { recursive: true })
            // The folder is gone, nothing left to walk
            return
        }

        // Get stats of the path to determine if it's a file or directory
        const stats = await fs.promises.stat(directory)

        if (stats.isDirectory()) {
            // Read all directory contents
            const files = await fs.promises.readdir(directory)

            // Recursively delete all contents
            for (const file of files) {
                const currentPath = path.join(directory, file)
                await _deleteLocalFolderRecursive(currentPath) // Recursive call
            }

            // Delete the directory itself after emptying it
            await fs.promises.rmdir(directory, { recursive: true })
        } else {
            // If it's a file, delete it directly
            await fs.promises.unlink(directory)
        }
    } catch (error) {
        // Ignore errors - the path may already have been removed
    }
}
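
/**
 * Delete every S3 object under the given prefix, paginating with continuation tokens
 */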
const _deleteS3Folder = async (location: string) => {
    let count = 0 // number of files deleted
    const { s3Client, Bucket } = getS3Config()

    async function recursiveS3Delete(token?: any) {
        // get the files
        const listCommand = new ListObjectsV2Command({
            Bucket: Bucket,
            Prefix: location,
            ContinuationToken: token
        })
        let list = await s3Client.send(listCommand)
        if (list.KeyCount) {
            const deleteCommand = new DeleteObjectsCommand({
                Bucket: Bucket,
                Delete: {
                    Objects: list.Contents?.map((item) => ({ Key: item.Key })),
                    Quiet: false
                }
            })
            let deleted = await s3Client.send(deleteCommand)
            // @ts-ignore
            count += deleted.Deleted.length
            if (deleted.Errors) {
                deleted.Errors.map((error: any) => console.error(`${error.Key} could not be deleted - ${error.Code}`))
            }
        }
        // repeat if more files to delete
        if (list.NextContinuationToken) {
            await recursiveS3Delete(list.NextContinuationToken)
        }
        // return total deleted count when finished
        return `${count} files deleted from S3`
    }

    // start the recursive function
    return recursiveS3Delete()
}
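
/**
 * Stream a chat upload (orgId/chatflowId/chatId/fileName) back to the caller.
 * Returns a read stream for local storage or a Buffer for S3/GCS; files found in the
 * legacy location without the orgId prefix are migrated before being returned.
 */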
export const streamStorageFile = async (
    chatflowId: string,
    chatId: string,
    fileName: string,
    orgId: string
): Promise<fs.ReadStream | Buffer | undefined> => {
    // Validate chatflowId
    if (!chatflowId || !isValidUUID(chatflowId)) {
        throw new Error('Invalid chatflowId format - must be a valid UUID')
    }

    // Check for path traversal attempts
    if (isPathTraversal(chatflowId) || isPathTraversal(chatId)) {
        throw new Error('Invalid path characters detected in chatflowId or chatId')
    }

    const storageType = getStorageType()
    const sanitizedFilename = sanitize(fileName)

    if (storageType === 's3') {
        const { s3Client, Bucket } = getS3Config()
        const Key = orgId + '/' + chatflowId + '/' + chatId + '/' + sanitizedFilename
        const getParams = {
            Bucket,
            Key
        }
        try {
            const response = await s3Client.send(new GetObjectCommand(getParams))
            const body = response.Body
            if (body instanceof Readable) {
                const blob = await body.transformToByteArray()
                return Buffer.from(blob)
            }
        } catch (error) {
            // Fallback: Check if file exists without orgId
            const fallbackKey = chatflowId + '/' + chatId + '/' + sanitizedFilename
            try {
                const fallbackParams = {
                    Bucket,
                    Key: fallbackKey
                }
                const fallbackResponse = await s3Client.send(new GetObjectCommand(fallbackParams))
                const fallbackBody = fallbackResponse.Body

                // If found, copy to correct location with orgId
                if (fallbackBody) {
                    // Get the file content
                    let fileContent: Buffer
                    if (fallbackBody instanceof Readable) {
                        const blob = await fallbackBody.transformToByteArray()
                        fileContent = Buffer.from(blob)
                    } else {
                        // @ts-ignore
                        fileContent = Buffer.concat(fallbackBody.toArray())
                    }

                    // Move to correct location with orgId
                    const putObjCmd = new PutObjectCommand({
                        Bucket,
                        Key,
                        Body: fileContent
                    })
                    await s3Client.send(putObjCmd)

                    // Delete the old file
                    await s3Client.send(
                        new DeleteObjectsCommand({
                            Bucket,
                            Delete: {
                                Objects: [{ Key: fallbackKey }],
                                Quiet: false
                            }
                        })
                    )

                    // Check if the directory is empty and delete recursively if needed
                    await _cleanEmptyS3Folders(s3Client, Bucket, chatflowId)

                    return fileContent
                }
            } catch (fallbackError) {
                // File not found in fallback location either
                throw new Error(`File ${fileName} not found`)
            }
        }
    } else if (storageType === 'gcs') {
        const { bucket } = getGcsClient()
        const normalizedChatflowId = chatflowId.replace(/\\/g, '/')
        const normalizedChatId = chatId.replace(/\\/g, '/')
        const normalizedFilename = sanitizedFilename.replace(/\\/g, '/')
        const filePath = `${orgId}/${normalizedChatflowId}/${normalizedChatId}/${normalizedFilename}`
        try {
            const [buffer] = await bucket.file(filePath).download()
            return buffer
        } catch (error) {
            // Fallback: Check if file exists without orgId
            const fallbackPath = `${normalizedChatflowId}/${normalizedChatId}/${normalizedFilename}`
            try {
                const fallbackFile = bucket.file(fallbackPath)
                const [buffer] = await fallbackFile.download()

                // If found, copy to correct location with orgId
                if (buffer) {
                    const file = bucket.file(filePath)
                    await new Promise<void>((resolve, reject) => {
                        file.createWriteStream()
                            .on('error', (err) => reject(err))
                            .on('finish', () => resolve())
                            .end(buffer)
                    })

                    // Delete the old file
                    await fallbackFile.delete()

                    // Check if the directory is empty and delete recursively if needed
                    await _cleanEmptyGCSFolders(bucket, normalizedChatflowId)

                    return buffer
                }
            } catch (fallbackError) {
                // File not found in fallback location either
                throw new Error(`File ${fileName} not found`)
            }
        }
    } else {
        const filePath = path.join(getStoragePath(), orgId, chatflowId, chatId, sanitizedFilename)

        //raise error if file path is not absolute
        if (!path.isAbsolute(filePath)) throw new Error(`Invalid file path`)
        //raise error if file path contains '..'
        if (filePath.includes('..')) throw new Error(`Invalid file path`)
        //only return from the storage folder
        if (!filePath.startsWith(getStoragePath())) throw new Error(`Invalid file path`)

        if (fs.existsSync(filePath)) {
            return fs.createReadStream(filePath)
        } else {
            // Fallback: Check if file exists without orgId
            const fallbackPath = path.join(getStoragePath(), chatflowId, chatId, sanitizedFilename)
            if (fs.existsSync(fallbackPath)) {
                // Create directory if it doesn't exist
                const dir = path.dirname(filePath)
                if (!fs.existsSync(dir)) {
                    fs.mkdirSync(dir, { recursive: true })
                }

                // Copy file to correct location with orgId
                fs.copyFileSync(fallbackPath, filePath)

                // Delete the old file
                fs.unlinkSync(fallbackPath)

                // Clean up empty directories recursively
                _cleanEmptyLocalFolders(path.join(getStoragePath(), chatflowId, chatId))

                return fs.createReadStream(filePath)
            } else {
                throw new Error(`File ${fileName} not found`)
            }
        }
    }
}

/**
 * Check if a local directory is empty and delete it if so,
 * then check parent directories recursively
 */
const _cleanEmptyLocalFolders = (dirPath: string) => {
    try {
        // Stop if we reach the storage root
        if (dirPath === getStoragePath()) return

        // Check if directory exists
        if (!fs.existsSync(dirPath)) return

        // Read directory contents
        const files = fs.readdirSync(dirPath)

        // If directory is empty, delete it and check parent
        if (files.length === 0) {
            fs.rmdirSync(dirPath)
            // Recursively check parent directory
            _cleanEmptyLocalFolders(path.dirname(dirPath))
        }
    } catch (error) {
        // Ignore errors during cleanup
        console.error('Error cleaning empty folders:', error)
    }
}

/**
 * Check if an S3 "folder" is empty and delete it recursively
 */
const _cleanEmptyS3Folders = async (s3Client: S3Client, Bucket: string, prefix: string) => {
    try {
        // Skip if prefix is empty
        if (!prefix) return

        // List objects in this "folder"
        const listCmd = new ListObjectsV2Command({
            Bucket,
            Prefix: prefix + '/',
            Delimiter: '/'
        })
        const response = await s3Client.send(listCmd)

        // If folder is empty (only contains common prefixes but no files)
        if (
            (response.Contents?.length === 0 || !response.Contents) &&
            (response.CommonPrefixes?.length === 0 || !response.CommonPrefixes)
        ) {
            // Delete the folder marker if it exists
            await s3Client.send(
                new DeleteObjectsCommand({
                    Bucket,
                    Delete: {
                        Objects: [{ Key: prefix + '/' }],
                        Quiet: true
                    }
                })
            )

            // Recursively check parent folder
            const parentPrefix = prefix.substring(0, prefix.lastIndexOf('/'))
            if (parentPrefix) {
                await _cleanEmptyS3Folders(s3Client, Bucket, parentPrefix)
            }
        }
    } catch (error) {
        // Ignore errors during cleanup
        console.error('Error cleaning empty S3 folders:', error)
    }
}

/**
 * Check if a GCS "folder" is empty and delete recursively if so
 */
const _cleanEmptyGCSFolders = async (bucket: any, prefix: string) => {
    try {
        // Skip if prefix is empty
        if (!prefix) return

        // List files with this prefix
        const [files] = await bucket.getFiles({
            prefix: prefix + '/',
            delimiter: '/'
        })

        // If folder is empty (no files)
        if (files.length === 0) {
            // Delete the folder marker if it exists
            try {
                await bucket.file(prefix + '/').delete()
            } catch (err) {
                // Folder marker might not exist, ignore
            }

            // Recursively check parent folder
            const parentPrefix = prefix.substring(0, prefix.lastIndexOf('/'))
            if (parentPrefix) {
                await _cleanEmptyGCSFolders(bucket, parentPrefix)
            }
        }
    } catch (error) {
        // Ignore errors during cleanup
        console.error('Error cleaning empty GCS folders:', error)
    }
}
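
/**
 * Sum the size (in bytes) of all GCS objects stored under the given orgId prefix
 */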
export const getGCSStorageSize = async (orgId: string): Promise<number> => {
    const { bucket } = getGcsClient()

    let totalSize = 0
    const [files] = await bucket.getFiles({ prefix: orgId })
    for (const file of files) {
        const size = file.metadata.size
        // Handle different types that size could be
        if (typeof size === 'string') {
            totalSize += parseInt(size, 10) || 0
        } else if (typeof size === 'number') {
            totalSize += size
        }
    }
    return totalSize
}
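
/**
 * Build a Google Cloud Storage client and bucket handle from the
 * GOOGLE_CLOUD_STORAGE_* environment variables
 */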
export const getGcsClient = () => {
    const pathToGcsCredential = process.env.GOOGLE_CLOUD_STORAGE_CREDENTIAL
    const projectId = process.env.GOOGLE_CLOUD_STORAGE_PROJ_ID
    const bucketName = process.env.GOOGLE_CLOUD_STORAGE_BUCKET_NAME

    if (!bucketName) {
        throw new Error('GOOGLE_CLOUD_STORAGE_BUCKET_NAME env variable is required')
    }

    const storageConfig = {
        ...(pathToGcsCredential ? { keyFilename: pathToGcsCredential } : {}),
        ...(projectId ? { projectId } : {})
    }
    const storage = new Storage(storageConfig)
    const bucket = storage.bucket(bucketName)
    return { storage, bucket }
}
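
/**
 * Sum the size (in bytes) of all S3 objects stored under the given orgId prefix.
 * Note: ListObjects returns at most 1000 keys per request, so very large orgs may be under-counted here.
 */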
export const getS3StorageSize = async (orgId: string): Promise<number> => {
    const { s3Client, Bucket } = getS3Config()
    const getCmd = new ListObjectsCommand({
        Bucket,
        Prefix: orgId
    })
    const headObj = await s3Client.send(getCmd)

    let totalSize = 0
    for (const obj of headObj.Contents || []) {
        totalSize += obj.Size || 0
    }
    return totalSize
}
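
/**
 * Build an S3 client from the S3_STORAGE_* environment variables.
 * Region and bucket name are required; credentials and a custom endpoint are optional.
 */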
export const getS3Config = () => {
    const accessKeyId = process.env.S3_STORAGE_ACCESS_KEY_ID
    const secretAccessKey = process.env.S3_STORAGE_SECRET_ACCESS_KEY
    const region = process.env.S3_STORAGE_REGION
    const Bucket = process.env.S3_STORAGE_BUCKET_NAME
    const customURL = process.env.S3_ENDPOINT_URL
    const forcePathStyle = process.env.S3_FORCE_PATH_STYLE === 'true' ? true : false

    if (!region || region.trim() === '' || !Bucket || Bucket.trim() === '') {
        throw new Error('S3 storage configuration is missing')
    }

    const s3Config: S3ClientConfig = {
        region: region,
        forcePathStyle: forcePathStyle
    }

    // Only include endpoint if customURL is not empty
    if (customURL && customURL.trim() !== '') {
        s3Config.endpoint = customURL
    }

    if (accessKeyId && accessKeyId.trim() !== '' && secretAccessKey && secretAccessKey.trim() !== '') {
        s3Config.credentials = {
            accessKeyId: accessKeyId,
            secretAccessKey: secretAccessKey
        }
    }

    const s3Client = new S3Client(s3Config)

    return { s3Client, Bucket }
}
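
/**
 * Sanitize a filename and strip any leading dots so it cannot resolve to a hidden or relative path
 */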
const _sanitizeFilename = (filename: string): string => {
    if (filename) {
        let sanitizedFilename = sanitize(filename)
        // remove all leading .
        return sanitizedFilename.replace(/^\.+/, '')
    }
    return ''
}