Compare commits
41 Commits
main
...
feature/tt
| Author | SHA1 | Date |
|---|---|---|
|
|
53622d95e4 | |
|
|
634d8fe282 | |
|
|
87b54bec9e | |
|
|
b71844a174 | |
|
|
6aabcee1de | |
|
|
3db27e28fe | |
|
|
4be176d55a | |
|
|
3f89d82292 | |
|
|
50b5bb911a | |
|
|
d2a177db59 | |
|
|
27da0b62bd | |
|
|
3198e7817e | |
|
|
b5718c3fdb | |
|
|
f2da015dce | |
|
|
4cd1c4a22b | |
|
|
b514a82728 | |
|
|
f64900b8b8 | |
|
|
72ccf2e7b0 | |
|
|
123ab3c85e | |
|
|
5ea714098d | |
|
|
c1553d12ff | |
|
|
95a63fa609 | |
|
|
2e33a00667 | |
|
|
eb07a42f1d | |
|
|
4aad293685 | |
|
|
45917a34e3 | |
|
|
d42c096164 | |
|
|
55b6be24df | |
|
|
8de200ee15 | |
|
|
ad44c7b661 | |
|
|
aa357c8373 | |
|
|
2b5554aafe | |
|
|
2247646182 | |
|
|
b30e4a9da8 | |
|
|
08f0d7597d | |
|
|
ef1b0dc856 | |
|
|
1902701e64 | |
|
|
eca7d175fd | |
|
|
47dd721177 | |
|
|
3364539a2e | |
|
|
2c39b51cff |
|
|
@ -0,0 +1,26 @@
|
|||
import { INodeParams, INodeCredential } from '../src/Interface'
|
||||
|
||||
class ElevenLabsApi implements INodeCredential {
|
||||
label: string
|
||||
name: string
|
||||
version: number
|
||||
description: string
|
||||
inputs: INodeParams[]
|
||||
|
||||
constructor() {
|
||||
this.label = 'Eleven Labs API'
|
||||
this.name = 'elevenLabsApi'
|
||||
this.version = 1.0
|
||||
this.description =
|
||||
'Sign up for a Eleven Labs account and <a target="_blank" href="https://elevenlabs.io/app/settings/api-keys">create an API Key</a>.'
|
||||
this.inputs = [
|
||||
{
|
||||
label: 'Eleven Labs API Key',
|
||||
name: 'elevenLabsApiKey',
|
||||
type: 'password'
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = { credClass: ElevenLabsApi }
|
||||
|
|
@ -33,6 +33,7 @@
|
|||
"@dqbd/tiktoken": "^1.0.21",
|
||||
"@e2b/code-interpreter": "^1.5.1",
|
||||
"@elastic/elasticsearch": "^8.9.0",
|
||||
"@elevenlabs/elevenlabs-js": "^2.8.0",
|
||||
"@flowiseai/nodevm": "^3.9.25",
|
||||
"@getzep/zep-cloud": "~1.0.7",
|
||||
"@getzep/zep-js": "^0.9.0",
|
||||
|
|
|
|||
|
|
@ -441,6 +441,9 @@ export interface IServerSideEventStreamer {
|
|||
streamAbortEvent(chatId: string): void
|
||||
streamEndEvent(chatId: string): void
|
||||
streamUsageMetadataEvent(chatId: string, data: any): void
|
||||
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void
|
||||
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void
|
||||
streamTTSEndEvent(chatId: string, chatMessageId: string): void
|
||||
}
|
||||
|
||||
export enum FollowUpPromptProvider {
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ dotenv.config({ path: envPath, override: true })
|
|||
export * from './Interface'
|
||||
export * from './utils'
|
||||
export * from './speechToText'
|
||||
export * from './textToSpeech'
|
||||
export * from './storageUtils'
|
||||
export * from './handler'
|
||||
export * from '../evaluation/EvaluationRunner'
|
||||
|
|
|
|||
|
|
@ -0,0 +1,240 @@
|
|||
import { ICommonObject } from './Interface'
|
||||
import { getCredentialData } from './utils'
|
||||
import OpenAI from 'openai'
|
||||
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js'
|
||||
import { Readable } from 'node:stream'
|
||||
import type { ReadableStream } from 'node:stream/web'
|
||||
|
||||
// Supported TTS provider identifiers. The values match the `name` field of
// the textToSpeechConfig object persisted on a chatflow, so they must stay
// in sync with the provider keys used by the server-side TTS configuration.
const TextToSpeechType = {
    OPENAI_TTS: 'openai',
    ELEVEN_LABS_TTS: 'elevenlabs'
}
|
||||
|
||||
/**
 * Converts `text` to speech with the provider selected in `textToSpeechConfig`
 * and streams the resulting MP3 audio through the supplied callbacks.
 *
 * @param text                The text to synthesize.
 * @param textToSpeechConfig  Provider config: `name` (provider id), `credentialId`, optional `voice`.
 * @param options             Passed through to `getCredentialData` (data source, entities, ids).
 * @param abortController     Cancels the provider request and the chunk pacing loop.
 * @param onStart             Called once with the audio format ('mp3') before any chunk.
 * @param onChunk             Called with each paced audio chunk.
 * @param onEnd               Called once after the last chunk.
 * @returns Resolves when streaming completes; rejects on error, abort, or missing config.
 */
export const convertTextToSpeechStream = async (
    text: string,
    textToSpeechConfig: ICommonObject,
    options: ICommonObject,
    abortController: AbortController,
    onStart: (format: string) => void,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void
): Promise<void> => {
    return new Promise<void>((resolve, reject) => {
        // Set by processStreamWithRateLimit when it destroys the stream on
        // abort, so the listener below does not reject a second time.
        let streamDestroyed = false

        // Handle abort signal early
        if (abortController.signal.aborted) {
            reject(new Error('TTS generation aborted'))
            return
        }
        // The provider call and pumping run async; resolve/reject are handed
        // down to processStreamWithRateLimit, which settles the outer promise.
        const processStream = async () => {
            try {
                if (textToSpeechConfig) {
                    const credentialId = textToSpeechConfig.credentialId as string
                    const credentialData = await getCredentialData(credentialId ?? '', options)

                    switch (textToSpeechConfig.name) {
                        case TextToSpeechType.OPENAI_TTS: {
                            // Notify the caller of the format before the first chunk.
                            onStart('mp3')

                            const openai = new OpenAI({
                                apiKey: credentialData.openAIApiKey
                            })

                            const response = await openai.audio.speech.create(
                                {
                                    model: 'gpt-4o-mini-tts',
                                    // Falls back to 'alloy'; cast narrows to the SDK's voice union.
                                    voice: (textToSpeechConfig.voice || 'alloy') as
                                        | 'alloy'
                                        | 'ash'
                                        | 'ballad'
                                        | 'coral'
                                        | 'echo'
                                        | 'fable'
                                        | 'nova'
                                        | 'onyx'
                                        | 'sage'
                                        | 'shimmer',
                                    input: text,
                                    response_format: 'mp3'
                                },
                                {
                                    signal: abortController.signal
                                }
                            )

                            const stream = response.body as unknown as Readable
                            if (!stream) {
                                throw new Error('Failed to get response stream')
                            }

                            // 640-byte chunks every 20 ms — pacing for client playback.
                            await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 20, abortController, () => {
                                streamDestroyed = true
                            })
                            break
                        }

                        case TextToSpeechType.ELEVEN_LABS_TTS: {
                            onStart('mp3')

                            const client = new ElevenLabsClient({
                                apiKey: credentialData.elevenLabsApiKey
                            })

                            const response = await client.textToSpeech.stream(
                                // Default voice id — presumably ElevenLabs' stock "Rachel"; verify against SDK docs.
                                textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM',
                                {
                                    text: text,
                                    modelId: 'eleven_multilingual_v2'
                                },
                                { abortSignal: abortController.signal }
                            )

                            // The SDK returns a web ReadableStream; bridge to a Node Readable.
                            const stream = Readable.fromWeb(response as unknown as ReadableStream)
                            if (!stream) {
                                throw new Error('Failed to get response stream')
                            }

                            // Slower pacing (40 ms) than OpenAI for this provider.
                            await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 40, abortController, () => {
                                streamDestroyed = true
                            })
                            break
                        }
                    }
                    // NOTE(review): an unrecognized provider name falls through the
                    // switch and never settles the promise — confirm callers always
                    // pass a known provider.
                } else {
                    reject(new Error('Text to speech is not selected. Please configure TTS in the chatflow.'))
                }
            } catch (error) {
                reject(error)
            }
        }

        // Handle abort signal
        abortController.signal.addEventListener('abort', () => {
            if (!streamDestroyed) {
                reject(new Error('TTS generation aborted'))
            }
        })

        // Fire-and-forget: completion/failure is reported via resolve/reject above.
        processStream()
    })
}
|
||||
|
||||
const processStreamWithRateLimit = async (
|
||||
stream: Readable,
|
||||
onChunk: (chunk: Buffer) => void,
|
||||
onEnd: () => void,
|
||||
resolve: () => void,
|
||||
reject: (error: any) => void,
|
||||
targetChunkSize: number = 640,
|
||||
rateLimitMs: number = 20,
|
||||
abortController: AbortController,
|
||||
onStreamDestroy?: () => void
|
||||
) => {
|
||||
const TARGET_CHUNK_SIZE = targetChunkSize
|
||||
const RATE_LIMIT_MS = rateLimitMs
|
||||
|
||||
let buffer: Buffer = Buffer.alloc(0)
|
||||
let isEnded = false
|
||||
|
||||
const processChunks = async () => {
|
||||
while (!isEnded || buffer.length > 0) {
|
||||
// Check if aborted
|
||||
if (abortController.signal.aborted) {
|
||||
if (!stream.destroyed) {
|
||||
stream.destroy()
|
||||
}
|
||||
onStreamDestroy?.()
|
||||
reject(new Error('TTS generation aborted'))
|
||||
return
|
||||
}
|
||||
|
||||
if (buffer.length >= TARGET_CHUNK_SIZE) {
|
||||
const chunk = buffer.subarray(0, TARGET_CHUNK_SIZE)
|
||||
buffer = buffer.subarray(TARGET_CHUNK_SIZE)
|
||||
onChunk(chunk)
|
||||
await sleep(RATE_LIMIT_MS)
|
||||
} else if (isEnded && buffer.length > 0) {
|
||||
onChunk(buffer)
|
||||
buffer = Buffer.alloc(0)
|
||||
} else if (!isEnded) {
|
||||
await sleep(RATE_LIMIT_MS)
|
||||
} else {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
onEnd()
|
||||
resolve()
|
||||
}
|
||||
|
||||
stream.on('data', (chunk) => {
|
||||
if (!abortController.signal.aborted) {
|
||||
buffer = Buffer.concat([buffer, Buffer.from(chunk)])
|
||||
}
|
||||
})
|
||||
|
||||
stream.on('end', () => {
|
||||
isEnded = true
|
||||
})
|
||||
|
||||
stream.on('error', (error) => {
|
||||
reject(error)
|
||||
})
|
||||
|
||||
// Handle abort signal
|
||||
abortController.signal.addEventListener('abort', () => {
|
||||
if (!stream.destroyed) {
|
||||
stream.destroy()
|
||||
}
|
||||
onStreamDestroy?.()
|
||||
reject(new Error('TTS generation aborted'))
|
||||
})
|
||||
|
||||
processChunks().catch(reject)
|
||||
}
|
||||
|
||||
const sleep = (ms: number): Promise<void> => {
|
||||
return new Promise((resolve) => setTimeout(resolve, ms))
|
||||
}
|
||||
|
||||
export const getVoices = async (provider: string, credentialId: string, options: ICommonObject) => {
|
||||
const credentialData = await getCredentialData(credentialId ?? '', options)
|
||||
|
||||
switch (provider) {
|
||||
case TextToSpeechType.OPENAI_TTS:
|
||||
return [
|
||||
{ id: 'alloy', name: 'Alloy' },
|
||||
{ id: 'ash', name: 'Ash' },
|
||||
{ id: 'ballad', name: 'Ballad' },
|
||||
{ id: 'coral', name: 'Coral' },
|
||||
{ id: 'echo', name: 'Echo' },
|
||||
{ id: 'fable', name: 'Fable' },
|
||||
{ id: 'nova', name: 'Nova' },
|
||||
{ id: 'onyx', name: 'Onyx' },
|
||||
{ id: 'sage', name: 'Sage' },
|
||||
{ id: 'shimmer', name: 'Shimmer' }
|
||||
]
|
||||
|
||||
case TextToSpeechType.ELEVEN_LABS_TTS: {
|
||||
const client = new ElevenLabsClient({
|
||||
apiKey: credentialData.elevenLabsApiKey
|
||||
})
|
||||
|
||||
const voices = await client.voices.search({
|
||||
pageSize: 100,
|
||||
voiceType: 'default',
|
||||
category: 'premade'
|
||||
})
|
||||
|
||||
return voices.voices.map((voice) => ({
|
||||
id: voice.voiceId,
|
||||
name: voice.name,
|
||||
category: voice.category
|
||||
}))
|
||||
}
|
||||
|
||||
default:
|
||||
throw new Error(`Unsupported TTS provider: ${provider}`)
|
||||
}
|
||||
}
|
||||
|
|
@ -64,6 +64,7 @@ export interface IChatFlow {
|
|||
apikeyid?: string
|
||||
analytic?: string
|
||||
speechToText?: string
|
||||
textToSpeech?: string
|
||||
chatbotConfig?: string
|
||||
followUpPrompts?: string
|
||||
apiConfig?: string
|
||||
|
|
|
|||
|
|
@ -0,0 +1,226 @@
|
|||
import { Request, Response, NextFunction } from 'express'
|
||||
import chatflowsService from '../../services/chatflows'
|
||||
import textToSpeechService from '../../services/text-to-speech'
|
||||
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
|
||||
import { StatusCodes } from 'http-status-codes'
|
||||
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
|
||||
import { convertTextToSpeechStream } from 'flowise-components'
|
||||
import { databaseEntities } from '../../utils'
|
||||
|
||||
/**
 * POST handler that synthesizes `text` to speech and streams the audio back
 * as server-sent events (`tts_start` → `tts_data`* → `tts_end` / `tts_error`).
 *
 * The TTS provider is resolved either from the chatflow's persisted
 * textToSpeech config (when `chatflowId` is given) or from the request body.
 * An AbortController is registered in the shared pool under
 * `tts_<chatId>_<chatMessageId>` so abortTextToSpeech can cancel mid-stream.
 */
const generateTextToSpeech = async (req: Request, res: Response) => {
    try {
        const {
            chatId,
            chatflowId,
            chatMessageId,
            text,
            provider: bodyProvider,
            credentialId: bodyCredentialId,
            voice: bodyVoice,
            model: bodyModel
        } = req.body

        if (!text) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.generateTextToSpeech - text not provided!`
            )
        }

        let provider: string, credentialId: string, voice: string, model: string

        if (chatflowId) {
            // Get TTS config from chatflow
            const chatflow = await chatflowsService.getChatflowById(chatflowId)
            // NOTE(review): throws if chatflow.textToSpeech is unset/not JSON —
            // caught by the outer catch and reported as tts_error.
            const ttsConfig = JSON.parse(chatflow.textToSpeech)

            // Find the provider with status: true
            const activeProviderKey = Object.keys(ttsConfig).find((key) => ttsConfig[key].status === true)
            if (!activeProviderKey) {
                throw new InternalFlowiseError(
                    StatusCodes.BAD_REQUEST,
                    `Error: textToSpeechController.generateTextToSpeech - no active TTS provider configured in chatflow!`
                )
            }

            const providerConfig = ttsConfig[activeProviderKey]
            provider = activeProviderKey
            credentialId = providerConfig.credentialId
            voice = providerConfig.voice
            model = providerConfig.model
        } else {
            // Use TTS config from request body
            provider = bodyProvider
            credentialId = bodyCredentialId
            voice = bodyVoice
            model = bodyModel
        }

        if (!provider) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.generateTextToSpeech - provider not provided!`
            )
        }

        if (!credentialId) {
            throw new InternalFlowiseError(
                StatusCodes.BAD_REQUEST,
                `Error: textToSpeechController.generateTextToSpeech - credentialId not provided!`
            )
        }

        // Switch the response into SSE mode before any audio is produced.
        res.setHeader('Content-Type', 'text/event-stream')
        res.setHeader('Cache-Control', 'no-cache')
        res.setHeader('Connection', 'keep-alive')
        res.setHeader('Access-Control-Allow-Origin', '*')
        res.setHeader('Access-Control-Allow-Headers', 'Cache-Control')

        const appServer = getRunningExpressApp()
        // Minimal component-options object for credential resolution.
        const options = {
            orgId: '',
            chatflowid: chatflowId || '',
            chatId: chatId || '',
            appDataSource: appServer.AppDataSource,
            databaseEntities: databaseEntities
        }

        const textToSpeechConfig = {
            name: provider,
            credentialId: credentialId,
            voice: voice,
            model: model
        }

        // Create and store AbortController
        const abortController = new AbortController()
        const ttsAbortId = `tts_${chatId}_${chatMessageId}`
        appServer.abortControllerPool.add(ttsAbortId, abortController)

        try {
            await convertTextToSpeechStream(
                text,
                textToSpeechConfig,
                options,
                abortController,
                // onStart: announce the audio format to the client.
                (format: string) => {
                    const startResponse = {
                        event: 'tts_start',
                        data: { chatMessageId, format }
                    }
                    res.write('event: tts_start\n')
                    res.write(`data: ${JSON.stringify(startResponse)}\n\n`)
                },
                // onChunk: forward each audio chunk base64-encoded.
                (chunk: Buffer) => {
                    const audioBase64 = chunk.toString('base64')
                    const clientResponse = {
                        event: 'tts_data',
                        data: { chatMessageId, audioChunk: audioBase64 }
                    }
                    res.write('event: tts_data\n')
                    res.write(`data: ${JSON.stringify(clientResponse)}\n\n`)
                },
                // onEnd: close the SSE stream and release the abort handle.
                async () => {
                    const endResponse = {
                        event: 'tts_end',
                        data: { chatMessageId }
                    }
                    res.write('event: tts_end\n')
                    res.write(`data: ${JSON.stringify(endResponse)}\n\n`)
                    res.end()
                    // Clean up from pool on successful completion
                    appServer.abortControllerPool.remove(ttsAbortId)
                }
            )
        } catch (error) {
            // Clean up from pool on error
            appServer.abortControllerPool.remove(ttsAbortId)
            throw error
        }
    } catch (error) {
        // Report failures in SSE format too, so the client's event parser
        // sees a terminal tts_error regardless of when the failure occurred.
        if (!res.headersSent) {
            res.setHeader('Content-Type', 'text/event-stream')
            res.setHeader('Cache-Control', 'no-cache')
            res.setHeader('Connection', 'keep-alive')
        }

        const errorResponse = {
            event: 'tts_error',
            data: { error: error instanceof Error ? error.message : 'TTS generation failed' }
        }
        res.write('event: tts_error\n')
        res.write(`data: ${JSON.stringify(errorResponse)}\n\n`)
        res.end()
    }
}
|
||||
|
||||
const abortTextToSpeech = async (req: Request, res: Response) => {
|
||||
try {
|
||||
const { chatId, chatMessageId, chatflowId } = req.body
|
||||
|
||||
if (!chatId) {
|
||||
throw new InternalFlowiseError(
|
||||
StatusCodes.BAD_REQUEST,
|
||||
`Error: textToSpeechController.abortTextToSpeech - chatId not provided!`
|
||||
)
|
||||
}
|
||||
|
||||
if (!chatMessageId) {
|
||||
throw new InternalFlowiseError(
|
||||
StatusCodes.BAD_REQUEST,
|
||||
`Error: textToSpeechController.abortTextToSpeech - chatMessageId not provided!`
|
||||
)
|
||||
}
|
||||
|
||||
if (!chatflowId) {
|
||||
throw new InternalFlowiseError(
|
||||
StatusCodes.BAD_REQUEST,
|
||||
`Error: textToSpeechController.abortTextToSpeech - chatflowId not provided!`
|
||||
)
|
||||
}
|
||||
|
||||
const appServer = getRunningExpressApp()
|
||||
|
||||
// Abort the TTS generation using existing pool
|
||||
const ttsAbortId = `tts_${chatId}_${chatMessageId}`
|
||||
appServer.abortControllerPool.abort(ttsAbortId)
|
||||
|
||||
// Also abort the main chat flow AbortController for auto-TTS
|
||||
const chatFlowAbortId = `${chatflowId}_${chatId}`
|
||||
if (appServer.abortControllerPool.get(chatFlowAbortId)) {
|
||||
appServer.abortControllerPool.abort(chatFlowAbortId)
|
||||
appServer.sseStreamer.streamMetadataEvent(chatId, { chatId, chatMessageId })
|
||||
}
|
||||
|
||||
// Send abort event to client
|
||||
appServer.sseStreamer.streamTTSAbortEvent(chatId, chatMessageId)
|
||||
|
||||
res.json({ message: 'TTS stream aborted successfully', chatId, chatMessageId })
|
||||
} catch (error) {
|
||||
res.status(500).json({
|
||||
error: error instanceof Error ? error.message : 'Failed to abort TTS stream'
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
const getVoices = async (req: Request, res: Response, next: NextFunction) => {
|
||||
try {
|
||||
const { provider, credentialId } = req.query
|
||||
|
||||
if (!provider) {
|
||||
throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, `Error: textToSpeechController.getVoices - provider not provided!`)
|
||||
}
|
||||
|
||||
const voices = await textToSpeechService.getVoices(provider as any, credentialId as string)
|
||||
|
||||
return res.json(voices)
|
||||
} catch (error) {
|
||||
next(error)
|
||||
}
|
||||
}
|
||||
|
||||
export default {
|
||||
generateTextToSpeech,
|
||||
abortTextToSpeech,
|
||||
getVoices
|
||||
}
|
||||
|
|
@ -41,6 +41,9 @@ export class ChatFlow implements IChatFlow {
|
|||
@Column({ nullable: true, type: 'text' })
|
||||
speechToText?: string
|
||||
|
||||
@Column({ nullable: true, type: 'text' })
|
||||
textToSpeech?: string
|
||||
|
||||
@Column({ nullable: true, type: 'text' })
|
||||
followUpPrompts?: string
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,12 @@
|
|||
import { MigrationInterface, QueryRunner } from 'typeorm'
|
||||
|
||||
export class AddTextToSpeechToChatFlow1754986457485 implements MigrationInterface {
|
||||
public async up(queryRunner: QueryRunner): Promise<void> {
|
||||
const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
|
||||
if (!columnExists) queryRunner.query(`ALTER TABLE \`chat_flow\` ADD COLUMN \`textToSpeech\` TEXT;`)
|
||||
}
|
||||
|
||||
public async down(queryRunner: QueryRunner): Promise<void> {
|
||||
await queryRunner.query(`ALTER TABLE \`chat_flow\` DROP COLUMN \`textToSpeech\`;`)
|
||||
}
|
||||
}
|
||||
|
|
@ -36,6 +36,7 @@ import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEnt
|
|||
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
|
||||
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
|
||||
import { ModifyExecutionDataColumnType1747902489801 } from './1747902489801-ModifyExecutionDataColumnType'
|
||||
import { AddTextToSpeechToChatFlow1754986457485 } from './1754986457485-AddTextToSpeechToChatFlow'
|
||||
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
|
||||
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
|
||||
|
||||
|
|
@ -101,6 +102,7 @@ export const mariadbMigrations = [
|
|||
AddErrorToEvaluationRun1744964560174,
|
||||
ExecutionLinkWorkspaceId1746862866554,
|
||||
ModifyExecutionDataColumnType1747902489801,
|
||||
AddTextToSpeechToChatFlow1754986457485,
|
||||
ModifyChatflowType1755066758601,
|
||||
AddChatFlowNameIndex1755748356008
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,12 @@
|
|||
import { MigrationInterface, QueryRunner } from 'typeorm'
|
||||
|
||||
export class AddTextToSpeechToChatFlow1754986468397 implements MigrationInterface {
|
||||
public async up(queryRunner: QueryRunner): Promise<void> {
|
||||
const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
|
||||
if (!columnExists) queryRunner.query(`ALTER TABLE \`chat_flow\` ADD COLUMN \`textToSpeech\` TEXT;`)
|
||||
}
|
||||
|
||||
public async down(queryRunner: QueryRunner): Promise<void> {
|
||||
await queryRunner.query(`ALTER TABLE \`chat_flow\` DROP COLUMN \`textToSpeech\`;`)
|
||||
}
|
||||
}
|
||||
|
|
@ -37,6 +37,7 @@ import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpe
|
|||
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
|
||||
import { FixErrorsColumnInEvaluationRun1746437114935 } from './1746437114935-FixErrorsColumnInEvaluationRun'
|
||||
import { ModifyExecutionDataColumnType1747902489801 } from './1747902489801-ModifyExecutionDataColumnType'
|
||||
import { AddTextToSpeechToChatFlow1754986468397 } from './1754986468397-AddTextToSpeechToChatFlow'
|
||||
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
|
||||
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
|
||||
|
||||
|
|
@ -103,6 +104,7 @@ export const mysqlMigrations = [
|
|||
FixErrorsColumnInEvaluationRun1746437114935,
|
||||
ExecutionLinkWorkspaceId1746862866554,
|
||||
ModifyExecutionDataColumnType1747902489801,
|
||||
AddTextToSpeechToChatFlow1754986468397,
|
||||
ModifyChatflowType1755066758601,
|
||||
AddChatFlowNameIndex1755748356008
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
import { MigrationInterface, QueryRunner } from 'typeorm'

/**
 * Adds the nullable `textToSpeech` TEXT column to `chat_flow` (Postgres),
 * which stores the per-chatflow TTS provider configuration as JSON.
 */
export class AddTextToSpeechToChatFlow1754986480347 implements MigrationInterface {
    public async up(queryRunner: QueryRunner): Promise<void> {
        // IF NOT EXISTS keeps the migration idempotent on re-runs.
        await queryRunner.query(`ALTER TABLE "chat_flow" ADD COLUMN IF NOT EXISTS "textToSpeech" TEXT;`)
    }

    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`ALTER TABLE "chat_flow" DROP COLUMN "textToSpeech";`)
    }
}
|
||||
|
|
@ -36,6 +36,7 @@ import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEnt
|
|||
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
|
||||
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
|
||||
import { ModifyExecutionSessionIdFieldType1748450230238 } from './1748450230238-ModifyExecutionSessionIdFieldType'
|
||||
import { AddTextToSpeechToChatFlow1754986480347 } from './1754986480347-AddTextToSpeechToChatFlow'
|
||||
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
|
||||
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
|
||||
|
||||
|
|
@ -101,6 +102,7 @@ export const postgresMigrations = [
|
|||
AddErrorToEvaluationRun1744964560174,
|
||||
ExecutionLinkWorkspaceId1746862866554,
|
||||
ModifyExecutionSessionIdFieldType1748450230238,
|
||||
AddTextToSpeechToChatFlow1754986480347,
|
||||
ModifyChatflowType1755066758601,
|
||||
AddChatFlowNameIndex1755748356008
|
||||
]
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
import { MigrationInterface, QueryRunner } from 'typeorm'

/**
 * Adds the nullable `textToSpeech` TEXT column to `chat_flow` (SQLite),
 * which stores the per-chatflow TTS provider configuration as JSON.
 * NOTE(review): unlike the other dialects, there is no existence guard here —
 * SQLite lacks ADD COLUMN IF NOT EXISTS; re-running would error. Confirm the
 * migrations table prevents re-execution.
 */
export class AddTextToSpeechToChatFlow1754986486669 implements MigrationInterface {
    public async up(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`ALTER TABLE "chat_flow" ADD COLUMN "textToSpeech" TEXT;`)
    }

    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`ALTER TABLE "chat_flow" DROP COLUMN "textToSpeech";`)
    }
}
|
||||
|
|
@ -34,6 +34,7 @@ import { AddSeqNoToDatasetRow1733752119696 } from './1733752119696-AddSeqNoToDat
|
|||
import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEntity'
|
||||
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
|
||||
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
|
||||
import { AddTextToSpeechToChatFlow1754986486669 } from './1754986486669-AddTextToSpeechToChatFlow'
|
||||
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
|
||||
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
|
||||
|
||||
|
|
@ -97,6 +98,7 @@ export const sqliteMigrations = [
|
|||
FixOpenSourceAssistantTable1743758056188,
|
||||
AddErrorToEvaluationRun1744964560174,
|
||||
ExecutionLinkWorkspaceId1746862866554,
|
||||
AddTextToSpeechToChatFlow1754986486669,
|
||||
ModifyChatflowType1755066758601,
|
||||
AddChatFlowNameIndex1755748356008
|
||||
]
|
||||
|
|
|
|||
|
|
@ -380,6 +380,70 @@ export class RedisEventPublisher implements IServerSideEventStreamer {
|
|||
}
|
||||
}
|
||||
|
||||
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
|
||||
try {
|
||||
this.redisPublisher.publish(
|
||||
chatId,
|
||||
JSON.stringify({
|
||||
chatId,
|
||||
chatMessageId,
|
||||
eventType: 'tts_start',
|
||||
data: { format }
|
||||
})
|
||||
)
|
||||
} catch (error) {
|
||||
console.error('Error streaming TTS start event:', error)
|
||||
}
|
||||
}
|
||||
|
||||
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
|
||||
try {
|
||||
this.redisPublisher.publish(
|
||||
chatId,
|
||||
JSON.stringify({
|
||||
chatId,
|
||||
chatMessageId,
|
||||
eventType: 'tts_data',
|
||||
data: audioChunk
|
||||
})
|
||||
)
|
||||
} catch (error) {
|
||||
console.error('Error streaming TTS data event:', error)
|
||||
}
|
||||
}
|
||||
|
||||
streamTTSEndEvent(chatId: string, chatMessageId: string): void {
|
||||
try {
|
||||
this.redisPublisher.publish(
|
||||
chatId,
|
||||
JSON.stringify({
|
||||
chatId,
|
||||
chatMessageId,
|
||||
eventType: 'tts_end',
|
||||
data: {}
|
||||
})
|
||||
)
|
||||
} catch (error) {
|
||||
console.error('Error streaming TTS end event:', error)
|
||||
}
|
||||
}
|
||||
|
||||
streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
|
||||
try {
|
||||
this.redisPublisher.publish(
|
||||
chatId,
|
||||
JSON.stringify({
|
||||
chatId,
|
||||
chatMessageId,
|
||||
eventType: 'tts_abort',
|
||||
data: {}
|
||||
})
|
||||
)
|
||||
} catch (error) {
|
||||
console.error('Error streaming TTS abort event:', error)
|
||||
}
|
||||
}
|
||||
|
||||
async disconnect() {
|
||||
if (this.redisPublisher) {
|
||||
await this.redisPublisher.quit()
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ export class RedisEventSubscriber {
|
|||
private handleEvent(message: string) {
|
||||
// Parse the message from Redis
|
||||
const event = JSON.parse(message)
|
||||
const { eventType, chatId, data } = event
|
||||
const { eventType, chatId, chatMessageId, data } = event
|
||||
|
||||
// Stream the event to the client
|
||||
switch (eventType) {
|
||||
|
|
@ -121,6 +121,9 @@ export class RedisEventSubscriber {
|
|||
case 'usedTools':
|
||||
this.sseStreamer.streamUsedToolsEvent(chatId, data)
|
||||
break
|
||||
case 'calledTools':
|
||||
this.sseStreamer.streamCalledToolsEvent(chatId, data)
|
||||
break
|
||||
case 'fileAnnotations':
|
||||
this.sseStreamer.streamFileAnnotationsEvent(chatId, data)
|
||||
break
|
||||
|
|
@ -154,6 +157,21 @@ export class RedisEventSubscriber {
|
|||
case 'metadata':
|
||||
this.sseStreamer.streamMetadataEvent(chatId, data)
|
||||
break
|
||||
case 'usageMetadata':
|
||||
this.sseStreamer.streamUsageMetadataEvent(chatId, data)
|
||||
break
|
||||
case 'tts_start':
|
||||
this.sseStreamer.streamTTSStartEvent(chatId, chatMessageId, data.format)
|
||||
break
|
||||
case 'tts_data':
|
||||
this.sseStreamer.streamTTSDataEvent(chatId, chatMessageId, data)
|
||||
break
|
||||
case 'tts_end':
|
||||
this.sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
|
||||
break
|
||||
case 'tts_abort':
|
||||
this.sseStreamer.streamTTSAbortEvent(chatId, chatMessageId)
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -55,6 +55,7 @@ import nvidiaNimRouter from './nvidia-nim'
|
|||
import executionsRouter from './executions'
|
||||
import validationRouter from './validation'
|
||||
import agentflowv2GeneratorRouter from './agentflowv2-generator'
|
||||
import textToSpeechRouter from './text-to-speech'
|
||||
|
||||
import authRouter from '../enterprise/routes/auth'
|
||||
import auditRouter from '../enterprise/routes/audit'
|
||||
|
|
@ -124,6 +125,7 @@ router.use('/nvidia-nim', nvidiaNimRouter)
|
|||
router.use('/executions', executionsRouter)
|
||||
router.use('/validation', validationRouter)
|
||||
router.use('/agentflowv2-generator', agentflowv2GeneratorRouter)
|
||||
router.use('/text-to-speech', textToSpeechRouter)
|
||||
|
||||
router.use('/auth', authRouter)
|
||||
router.use('/audit', IdentityManager.checkFeatureByPlan('feat:login-activity'), auditRouter)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,12 @@
|
|||
import express from 'express'
|
||||
import textToSpeechController from '../../controllers/text-to-speech'
|
||||
|
||||
const router = express.Router()
|
||||
|
||||
router.post('/generate', textToSpeechController.generateTextToSpeech)
|
||||
|
||||
router.post('/abort', textToSpeechController.abortTextToSpeech)
|
||||
|
||||
router.get('/voices', textToSpeechController.getVoices)
|
||||
|
||||
export default router
|
||||
|
|
@ -363,7 +363,18 @@ const getSinglePublicChatbotConfig = async (chatflowId: string): Promise<any> =>
|
|||
if (dbResponse.chatbotConfig || uploadsConfig) {
|
||||
try {
|
||||
const parsedConfig = dbResponse.chatbotConfig ? JSON.parse(dbResponse.chatbotConfig) : {}
|
||||
return { ...parsedConfig, uploads: uploadsConfig, flowData: dbResponse.flowData }
|
||||
const ttsConfig =
|
||||
typeof dbResponse.textToSpeech === 'string' ? JSON.parse(dbResponse.textToSpeech) : dbResponse.textToSpeech
|
||||
|
||||
let isTTSEnabled = false
|
||||
if (ttsConfig) {
|
||||
Object.keys(ttsConfig).forEach((provider) => {
|
||||
if (provider !== 'none' && ttsConfig?.[provider]?.status) {
|
||||
isTTSEnabled = true
|
||||
}
|
||||
})
|
||||
}
|
||||
return { ...parsedConfig, uploads: uploadsConfig, flowData: dbResponse.flowData, isTTSEnabled }
|
||||
} catch (e) {
|
||||
throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error parsing Chatbot Config for Chatflow ${chatflowId}`)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,52 @@
|
|||
import { StatusCodes } from 'http-status-codes'
|
||||
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
|
||||
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
|
||||
import { getErrorMessage } from '../../errors/utils'
|
||||
import { getVoices } from 'flowise-components'
|
||||
import { databaseEntities } from '../../utils'
|
||||
|
||||
export enum TextToSpeechProvider {
|
||||
OPENAI = 'openai',
|
||||
ELEVEN_LABS = 'elevenlabs'
|
||||
}
|
||||
|
||||
export interface TTSRequest {
|
||||
text: string
|
||||
provider: TextToSpeechProvider
|
||||
credentialId: string
|
||||
voice?: string
|
||||
model?: string
|
||||
}
|
||||
|
||||
export interface TTSResponse {
|
||||
audioBuffer: Buffer
|
||||
contentType: string
|
||||
}
|
||||
|
||||
const getVoicesForProvider = async (provider: string, credentialId?: string): Promise<any[]> => {
|
||||
try {
|
||||
if (!credentialId) {
|
||||
throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Credential ID required for this provider')
|
||||
}
|
||||
|
||||
const appServer = getRunningExpressApp()
|
||||
const options = {
|
||||
orgId: '',
|
||||
chatflowid: '',
|
||||
chatId: '',
|
||||
appDataSource: appServer.AppDataSource,
|
||||
databaseEntities: databaseEntities
|
||||
}
|
||||
|
||||
return await getVoices(provider, credentialId, options)
|
||||
} catch (error) {
|
||||
throw new InternalFlowiseError(
|
||||
StatusCodes.INTERNAL_SERVER_ERROR,
|
||||
`Error: textToSpeechService.getVoices - ${getErrorMessage(error)}`
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// Public surface of the text-to-speech service
export default {
    getVoices: getVoicesForProvider
}
|
||||
|
|
@ -257,4 +257,50 @@ export class SSEStreamer implements IServerSideEventStreamer {
|
|||
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
||||
}
|
||||
}
|
||||
|
||||
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
|
||||
const client = this.clients[chatId]
|
||||
if (client) {
|
||||
const clientResponse = {
|
||||
event: 'tts_start',
|
||||
data: { chatMessageId, format }
|
||||
}
|
||||
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
||||
}
|
||||
}
|
||||
|
||||
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
|
||||
const client = this.clients[chatId]
|
||||
if (client) {
|
||||
const clientResponse = {
|
||||
event: 'tts_data',
|
||||
data: { chatMessageId, audioChunk }
|
||||
}
|
||||
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
||||
}
|
||||
}
|
||||
|
||||
streamTTSEndEvent(chatId: string, chatMessageId: string): void {
|
||||
const client = this.clients[chatId]
|
||||
if (client) {
|
||||
const clientResponse = {
|
||||
event: 'tts_end',
|
||||
data: { chatMessageId }
|
||||
}
|
||||
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
||||
}
|
||||
}
|
||||
|
||||
streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
|
||||
const client = this.clients[chatId]
|
||||
if (client) {
|
||||
const clientResponse = {
|
||||
event: 'tts_abort',
|
||||
data: { chatMessageId }
|
||||
}
|
||||
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
||||
client.response.end()
|
||||
delete this.clients[chatId]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ import { ChatMessage } from '../database/entities/ChatMessage'
|
|||
import { Telemetry } from './telemetry'
|
||||
import { getWorkspaceSearchOptions } from '../enterprise/utils/ControllerServiceUtils'
|
||||
import { UsageCacheManager } from '../UsageCacheManager'
|
||||
import { generateTTSForResponseStream, shouldAutoPlayTTS } from './buildChatflow'
|
||||
|
||||
interface IWaitingNode {
|
||||
nodeId: string
|
||||
|
|
@ -2208,5 +2209,27 @@ export const executeAgentFlow = async ({
|
|||
|
||||
if (sessionId) result.sessionId = sessionId
|
||||
|
||||
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
|
||||
const options = {
|
||||
orgId,
|
||||
chatflowid,
|
||||
chatId,
|
||||
appDataSource,
|
||||
databaseEntities
|
||||
}
|
||||
|
||||
if (sseStreamer) {
|
||||
await generateTTSForResponseStream(
|
||||
result.text,
|
||||
chatflow.textToSpeech,
|
||||
options,
|
||||
chatId,
|
||||
chatMessage?.id,
|
||||
sseStreamer,
|
||||
abortController
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import { omit } from 'lodash'
|
|||
import {
|
||||
IFileUpload,
|
||||
convertSpeechToText,
|
||||
convertTextToSpeechStream,
|
||||
ICommonObject,
|
||||
addSingleFileToStorage,
|
||||
generateFollowUpPrompts,
|
||||
|
|
@ -16,7 +17,8 @@ import {
|
|||
getFileFromUpload,
|
||||
removeSpecificFileFromUpload,
|
||||
EvaluationRunner,
|
||||
handleEscapeCharacters
|
||||
handleEscapeCharacters,
|
||||
IServerSideEventStreamer
|
||||
} from 'flowise-components'
|
||||
import { StatusCodes } from 'http-status-codes'
|
||||
import {
|
||||
|
|
@ -70,9 +72,74 @@ import { executeAgentFlow } from './buildAgentflow'
|
|||
import { Workspace } from '../enterprise/database/entities/workspace.entity'
|
||||
import { Organization } from '../enterprise/database/entities/organization.entity'
|
||||
|
||||
/*
|
||||
* Initialize the ending node to be executed
|
||||
*/
|
||||
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
|
||||
if (!textToSpeechConfig) return false
|
||||
try {
|
||||
const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
|
||||
for (const providerKey in config) {
|
||||
const provider = config[providerKey]
|
||||
if (provider && provider.status === true && provider.autoPlay === true) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
} catch (error) {
|
||||
logger.error(`Error parsing textToSpeechConfig: ${getErrorMessage(error)}`)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
const generateTTSForResponseStream = async (
|
||||
responseText: string,
|
||||
textToSpeechConfig: string | undefined,
|
||||
options: ICommonObject,
|
||||
chatId: string,
|
||||
chatMessageId: string,
|
||||
sseStreamer: IServerSideEventStreamer,
|
||||
abortController?: AbortController
|
||||
): Promise<void> => {
|
||||
try {
|
||||
if (!textToSpeechConfig) return
|
||||
const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
|
||||
|
||||
let activeProviderConfig = null
|
||||
for (const providerKey in config) {
|
||||
const provider = config[providerKey]
|
||||
if (provider && provider.status === true) {
|
||||
activeProviderConfig = {
|
||||
name: providerKey,
|
||||
credentialId: provider.credentialId,
|
||||
voice: provider.voice,
|
||||
model: provider.model
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if (!activeProviderConfig) return
|
||||
|
||||
await convertTextToSpeechStream(
|
||||
responseText,
|
||||
activeProviderConfig,
|
||||
options,
|
||||
abortController || new AbortController(),
|
||||
(format: string) => {
|
||||
sseStreamer.streamTTSStartEvent(chatId, chatMessageId, format)
|
||||
},
|
||||
(chunk: Buffer) => {
|
||||
const audioBase64 = chunk.toString('base64')
|
||||
sseStreamer.streamTTSDataEvent(chatId, chatMessageId, audioBase64)
|
||||
},
|
||||
() => {
|
||||
sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
|
||||
}
|
||||
)
|
||||
} catch (error) {
|
||||
logger.error(`[server]: TTS streaming failed: ${getErrorMessage(error)}`)
|
||||
sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
|
||||
}
|
||||
}
|
||||
|
||||
const initEndingNode = async ({
|
||||
endingNodeIds,
|
||||
componentNodes,
|
||||
|
|
@ -833,6 +900,17 @@ export const executeFlow = async ({
|
|||
if (memoryType) result.memoryType = memoryType
|
||||
if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
|
||||
|
||||
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
|
||||
const options = {
|
||||
orgId,
|
||||
chatflowid,
|
||||
chatId,
|
||||
appDataSource,
|
||||
databaseEntities
|
||||
}
|
||||
await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, chatMessage?.id, sseStreamer, signal)
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
|
@ -1064,3 +1142,5 @@ const incrementFailedMetricCounter = (metricsProvider: IMetricsProvider, isInter
|
|||
)
|
||||
}
|
||||
}
|
||||
|
||||
export { shouldAutoPlayTTS, generateTTSForResponseStream }
|
||||
|
|
|
|||
|
|
@ -41,6 +41,8 @@ export const WHITELIST_URLS = [
|
|||
'/api/v1/user/test',
|
||||
'/api/v1/oauth2-credential/callback',
|
||||
'/api/v1/oauth2-credential/refresh',
|
||||
'/api/v1/text-to-speech/generate',
|
||||
'/api/v1/text-to-speech/abort',
|
||||
AzureSSO.LOGIN_URI,
|
||||
AzureSSO.LOGOUT_URI,
|
||||
AzureSSO.CALLBACK_URI,
|
||||
|
|
|
|||
|
|
@ -0,0 +1,16 @@
|
|||
import client from './client'
|
||||
|
||||
// Abort an in-flight text-to-speech generation
const abortTTS = (body) => client.post('/text-to-speech/abort', body)

// Generate speech for the given text; the response body is raw audio bytes,
// hence responseType 'arraybuffer'
const generateVoice = (body) => client.post('/text-to-speech/generate', body, { responseType: 'arraybuffer' })

// List the available voices for a provider/credential pair
const listVoices = (params) => client.get('/text-to-speech/voices', { params })

export default {
    abortTTS,
    generateVoice,
    listVoices
}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
|
||||
<rect width="24" height="24" rx="4" fill="#000000"/>
|
||||
<path d="M6 8h3v8H6V8zm5-2h3v12h-3V6zm5 4h3v4h-3v-4z" fill="#ffffff"/>
|
||||
<circle cx="7.5" cy="12" r="1" fill="#00ff88"/>
|
||||
<circle cx="12.5" cy="12" r="1" fill="#00ff88"/>
|
||||
<circle cx="17.5" cy="12" r="1" fill="#00ff88"/>
|
||||
</svg>
|
||||
|
After Width: | Height: | Size: 383 B |
|
|
@ -4,6 +4,7 @@ import { createPortal } from 'react-dom'
|
|||
import { Box, Dialog, DialogContent, DialogTitle, Tabs, Tab } from '@mui/material'
|
||||
import { tabsClasses } from '@mui/material/Tabs'
|
||||
import SpeechToText from '@/ui-component/extended/SpeechToText'
|
||||
import TextToSpeech from '@/ui-component/extended/TextToSpeech'
|
||||
import Security from '@/ui-component/extended/Security'
|
||||
import ChatFeedback from '@/ui-component/extended/ChatFeedback'
|
||||
import AnalyseFlow from '@/ui-component/extended/AnalyseFlow'
|
||||
|
|
@ -30,6 +31,10 @@ const CHATFLOW_CONFIGURATION_TABS = [
|
|||
label: 'Speech to Text',
|
||||
id: 'speechToText'
|
||||
},
|
||||
{
|
||||
label: 'Text to Speech',
|
||||
id: 'textToSpeech'
|
||||
},
|
||||
{
|
||||
label: 'Chat Feedback',
|
||||
id: 'chatFeedback'
|
||||
|
|
@ -125,18 +130,19 @@ const ChatflowConfigurationDialog = ({ show, isAgentCanvas, dialogProps, onCance
|
|||
alignItems: 'center',
|
||||
mb: 1
|
||||
}}
|
||||
key={index}
|
||||
key={item.id}
|
||||
label={item.label}
|
||||
{...a11yProps(index)}
|
||||
></Tab>
|
||||
))}
|
||||
</Tabs>
|
||||
{filteredTabs.map((item, index) => (
|
||||
<TabPanel key={index} value={tabValue} index={index}>
|
||||
<TabPanel key={item.id} value={tabValue} index={index}>
|
||||
{item.id === 'security' && <Security dialogProps={dialogProps} />}
|
||||
{item.id === 'conversationStarters' ? <StarterPrompts dialogProps={dialogProps} /> : null}
|
||||
{item.id === 'followUpPrompts' ? <FollowUpPrompts dialogProps={dialogProps} /> : null}
|
||||
{item.id === 'speechToText' ? <SpeechToText dialogProps={dialogProps} /> : null}
|
||||
{item.id === 'textToSpeech' ? <TextToSpeech dialogProps={dialogProps} /> : null}
|
||||
{item.id === 'chatFeedback' ? <ChatFeedback dialogProps={dialogProps} /> : null}
|
||||
{item.id === 'analyseChatflow' ? <AnalyseFlow dialogProps={dialogProps} /> : null}
|
||||
{item.id === 'leads' ? <Leads dialogProps={dialogProps} /> : null}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,311 @@
|
|||
import { useRef, useEffect, useState, useCallback } from 'react'
|
||||
import PropTypes from 'prop-types'
|
||||
import { Box, IconButton, CircularProgress } from '@mui/material'
|
||||
import { IconPlayerPlay, IconPlayerPause } from '@tabler/icons-react'
|
||||
import { useTheme } from '@mui/material/styles'
|
||||
|
||||
/**
 * Audio player with a clickable waveform visualization.
 *
 * Renders a play/pause button next to a canvas of thin vertical bars. For
 * blob: audio sources the real waveform is decoded via the Web Audio API;
 * otherwise a randomized placeholder waveform is shown. Playback progress
 * tints the played portion of the bars and clicking the canvas seeks.
 *
 * Playback itself is controlled by the parent via onPlay/onPause/isPlaying;
 * the component either owns a hidden <audio> element or drives an external
 * one passed through externalAudioRef.
 */
const AudioWaveform = ({
    audioSrc,
    onPlay,
    onPause,
    onEnded,
    isPlaying = false,
    duration: _duration = 0,
    isGenerating = false,
    disabled = false,
    externalAudioRef = null,
    resetProgress = false
}) => {
    const canvasRef = useRef(null)
    const audioRef = useRef(null)
    // Holds the current requestAnimationFrame id for the progress loop
    const animationRef = useRef(null)
    const theme = useTheme()

    // Playback progress in percent (0-100)
    const [progress, setProgress] = useState(0)
    const [_audioBuffer, setAudioBuffer] = useState(null)
    // Per-bar amplitudes (0-100) rendered on the canvas
    const [waveformData, setWaveformData] = useState([])

    // Generate waveform visualization data
    const generateWaveform = useCallback((buffer) => {
        if (!buffer) return []

        const rawData = buffer.getChannelData(0)
        const samples = 200 // More bars for smoother appearance like reference
        const blockSize = Math.floor(rawData.length / samples)
        const filteredData = []

        // Average the absolute amplitude over each block of samples
        for (let i = 0; i < samples; i++) {
            let blockStart = blockSize * i
            let sum = 0
            for (let j = 0; j < blockSize; j++) {
                sum += Math.abs(rawData[blockStart + j])
            }
            filteredData.push(sum / blockSize)
        }

        // Normalize the data
        // NOTE(review): a fully-silent buffer gives maxValue 0 and NaN bars — confirm inputs are never all-zero
        const maxValue = Math.max(...filteredData)
        return filteredData.map((value) => (value / maxValue) * 100)
    }, [])

    // Generate realistic placeholder waveform like in reference
    const generatePlaceholderWaveform = useCallback(() => {
        const samples = 200
        const waveform = []

        for (let i = 0; i < samples; i++) {
            // Create a more realistic waveform pattern
            const position = i / samples
            const baseHeight = 20 + Math.sin(position * Math.PI * 4) * 15
            const variation = Math.random() * 40 + 10
            const envelope = Math.sin(position * Math.PI) * 0.8 + 0.2

            waveform.push((baseHeight + variation) * envelope)
        }

        return waveform
    }, [])

    // Draw waveform on canvas
    const drawWaveform = useCallback(() => {
        const canvas = canvasRef.current
        if (!canvas || waveformData.length === 0) return

        const ctx = canvas.getContext('2d')

        // Handle high DPI displays for crisp rendering
        const dpr = window.devicePixelRatio || 1
        const rect = canvas.getBoundingClientRect()

        canvas.width = rect.width * dpr
        canvas.height = rect.height * dpr
        ctx.scale(dpr, dpr)

        canvas.style.width = rect.width + 'px'
        canvas.style.height = rect.height + 'px'

        ctx.clearRect(0, 0, rect.width, rect.height)

        // More bars for smoother appearance like the reference
        const totalBars = waveformData.length
        const barWidth = 2 // Fixed thin bar width like in reference
        const barSpacing = 1 // Small gap between bars
        const totalWidth = rect.width
        // Center the bar group horizontally within the canvas
        const startX = (totalWidth - totalBars * (barWidth + barSpacing)) / 2
        const centerY = rect.height / 2

        waveformData.forEach((value, index) => {
            const barHeight = Math.max(2, (value / 100) * (rect.height * 0.8))
            const x = startX + index * (barWidth + barSpacing)

            // Determine color based on playback progress
            const progressIndex = Math.floor((progress / 100) * waveformData.length)
            const isPlayed = index <= progressIndex

            ctx.fillStyle = isPlayed ? theme.palette.primary.main : theme.palette.mode === 'dark' ? '#444' : '#ccc'

            // Draw thin vertical bars like in reference
            ctx.fillRect(x, centerY - barHeight / 2, barWidth, barHeight)
        })
    }, [waveformData, progress, theme])

    // Load and decode audio for waveform generation
    useEffect(() => {
        if (audioSrc && audioSrc.startsWith('blob:')) {
            const loadAudioBuffer = async () => {
                try {
                    const response = await fetch(audioSrc)
                    const arrayBuffer = await response.arrayBuffer()
                    // NOTE(review): a new AudioContext is created per load and never closed — consider closing to free resources
                    const audioContext = new (window.AudioContext || window.webkitAudioContext)()
                    const buffer = await audioContext.decodeAudioData(arrayBuffer)
                    setAudioBuffer(buffer)
                    const waveform = generateWaveform(buffer)
                    setWaveformData(waveform)
                } catch (error) {
                    console.error('Error loading audio buffer:', error)
                    // Generate placeholder waveform
                    const placeholder = generatePlaceholderWaveform()
                    setWaveformData(placeholder)
                }
            }
            loadAudioBuffer()
        } else {
            // Always show placeholder waveform when no audio source
            const placeholder = generatePlaceholderWaveform()
            setWaveformData(placeholder)
        }
    }, [audioSrc, generateWaveform, generatePlaceholderWaveform])

    // Reset progress when resetProgress prop is true
    useEffect(() => {
        if (resetProgress) {
            setProgress(0)
        }
    }, [resetProgress])

    // Draw waveform when data changes or progress updates
    useEffect(() => {
        drawWaveform()
    }, [drawWaveform, progress])

    // Update progress during playback
    useEffect(() => {
        const activeAudioRef = externalAudioRef || audioRef.current
        if (isPlaying && activeAudioRef && audioSrc) {
            const updateProgress = () => {
                const audio = externalAudioRef || audioRef.current
                if (audio && audio.duration && !isNaN(audio.duration)) {
                    const currentProgress = (audio.currentTime / audio.duration) * 100
                    setProgress(currentProgress)
                }
                // Keep looping only while audio is actually playing
                if (isPlaying && audio && !audio.paused) {
                    animationRef.current = requestAnimationFrame(updateProgress)
                }
            }

            // Start the update loop
            animationRef.current = requestAnimationFrame(updateProgress)
        } else {
            if (animationRef.current) {
                cancelAnimationFrame(animationRef.current)
            }
        }

        // Cancel any pending frame on cleanup to avoid updates after unmount
        return () => {
            if (animationRef.current) {
                cancelAnimationFrame(animationRef.current)
            }
        }
    }, [isPlaying, audioSrc, externalAudioRef])

    // Delegate play/pause intent to the parent; parent flips isPlaying
    const handlePlayPause = () => {
        if (isPlaying) {
            onPause?.()
        } else {
            onPlay?.()
        }
    }

    // Handle canvas click for seeking
    const handleCanvasClick = (event) => {
        const activeAudio = externalAudioRef || audioRef.current
        if (!activeAudio || !activeAudio.duration || disabled || isGenerating) return

        const canvas = canvasRef.current
        const rect = canvas.getBoundingClientRect()
        const clickX = event.clientX - rect.left

        // Use the actual canvas display width for more accurate clicking
        const clickProgress = Math.max(0, Math.min(100, (clickX / rect.width) * 100))
        const seekTime = (clickProgress / 100) * activeAudio.duration

        activeAudio.currentTime = seekTime
        setProgress(clickProgress)
    }

    return (
        <Box sx={{ width: '100%' }}>
            {/* Hidden audio element for duration and seeking - only if no external ref */}
            {audioSrc && !externalAudioRef && (
                <audio
                    ref={audioRef}
                    src={audioSrc}
                    onLoadedMetadata={() => {
                        if (audioRef.current) {
                            setProgress(0)
                        }
                    }}
                    onTimeUpdate={() => {
                        {/* eslint-style note: keeps progress correct even if the rAF loop is not running */}
                        // Additional progress update on timeupdate event
                        const audio = audioRef.current
                        if (audio && audio.duration && !isNaN(audio.duration)) {
                            const currentProgress = (audio.currentTime / audio.duration) * 100
                            setProgress(currentProgress)
                        }
                    }}
                    onEnded={() => {
                        setProgress(0)
                        onEnded?.()
                    }}
                    style={{ display: 'none' }}
                >
                    <track kind='captions' />
                </audio>
            )}

            {/* Play button and Waveform side by side */}
            <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
                {/* Play/Pause Button */}
                <IconButton
                    onClick={handlePlayPause}
                    disabled={disabled || isGenerating}
                    size='small'
                    sx={{
                        width: 32,
                        height: 32,
                        flexShrink: 0,
                        backgroundColor: isPlaying ? 'transparent' : theme.palette.primary.main,
                        color: isPlaying ? theme.palette.primary.main : 'white',
                        border: isPlaying ? `1px solid ${theme.palette.primary.main}` : 'none',
                        '&:hover': {
                            backgroundColor: isPlaying ? theme.palette.primary.main : theme.palette.primary.dark,
                            color: 'white'
                        },
                        '&:disabled': {
                            backgroundColor: theme.palette.action.disabled,
                            color: theme.palette.action.disabled,
                            border: 'none'
                        }
                    }}
                >
                    {isGenerating ? (
                        <CircularProgress size={16} />
                    ) : isPlaying ? (
                        <IconPlayerPause size={16} />
                    ) : (
                        <IconPlayerPlay size={16} />
                    )}
                </IconButton>

                {/* Waveform Canvas */}
                <Box
                    sx={{
                        flex: 1,
                        cursor: !disabled && !isGenerating && audioSrc ? 'pointer' : 'default',
                        display: 'flex',
                        alignItems: 'center'
                    }}
                >
                    <canvas
                        ref={canvasRef}
                        width={400}
                        height={32}
                        onClick={handleCanvasClick}
                        style={{
                            width: '100%',
                            height: '32px',
                            backgroundColor: 'transparent',
                            opacity: disabled ? 0.6 : 1,
                            display: 'block'
                        }}
                    />
                </Box>
            </Box>
        </Box>
    )
}
|
||||
|
||||
// Runtime prop validation; mirrors the destructured props and their defaults above
AudioWaveform.propTypes = {
    audioSrc: PropTypes.string,
    onPlay: PropTypes.func,
    onPause: PropTypes.func,
    onEnded: PropTypes.func,
    isPlaying: PropTypes.bool,
    duration: PropTypes.number,
    isGenerating: PropTypes.bool,
    disabled: PropTypes.bool,
    externalAudioRef: PropTypes.object,
    resetProgress: PropTypes.bool
}

export default AudioWaveform
|
||||
|
|
@ -402,7 +402,15 @@ const SpeechToText = ({ dialogProps }) => {
|
|||
sx={{ ml: 1 }}
|
||||
primary={speechToTextProviders[selectedProvider].label}
|
||||
secondary={
|
||||
<a target='_blank' rel='noreferrer' href={speechToTextProviders[selectedProvider].url}>
|
||||
<a
|
||||
target='_blank'
|
||||
rel='noreferrer'
|
||||
href={speechToTextProviders[selectedProvider].url}
|
||||
style={{
|
||||
color: theme?.customization?.isDarkMode ? '#90caf9' : '#1976d2',
|
||||
textDecoration: 'underline'
|
||||
}}
|
||||
>
|
||||
{speechToTextProviders[selectedProvider].url}
|
||||
</a>
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,660 @@
|
|||
import { useDispatch } from 'react-redux'
|
||||
import { useState, useEffect } from 'react'
|
||||
import PropTypes from 'prop-types'
|
||||
import { enqueueSnackbar as enqueueSnackbarAction, closeSnackbar as closeSnackbarAction, SET_CHATFLOW } from '@/store/actions'
|
||||
|
||||
// material-ui
|
||||
import {
|
||||
Typography,
|
||||
Box,
|
||||
Button,
|
||||
FormControl,
|
||||
ListItem,
|
||||
ListItemAvatar,
|
||||
ListItemText,
|
||||
MenuItem,
|
||||
Select,
|
||||
CircularProgress,
|
||||
Autocomplete,
|
||||
TextField
|
||||
} from '@mui/material'
|
||||
import { IconX, IconVolume } from '@tabler/icons-react'
|
||||
import { useTheme } from '@mui/material/styles'
|
||||
|
||||
// Project import
|
||||
import CredentialInputHandler from '@/views/canvas/CredentialInputHandler'
|
||||
import { TooltipWithParser } from '@/ui-component/tooltip/TooltipWithParser'
|
||||
import { SwitchInput } from '@/ui-component/switch/Switch'
|
||||
import { Input } from '@/ui-component/input/Input'
|
||||
import { StyledButton } from '@/ui-component/button/StyledButton'
|
||||
import { Dropdown } from '@/ui-component/dropdown/Dropdown'
|
||||
import AudioWaveform from '@/ui-component/extended/AudioWaveform'
|
||||
import openAISVG from '@/assets/images/openai.svg'
|
||||
import elevenLabsSVG from '@/assets/images/elevenlabs.svg'
|
||||
|
||||
// store
|
||||
import useNotifier from '@/utils/useNotifier'
|
||||
|
||||
// API
|
||||
import chatflowsApi from '@/api/chatflows'
|
||||
import ttsApi from '@/api/tts'
|
||||
|
||||
// Provider keys as stored in the chatflow's textToSpeech config JSON
const TextToSpeechType = {
    OPENAI_TTS: 'openai',
    ELEVEN_LABS_TTS: 'elevenlabs'
}
|
||||
|
||||
// Weird quirk - the key must match the name property value.
// Registry of supported TTS providers and the inputs each renders in the
// configuration UI (a credential picker plus a voice selector).
const textToSpeechProviders = {
    [TextToSpeechType.OPENAI_TTS]: {
        label: 'OpenAI TTS',
        name: TextToSpeechType.OPENAI_TTS,
        icon: openAISVG,
        url: 'https://platform.openai.com/docs/guides/text-to-speech',
        inputs: [
            {
                label: 'Connect Credential',
                name: 'credential',
                type: 'credential',
                credentialNames: ['openAIApi']
            },
            {
                label: 'Voice',
                name: 'voice',
                type: 'voice_select',
                description: 'The voice to use when generating the audio',
                default: 'alloy',
                optional: true
            }
        ]
    },
    [TextToSpeechType.ELEVEN_LABS_TTS]: {
        label: 'Eleven Labs TTS',
        name: TextToSpeechType.ELEVEN_LABS_TTS,
        icon: elevenLabsSVG,
        url: 'https://elevenlabs.io/',
        inputs: [
            {
                label: 'Connect Credential',
                name: 'credential',
                type: 'credential',
                credentialNames: ['elevenLabsApi']
            },
            {
                label: 'Voice',
                name: 'voice',
                type: 'voice_select',
                description: 'The voice to use for text-to-speech',
                // Default ElevenLabs voice id (presumably the "Rachel" voice — confirm)
                default: '21m00Tcm4TlvDq8ikWAM',
                optional: true
            }
        ]
    }
}
|
||||
|
||||
const TextToSpeech = ({ dialogProps }) => {
|
||||
const dispatch = useDispatch()
|
||||
|
||||
useNotifier()
|
||||
const theme = useTheme()
|
||||
|
||||
const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args))
|
||||
const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args))
|
||||
|
||||
const [textToSpeech, setTextToSpeech] = useState(null)
|
||||
const [selectedProvider, setSelectedProvider] = useState('none')
|
||||
const [voices, setVoices] = useState([])
|
||||
const [loadingVoices, setLoadingVoices] = useState(false)
|
||||
const [testAudioSrc, setTestAudioSrc] = useState(null)
|
||||
const [isTestPlaying, setIsTestPlaying] = useState(false)
|
||||
const [testAudioRef, setTestAudioRef] = useState(null)
|
||||
const [isGeneratingTest, setIsGeneratingTest] = useState(false)
|
||||
const [resetWaveform, setResetWaveform] = useState(false)
|
||||
|
||||
const resetTestAudio = () => {
|
||||
if (testAudioSrc) {
|
||||
URL.revokeObjectURL(testAudioSrc)
|
||||
setTestAudioSrc(null)
|
||||
}
|
||||
setIsTestPlaying(false)
|
||||
setResetWaveform(true)
|
||||
setTimeout(() => setResetWaveform(false), 100)
|
||||
}
|
||||
|
||||
const onSave = async () => {
|
||||
const textToSpeechConfig = setValue(true, selectedProvider, 'status')
|
||||
try {
|
||||
const saveResp = await chatflowsApi.updateChatflow(dialogProps.chatflow.id, {
|
||||
textToSpeech: JSON.stringify(textToSpeechConfig)
|
||||
})
|
||||
if (saveResp.data) {
|
||||
enqueueSnackbar({
|
||||
message: 'Text To Speech Configuration Saved',
|
||||
options: {
|
||||
key: Date.now() + Math.random(),
|
||||
variant: 'success',
|
||||
action: (key) => (
|
||||
<Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
|
||||
<IconX />
|
||||
</Button>
|
||||
)
|
||||
}
|
||||
})
|
||||
dispatch({ type: SET_CHATFLOW, chatflow: saveResp.data })
|
||||
}
|
||||
} catch (error) {
|
||||
enqueueSnackbar({
|
||||
message: `Failed to save Text To Speech Configuration: ${
|
||||
typeof error.response.data === 'object' ? error.response.data.message : error.response.data
|
||||
}`,
|
||||
options: {
|
||||
key: Date.now() + Math.random(),
|
||||
variant: 'error',
|
||||
persist: true,
|
||||
action: (key) => (
|
||||
<Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
|
||||
<IconX />
|
||||
</Button>
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
const setValue = (value, providerName, inputParamName) => {
|
||||
let newVal = {}
|
||||
if (!textToSpeech || !Object.hasOwn(textToSpeech, providerName)) {
|
||||
newVal = { ...(textToSpeech || {}), [providerName]: {} }
|
||||
} else {
|
||||
newVal = { ...textToSpeech }
|
||||
}
|
||||
|
||||
newVal[providerName][inputParamName] = value
|
||||
if (inputParamName === 'status' && value === true) {
|
||||
// ensure that the others are turned off
|
||||
Object.keys(textToSpeechProviders).forEach((key) => {
|
||||
const provider = textToSpeechProviders[key]
|
||||
if (provider.name !== providerName) {
|
||||
newVal[provider.name] = { ...(textToSpeech?.[provider.name] || {}), status: false }
|
||||
}
|
||||
})
|
||||
if (providerName !== 'none' && newVal['none']) {
|
||||
newVal['none'].status = false
|
||||
}
|
||||
}
|
||||
|
||||
// Reset test audio when voice or credential is changed
|
||||
if ((inputParamName === 'voice' || inputParamName === 'credentialId') && providerName === selectedProvider) {
|
||||
resetTestAudio()
|
||||
}
|
||||
|
||||
setTextToSpeech(newVal)
|
||||
return newVal
|
||||
}
|
||||
|
||||
const handleProviderChange = (provider, configOverride = null) => {
|
||||
setSelectedProvider(provider)
|
||||
setVoices([])
|
||||
resetTestAudio()
|
||||
|
||||
if (provider !== 'none') {
|
||||
const config = configOverride || textToSpeech
|
||||
const credentialId = config?.[provider]?.credentialId
|
||||
if (credentialId) {
|
||||
loadVoicesForProvider(provider, credentialId)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const loadVoicesForProvider = async (provider, credentialId) => {
|
||||
if (provider === 'none' || !credentialId) return
|
||||
|
||||
setLoadingVoices(true)
|
||||
try {
|
||||
const params = new URLSearchParams({ provider })
|
||||
params.append('credentialId', credentialId)
|
||||
|
||||
const response = await ttsApi.listVoices(params)
|
||||
|
||||
if (response.data) {
|
||||
const voicesData = await response.data
|
||||
setVoices(voicesData)
|
||||
} else {
|
||||
setVoices([])
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error loading voices:', error)
|
||||
setVoices([])
|
||||
} finally {
|
||||
setLoadingVoices(false)
|
||||
}
|
||||
}
|
||||
|
||||
const testTTS = async () => {
|
||||
if (selectedProvider === 'none' || !textToSpeech?.[selectedProvider]?.credentialId) {
|
||||
enqueueSnackbar({
|
||||
message: 'Please select a provider and configure credentials first',
|
||||
options: { variant: 'warning' }
|
||||
})
|
||||
return
|
||||
}
|
||||
|
||||
setIsGeneratingTest(true)
|
||||
|
||||
try {
|
||||
const providerConfig = textToSpeech?.[selectedProvider] || {}
|
||||
const body = {
|
||||
text: 'Today is a wonderful day to build something with Flowise!',
|
||||
provider: selectedProvider,
|
||||
credentialId: providerConfig.credentialId,
|
||||
voice: providerConfig.voice,
|
||||
model: providerConfig.model
|
||||
}
|
||||
|
||||
const response = await fetch('/api/v1/text-to-speech/generate', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'x-request-from': 'internal'
|
||||
},
|
||||
credentials: 'include',
|
||||
body: JSON.stringify(body)
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`HTTP error! status: ${response.status}`)
|
||||
}
|
||||
|
||||
const audioChunks = []
|
||||
const reader = response.body.getReader()
|
||||
let buffer = ''
|
||||
|
||||
let done = false
|
||||
while (!done) {
|
||||
const result = await reader.read()
|
||||
done = result.done
|
||||
if (done) break
|
||||
|
||||
const chunk = new TextDecoder().decode(result.value, { stream: true })
|
||||
buffer += chunk
|
||||
const lines = buffer.split('\n\n')
|
||||
buffer = lines.pop() || ''
|
||||
|
||||
for (const eventBlock of lines) {
|
||||
if (eventBlock.trim()) {
|
||||
const event = parseSSEEvent(eventBlock)
|
||||
if (event && event.event === 'tts_data' && event.data?.audioChunk) {
|
||||
const audioBuffer = Uint8Array.from(atob(event.data.audioChunk), (c) => c.charCodeAt(0))
|
||||
audioChunks.push(audioBuffer)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (audioChunks.length > 0) {
|
||||
// Combine all chunks into a single blob
|
||||
const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0)
|
||||
const combinedBuffer = new Uint8Array(totalLength)
|
||||
let offset = 0
|
||||
|
||||
for (const chunk of audioChunks) {
|
||||
combinedBuffer.set(chunk, offset)
|
||||
offset += chunk.length
|
||||
}
|
||||
|
||||
const audioBlob = new Blob([combinedBuffer], { type: 'audio/mpeg' })
|
||||
const audioUrl = URL.createObjectURL(audioBlob)
|
||||
|
||||
// Clean up previous audio
|
||||
if (testAudioSrc) {
|
||||
URL.revokeObjectURL(testAudioSrc)
|
||||
}
|
||||
|
||||
setTestAudioSrc(audioUrl)
|
||||
} else {
|
||||
throw new Error('No audio data received')
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error testing TTS:', error)
|
||||
enqueueSnackbar({
|
||||
message: `TTS test failed: ${error.message}`,
|
||||
options: { variant: 'error' }
|
||||
})
|
||||
} finally {
|
||||
setIsGeneratingTest(false)
|
||||
}
|
||||
}
|
||||
|
||||
const parseSSEEvent = (eventBlock) => {
|
||||
const lines = eventBlock.trim().split('\n')
|
||||
const event = { event: null, data: null }
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('event:')) {
|
||||
event.event = line.substring(6).trim()
|
||||
} else if (line.startsWith('data:')) {
|
||||
const dataStr = line.substring(5).trim()
|
||||
try {
|
||||
const parsed = JSON.parse(dataStr)
|
||||
if (parsed.data) {
|
||||
event.data = parsed.data
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Error parsing SSE data:', e)
|
||||
}
|
||||
}
|
||||
}
|
||||
return event.event ? event : null
|
||||
}
|
||||
|
||||
// Audio control functions for waveform component
|
||||
const handleTestPlay = async () => {
|
||||
// If audio already exists, just play it
|
||||
if (testAudioRef && testAudioSrc) {
|
||||
testAudioRef.play()
|
||||
setIsTestPlaying(true)
|
||||
return
|
||||
}
|
||||
|
||||
// If no audio exists, generate it first
|
||||
if (!testAudioSrc) {
|
||||
await testTTS()
|
||||
// testTTS will set the audio source, and we'll play it in the next useEffect
|
||||
}
|
||||
}
|
||||
|
||||
const handleTestPause = () => {
|
||||
if (testAudioRef) {
|
||||
testAudioRef.pause()
|
||||
setIsTestPlaying(false)
|
||||
}
|
||||
}
|
||||
|
||||
const handleTestEnded = () => {
|
||||
setIsTestPlaying(false)
|
||||
}
|
||||
|
||||
// Auto-play when audio is generated (if user clicked play)
|
||||
useEffect(() => {
|
||||
if (testAudioSrc && testAudioRef && !isTestPlaying) {
|
||||
// Small delay to ensure audio element is ready
|
||||
setTimeout(() => {
|
||||
testAudioRef.play()
|
||||
setIsTestPlaying(true)
|
||||
}, 100)
|
||||
}
|
||||
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [testAudioSrc, testAudioRef])
|
||||
|
||||
useEffect(() => {
|
||||
if (dialogProps.chatflow && dialogProps.chatflow.textToSpeech) {
|
||||
try {
|
||||
const textToSpeechConfig = JSON.parse(dialogProps.chatflow.textToSpeech)
|
||||
let selectedProvider = 'none'
|
||||
Object.keys(textToSpeechProviders).forEach((key) => {
|
||||
const providerConfig = textToSpeechConfig[key]
|
||||
if (providerConfig && providerConfig.status) {
|
||||
selectedProvider = key
|
||||
}
|
||||
})
|
||||
setSelectedProvider(selectedProvider)
|
||||
setTextToSpeech(textToSpeechConfig)
|
||||
handleProviderChange(selectedProvider, textToSpeechConfig)
|
||||
} catch {
|
||||
setTextToSpeech(null)
|
||||
setSelectedProvider('none')
|
||||
}
|
||||
}
|
||||
|
||||
return () => {
|
||||
setTextToSpeech(null)
|
||||
setSelectedProvider('none')
|
||||
setVoices([])
|
||||
resetTestAudio()
|
||||
}
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [dialogProps])
|
||||
|
||||
return (
|
||||
<>
|
||||
<Box fullWidth sx={{ mb: 1, display: 'flex', flexDirection: 'column', gap: 1 }}>
|
||||
<Typography>Providers</Typography>
|
||||
<FormControl fullWidth>
|
||||
<Select
|
||||
size='small'
|
||||
value={selectedProvider}
|
||||
onChange={(event) => handleProviderChange(event.target.value)}
|
||||
sx={{
|
||||
'& .MuiSvgIcon-root': {
|
||||
color: theme?.customization?.isDarkMode ? '#fff' : 'inherit'
|
||||
}
|
||||
}}
|
||||
>
|
||||
<MenuItem value='none'>None</MenuItem>
|
||||
{Object.values(textToSpeechProviders).map((provider) => (
|
||||
<MenuItem key={provider.name} value={provider.name}>
|
||||
{provider.label}
|
||||
</MenuItem>
|
||||
))}
|
||||
</Select>
|
||||
</FormControl>
|
||||
</Box>
|
||||
{selectedProvider !== 'none' && (
|
||||
<>
|
||||
<ListItem sx={{ mt: 3 }} alignItems='center'>
|
||||
<ListItemAvatar>
|
||||
<div
|
||||
style={{
|
||||
width: 50,
|
||||
height: 50,
|
||||
borderRadius: '50%',
|
||||
backgroundColor: 'white',
|
||||
flexShrink: 0,
|
||||
display: 'flex',
|
||||
alignItems: 'center',
|
||||
justifyContent: 'center'
|
||||
}}
|
||||
>
|
||||
<img
|
||||
style={{
|
||||
width: '100%',
|
||||
height: '100%',
|
||||
padding: 10,
|
||||
objectFit: 'contain'
|
||||
}}
|
||||
alt='TTS Provider'
|
||||
src={textToSpeechProviders[selectedProvider].icon}
|
||||
/>
|
||||
</div>
|
||||
</ListItemAvatar>
|
||||
<ListItemText
|
||||
sx={{ ml: 1 }}
|
||||
primary={textToSpeechProviders[selectedProvider].label}
|
||||
secondary={
|
||||
<a
|
||||
target='_blank'
|
||||
rel='noreferrer'
|
||||
href={textToSpeechProviders[selectedProvider].url}
|
||||
style={{
|
||||
color: theme?.customization?.isDarkMode ? '#90caf9' : '#1976d2',
|
||||
textDecoration: 'underline'
|
||||
}}
|
||||
>
|
||||
{textToSpeechProviders[selectedProvider].url}
|
||||
</a>
|
||||
}
|
||||
/>
|
||||
</ListItem>
|
||||
{textToSpeechProviders[selectedProvider].inputs.map((inputParam) => (
|
||||
<Box key={`${selectedProvider}-${inputParam.name}`} sx={{ p: 2 }}>
|
||||
<div style={{ display: 'flex', flexDirection: 'row' }}>
|
||||
<Typography>
|
||||
{inputParam.label}
|
||||
{!inputParam.optional && <span style={{ color: 'red' }}> *</span>}
|
||||
{inputParam.description && (
|
||||
<TooltipWithParser style={{ marginLeft: 10 }} title={inputParam.description} />
|
||||
)}
|
||||
</Typography>
|
||||
</div>
|
||||
{inputParam.type === 'credential' && (
|
||||
<CredentialInputHandler
|
||||
key={textToSpeech?.[selectedProvider]?.credentialId}
|
||||
data={
|
||||
textToSpeech?.[selectedProvider]?.credentialId
|
||||
? { credential: textToSpeech?.[selectedProvider]?.credentialId }
|
||||
: {}
|
||||
}
|
||||
inputParam={inputParam}
|
||||
onSelect={(newValue) => {
|
||||
setValue(newValue, selectedProvider, 'credentialId')
|
||||
// Load voices when credential is updated
|
||||
if (newValue && selectedProvider !== 'none') {
|
||||
setTimeout(() => loadVoicesForProvider(selectedProvider, newValue), 100)
|
||||
}
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
{inputParam.type === 'boolean' && (
|
||||
<SwitchInput
|
||||
onChange={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
|
||||
value={
|
||||
textToSpeech?.[selectedProvider]
|
||||
? textToSpeech[selectedProvider][inputParam.name]
|
||||
: inputParam.default ?? false
|
||||
}
|
||||
/>
|
||||
)}
|
||||
{(inputParam.type === 'string' || inputParam.type === 'password' || inputParam.type === 'number') && (
|
||||
<Input
|
||||
inputParam={inputParam}
|
||||
onChange={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
|
||||
value={
|
||||
textToSpeech?.[selectedProvider]
|
||||
? textToSpeech[selectedProvider][inputParam.name]
|
||||
: inputParam.default ?? ''
|
||||
}
|
||||
/>
|
||||
)}
|
||||
{inputParam.type === 'options' && (
|
||||
<Dropdown
|
||||
name={inputParam.name}
|
||||
options={inputParam.options}
|
||||
onSelect={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
|
||||
value={
|
||||
textToSpeech?.[selectedProvider]
|
||||
? textToSpeech[selectedProvider][inputParam.name]
|
||||
: inputParam.default ?? 'choose an option'
|
||||
}
|
||||
/>
|
||||
)}
|
||||
{inputParam.type === 'voice_select' && (
|
||||
<Autocomplete
|
||||
size='small'
|
||||
sx={{ mt: 1 }}
|
||||
options={voices}
|
||||
loading={loadingVoices}
|
||||
getOptionLabel={(option) => option.name || ''}
|
||||
value={
|
||||
voices.find(
|
||||
(voice) =>
|
||||
voice.id === (textToSpeech?.[selectedProvider]?.[inputParam.name] || inputParam.default)
|
||||
) || null
|
||||
}
|
||||
onChange={(event, newValue) => {
|
||||
setValue(newValue ? newValue.id : '', selectedProvider, inputParam.name)
|
||||
}}
|
||||
renderInput={(params) => (
|
||||
<TextField
|
||||
{...params}
|
||||
placeholder={loadingVoices ? 'Loading voices...' : 'Choose a voice'}
|
||||
InputProps={{
|
||||
...params.InputProps,
|
||||
endAdornment: (
|
||||
<>
|
||||
{loadingVoices ? <CircularProgress color='inherit' size={20} /> : null}
|
||||
{params.InputProps.endAdornment}
|
||||
</>
|
||||
)
|
||||
}}
|
||||
/>
|
||||
)}
|
||||
disabled={loadingVoices || !textToSpeech?.[selectedProvider]?.credentialId}
|
||||
/>
|
||||
)}
|
||||
</Box>
|
||||
))}
|
||||
|
||||
{/* Auto-play Toggle */}
|
||||
<Box sx={{ p: 2 }}>
|
||||
<div style={{ display: 'flex', flexDirection: 'row', alignItems: 'center' }}>
|
||||
<Typography>
|
||||
Automatically play audio
|
||||
<TooltipWithParser
|
||||
style={{ marginLeft: 10 }}
|
||||
title='When enabled, bot responses will be automatically converted to speech and played'
|
||||
/>
|
||||
</Typography>
|
||||
</div>
|
||||
<SwitchInput
|
||||
onChange={(newValue) => setValue(newValue, selectedProvider, 'autoPlay')}
|
||||
value={textToSpeech?.[selectedProvider] ? textToSpeech[selectedProvider].autoPlay ?? false : false}
|
||||
/>
|
||||
</Box>
|
||||
|
||||
{/* Test Voice Section */}
|
||||
<Box sx={{ p: 2 }}>
|
||||
<Typography variant='h6' sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
|
||||
<IconVolume size={20} />
|
||||
Test Voice
|
||||
</Typography>
|
||||
|
||||
<Typography variant='body2' color='textSecondary' sx={{ mb: 2 }}>
|
||||
Test text: "Today is a wonderful day to build something with Flowise!"
|
||||
</Typography>
|
||||
|
||||
<AudioWaveform
|
||||
audioSrc={testAudioSrc}
|
||||
onPlay={handleTestPlay}
|
||||
onPause={handleTestPause}
|
||||
onEnded={handleTestEnded}
|
||||
isPlaying={isTestPlaying}
|
||||
isGenerating={isGeneratingTest}
|
||||
disabled={!textToSpeech?.[selectedProvider]?.credentialId}
|
||||
externalAudioRef={testAudioRef}
|
||||
resetProgress={resetWaveform}
|
||||
/>
|
||||
|
||||
{/* Hidden audio element for waveform control */}
|
||||
{testAudioSrc && (
|
||||
<audio
|
||||
ref={(ref) => setTestAudioRef(ref)}
|
||||
src={testAudioSrc}
|
||||
onPlay={() => setIsTestPlaying(true)}
|
||||
onPause={() => setIsTestPlaying(false)}
|
||||
onEnded={handleTestEnded}
|
||||
style={{ display: 'none' }}
|
||||
>
|
||||
<track kind='captions' />
|
||||
</audio>
|
||||
)}
|
||||
</Box>
|
||||
</>
|
||||
)}
|
||||
<StyledButton
|
||||
style={{ marginBottom: 10, marginTop: 10 }}
|
||||
disabled={selectedProvider !== 'none' && !textToSpeech?.[selectedProvider]?.credentialId}
|
||||
variant='contained'
|
||||
onClick={onSave}
|
||||
>
|
||||
Save
|
||||
</StyledButton>
|
||||
</>
|
||||
)
|
||||
}
|
||||
|
||||
TextToSpeech.propTypes = {
|
||||
dialogProps: PropTypes.object
|
||||
}
|
||||
|
||||
export default TextToSpeech
|
||||
|
|
@ -38,7 +38,8 @@ import {
|
|||
IconSquareFilled,
|
||||
IconCheck,
|
||||
IconPaperclip,
|
||||
IconSparkles
|
||||
IconSparkles,
|
||||
IconVolume
|
||||
} from '@tabler/icons-react'
|
||||
import robotPNG from '@/assets/images/robot.png'
|
||||
import userPNG from '@/assets/images/account.png'
|
||||
|
|
@ -72,6 +73,7 @@ import attachmentsApi from '@/api/attachments'
|
|||
import chatmessagefeedbackApi from '@/api/chatmessagefeedback'
|
||||
import leadsApi from '@/api/lead'
|
||||
import executionsApi from '@/api/executions'
|
||||
import ttsApi from '@/api/tts'
|
||||
|
||||
// Hooks
|
||||
import useApi from '@/hooks/useApi'
|
||||
|
|
@ -251,6 +253,27 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
|
||||
const [isConfigLoading, setIsConfigLoading] = useState(true)
|
||||
|
||||
// TTS state
|
||||
const [isTTSLoading, setIsTTSLoading] = useState({})
|
||||
const [isTTSPlaying, setIsTTSPlaying] = useState({})
|
||||
const [ttsAudio, setTtsAudio] = useState({})
|
||||
const [isTTSEnabled, setIsTTSEnabled] = useState(false)
|
||||
|
||||
// TTS streaming state
|
||||
const [ttsStreamingState, setTtsStreamingState] = useState({
|
||||
mediaSource: null,
|
||||
sourceBuffer: null,
|
||||
audio: null,
|
||||
chunkQueue: [],
|
||||
isBuffering: false,
|
||||
audioFormat: null,
|
||||
abortController: null
|
||||
})
|
||||
|
||||
// Ref to prevent auto-scroll during TTS actions (using ref to avoid re-renders)
|
||||
const isTTSActionRef = useRef(false)
|
||||
const ttsTimeoutRef = useRef(null)
|
||||
|
||||
const isFileAllowedForUpload = (file) => {
|
||||
const constraints = getAllowChatFlowUploads.data
|
||||
/**
|
||||
|
|
@ -463,7 +486,12 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
const handleAbort = async () => {
|
||||
setIsMessageStopping(true)
|
||||
try {
|
||||
// Stop all TTS streams first
|
||||
await handleTTSAbortAll()
|
||||
stopAllTTS()
|
||||
|
||||
await chatmessageApi.abortMessage(chatflowid, chatId)
|
||||
setIsMessageStopping(false)
|
||||
} catch (error) {
|
||||
setIsMessageStopping(false)
|
||||
enqueueSnackbar({
|
||||
|
|
@ -536,6 +564,22 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
}
|
||||
}
|
||||
|
||||
// Helper function to manage TTS action flag
|
||||
const setTTSAction = (isActive) => {
|
||||
isTTSActionRef.current = isActive
|
||||
if (ttsTimeoutRef.current) {
|
||||
clearTimeout(ttsTimeoutRef.current)
|
||||
ttsTimeoutRef.current = null
|
||||
}
|
||||
if (isActive) {
|
||||
// Reset the flag after a longer delay to ensure all state changes are complete
|
||||
ttsTimeoutRef.current = setTimeout(() => {
|
||||
isTTSActionRef.current = false
|
||||
ttsTimeoutRef.current = null
|
||||
}, 300)
|
||||
}
|
||||
}
|
||||
|
||||
const onChange = useCallback((e) => setUserInput(e.target.value), [setUserInput])
|
||||
|
||||
const updateLastMessage = (text) => {
|
||||
|
|
@ -949,6 +993,7 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
setLoading(false)
|
||||
setUserInput('')
|
||||
setUploadedFiles([])
|
||||
|
||||
setTimeout(() => {
|
||||
inputRef.current?.focus()
|
||||
scrollToBottom()
|
||||
|
|
@ -1027,6 +1072,18 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
abortMessage(payload.data)
|
||||
closeResponse()
|
||||
break
|
||||
case 'tts_start':
|
||||
handleTTSStart(payload.data)
|
||||
break
|
||||
case 'tts_data':
|
||||
handleTTSDataChunk(payload.data.audioChunk)
|
||||
break
|
||||
case 'tts_end':
|
||||
handleTTSEnd()
|
||||
break
|
||||
case 'tts_abort':
|
||||
handleTTSAbort(payload.data)
|
||||
break
|
||||
case 'end':
|
||||
setLocalStorageChatflow(chatflowid, chatId)
|
||||
closeResponse()
|
||||
|
|
@ -1293,6 +1350,30 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Check if TTS is configured
|
||||
if (getChatflowConfig.data && getChatflowConfig.data.textToSpeech) {
|
||||
try {
|
||||
const ttsConfig =
|
||||
typeof getChatflowConfig.data.textToSpeech === 'string'
|
||||
? JSON.parse(getChatflowConfig.data.textToSpeech)
|
||||
: getChatflowConfig.data.textToSpeech
|
||||
|
||||
let isEnabled = false
|
||||
if (ttsConfig) {
|
||||
Object.keys(ttsConfig).forEach((provider) => {
|
||||
if (provider !== 'none' && ttsConfig?.[provider]?.status) {
|
||||
isEnabled = true
|
||||
}
|
||||
})
|
||||
}
|
||||
setIsTTSEnabled(isEnabled)
|
||||
} catch (error) {
|
||||
setIsTTSEnabled(false)
|
||||
}
|
||||
} else {
|
||||
setIsTTSEnabled(false)
|
||||
}
|
||||
// eslint-disable-next-line react-hooks/exhaustive-deps
|
||||
}, [getChatflowConfig.data])
|
||||
|
||||
|
|
@ -1313,9 +1394,11 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
}
|
||||
}, [isChatFlowAvailableForRAGFileUploads, fullFileUpload])
|
||||
|
||||
// Auto scroll chat to bottom
|
||||
// Auto scroll chat to bottom (but not during TTS actions)
|
||||
useEffect(() => {
|
||||
scrollToBottom()
|
||||
if (!isTTSActionRef.current) {
|
||||
scrollToBottom()
|
||||
}
|
||||
}, [messages])
|
||||
|
||||
useEffect(() => {
|
||||
|
|
@ -1497,9 +1580,451 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
return allMessages
|
||||
})
|
||||
}
|
||||
|
||||
setIsLeadSaving(false)
|
||||
}
|
||||
|
||||
const cleanupTTSForMessage = (messageId) => {
|
||||
if (ttsAudio[messageId]) {
|
||||
ttsAudio[messageId].pause()
|
||||
ttsAudio[messageId].currentTime = 0
|
||||
setTtsAudio((prev) => {
|
||||
const newState = { ...prev }
|
||||
delete newState[messageId]
|
||||
return newState
|
||||
})
|
||||
}
|
||||
|
||||
if (ttsStreamingState.audio) {
|
||||
ttsStreamingState.audio.pause()
|
||||
cleanupTTSStreaming()
|
||||
}
|
||||
|
||||
setIsTTSPlaying((prev) => {
|
||||
const newState = { ...prev }
|
||||
delete newState[messageId]
|
||||
return newState
|
||||
})
|
||||
|
||||
setIsTTSLoading((prev) => {
|
||||
const newState = { ...prev }
|
||||
delete newState[messageId]
|
||||
return newState
|
||||
})
|
||||
}
|
||||
|
||||
const handleTTSStop = async (messageId) => {
|
||||
setTTSAction(true)
|
||||
await ttsApi.abortTTS({ chatflowId: chatflowid, chatId, chatMessageId: messageId })
|
||||
cleanupTTSForMessage(messageId)
|
||||
setIsMessageStopping(false)
|
||||
}
|
||||
|
||||
const stopAllTTS = () => {
|
||||
Object.keys(ttsAudio).forEach((messageId) => {
|
||||
if (ttsAudio[messageId]) {
|
||||
ttsAudio[messageId].pause()
|
||||
ttsAudio[messageId].currentTime = 0
|
||||
}
|
||||
})
|
||||
setTtsAudio({})
|
||||
|
||||
if (ttsStreamingState.abortController) {
|
||||
ttsStreamingState.abortController.abort()
|
||||
}
|
||||
|
||||
if (ttsStreamingState.audio) {
|
||||
ttsStreamingState.audio.pause()
|
||||
cleanupTTSStreaming()
|
||||
}
|
||||
|
||||
setIsTTSPlaying({})
|
||||
setIsTTSLoading({})
|
||||
}
|
||||
|
||||
const handleTTSClick = async (messageId, messageText) => {
|
||||
if (isTTSLoading[messageId]) return
|
||||
|
||||
if (isTTSPlaying[messageId] || ttsAudio[messageId]) {
|
||||
handleTTSStop(messageId)
|
||||
return
|
||||
}
|
||||
|
||||
setTTSAction(true)
|
||||
|
||||
// abort all ongoing streams and clear audio sources
|
||||
await handleTTSAbortAll()
|
||||
stopAllTTS()
|
||||
|
||||
handleTTSStart({ chatMessageId: messageId, format: 'mp3' })
|
||||
try {
|
||||
const abortController = new AbortController()
|
||||
setTtsStreamingState((prev) => ({ ...prev, abortController }))
|
||||
|
||||
const response = await fetch('/api/v1/text-to-speech/generate', {
|
||||
method: 'POST',
|
||||
headers: {
|
||||
'Content-Type': 'application/json',
|
||||
'x-request-from': 'internal'
|
||||
},
|
||||
credentials: 'include',
|
||||
signal: abortController.signal,
|
||||
body: JSON.stringify({
|
||||
chatflowId: chatflowid,
|
||||
chatId: chatId,
|
||||
chatMessageId: messageId,
|
||||
text: messageText
|
||||
})
|
||||
})
|
||||
|
||||
if (!response.ok) {
|
||||
throw new Error(`TTS request failed: ${response.status}`)
|
||||
}
|
||||
|
||||
const reader = response.body.getReader()
|
||||
const decoder = new TextDecoder()
|
||||
let buffer = ''
|
||||
|
||||
let done = false
|
||||
while (!done) {
|
||||
if (abortController.signal.aborted) {
|
||||
break
|
||||
}
|
||||
|
||||
const result = await reader.read()
|
||||
done = result.done
|
||||
if (done) {
|
||||
break
|
||||
}
|
||||
const value = result.value
|
||||
const chunk = decoder.decode(value, { stream: true })
|
||||
buffer += chunk
|
||||
|
||||
const lines = buffer.split('\n\n')
|
||||
buffer = lines.pop() || ''
|
||||
|
||||
for (const eventBlock of lines) {
|
||||
if (eventBlock.trim()) {
|
||||
const event = parseSSEEvent(eventBlock)
|
||||
if (event) {
|
||||
switch (event.event) {
|
||||
case 'tts_start':
|
||||
break
|
||||
case 'tts_data':
|
||||
if (!abortController.signal.aborted) {
|
||||
handleTTSDataChunk(event.data.audioChunk)
|
||||
}
|
||||
break
|
||||
case 'tts_end':
|
||||
if (!abortController.signal.aborted) {
|
||||
handleTTSEnd()
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
if (error.name === 'AbortError') {
|
||||
console.error('TTS request was aborted')
|
||||
} else {
|
||||
console.error('Error with TTS:', error)
|
||||
enqueueSnackbar({
|
||||
message: `TTS failed: ${error.message}`,
|
||||
options: { variant: 'error' }
|
||||
})
|
||||
}
|
||||
} finally {
|
||||
setIsTTSLoading((prev) => {
|
||||
const newState = { ...prev }
|
||||
delete newState[messageId]
|
||||
return newState
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
const parseSSEEvent = (eventBlock) => {
|
||||
const lines = eventBlock.split('\n')
|
||||
const event = {}
|
||||
|
||||
for (const line of lines) {
|
||||
if (line.startsWith('event:')) {
|
||||
event.event = line.substring(6).trim()
|
||||
} else if (line.startsWith('data:')) {
|
||||
const dataStr = line.substring(5).trim()
|
||||
try {
|
||||
const parsed = JSON.parse(dataStr)
|
||||
if (parsed.data) {
|
||||
event.data = parsed.data
|
||||
}
|
||||
} catch (e) {
|
||||
console.error('Error parsing SSE data:', e, 'Raw data:', dataStr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return event.event ? event : null
|
||||
}
|
||||
|
||||
const initializeTTSStreaming = (data) => {
|
||||
try {
|
||||
const mediaSource = new MediaSource()
|
||||
const audio = new Audio()
|
||||
audio.src = URL.createObjectURL(mediaSource)
|
||||
|
||||
mediaSource.addEventListener('sourceopen', () => {
|
||||
try {
|
||||
const mimeType = data.format === 'mp3' ? 'audio/mpeg' : 'audio/mpeg'
|
||||
const sourceBuffer = mediaSource.addSourceBuffer(mimeType)
|
||||
|
||||
setTtsStreamingState((prevState) => ({
|
||||
...prevState,
|
||||
mediaSource,
|
||||
sourceBuffer,
|
||||
audio
|
||||
}))
|
||||
|
||||
audio.play().catch((playError) => {
|
||||
console.error('Error starting audio playback:', playError)
|
||||
})
|
||||
} catch (error) {
|
||||
console.error('Error setting up source buffer:', error)
|
||||
console.error('MediaSource readyState:', mediaSource.readyState)
|
||||
console.error('Requested MIME type:', mimeType)
|
||||
}
|
||||
})
|
||||
|
||||
audio.addEventListener('playing', () => {
|
||||
setIsTTSLoading((prevState) => {
|
||||
const newState = { ...prevState }
|
||||
delete newState[data.chatMessageId]
|
||||
return newState
|
||||
})
|
||||
setIsTTSPlaying((prevState) => ({
|
||||
...prevState,
|
||||
[data.chatMessageId]: true
|
||||
}))
|
||||
})
|
||||
|
||||
audio.addEventListener('ended', () => {
|
||||
setIsTTSPlaying((prevState) => {
|
||||
const newState = { ...prevState }
|
||||
delete newState[data.chatMessageId]
|
||||
return newState
|
||||
})
|
||||
cleanupTTSStreaming()
|
||||
})
|
||||
} catch (error) {
|
||||
console.error('Error initializing TTS streaming:', error)
|
||||
}
|
||||
}
|
||||
|
||||
const cleanupTTSStreaming = () => {
|
||||
setTtsStreamingState((prevState) => {
|
||||
if (prevState.abortController) {
|
||||
prevState.abortController.abort()
|
||||
}
|
||||
|
||||
if (prevState.audio) {
|
||||
prevState.audio.pause()
|
||||
prevState.audio.removeAttribute('src')
|
||||
if (prevState.audio.src) {
|
||||
URL.revokeObjectURL(prevState.audio.src)
|
||||
}
|
||||
}
|
||||
|
||||
if (prevState.mediaSource) {
|
||||
if (prevState.mediaSource.readyState === 'open') {
|
||||
try {
|
||||
prevState.mediaSource.endOfStream()
|
||||
} catch (e) {
|
||||
// Ignore errors during cleanup
|
||||
}
|
||||
}
|
||||
prevState.mediaSource.removeEventListener('sourceopen', () => {})
|
||||
}
|
||||
|
||||
return {
|
||||
mediaSource: null,
|
||||
sourceBuffer: null,
|
||||
audio: null,
|
||||
chunkQueue: [],
|
||||
isBuffering: false,
|
||||
audioFormat: null,
|
||||
abortController: null
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
const processChunkQueue = () => {
|
||||
setTtsStreamingState((prevState) => {
|
||||
if (!prevState.sourceBuffer || prevState.sourceBuffer.updating || prevState.chunkQueue.length === 0) {
|
||||
return prevState
|
||||
}
|
||||
|
||||
const chunk = prevState.chunkQueue.shift()
|
||||
|
||||
try {
|
||||
prevState.sourceBuffer.appendBuffer(chunk)
|
||||
return {
|
||||
...prevState,
|
||||
chunkQueue: [...prevState.chunkQueue],
|
||||
isBuffering: true
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error appending chunk to buffer:', error)
|
||||
return prevState
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
const handleTTSStart = (data) => {
|
||||
setTTSAction(true)
|
||||
|
||||
// Stop all existing TTS audio before starting new stream
|
||||
stopAllTTS()
|
||||
|
||||
setIsTTSLoading((prevState) => ({
|
||||
...prevState,
|
||||
[data.chatMessageId]: true
|
||||
}))
|
||||
setMessages((prevMessages) => {
|
||||
const allMessages = [...cloneDeep(prevMessages)]
|
||||
const lastMessage = allMessages[allMessages.length - 1]
|
||||
if (lastMessage.type === 'userMessage') return allMessages
|
||||
if (lastMessage.id) return allMessages
|
||||
allMessages[allMessages.length - 1].id = data.chatMessageId
|
||||
return allMessages
|
||||
})
|
||||
setTtsStreamingState({
|
||||
mediaSource: null,
|
||||
sourceBuffer: null,
|
||||
audio: null,
|
||||
chunkQueue: [],
|
||||
isBuffering: false,
|
||||
audioFormat: data.format,
|
||||
abortController: null
|
||||
})
|
||||
|
||||
setTimeout(() => initializeTTSStreaming(data), 0)
|
||||
}
|
||||
|
||||
const handleTTSDataChunk = (base64Data) => {
|
||||
try {
|
||||
const audioBuffer = Uint8Array.from(atob(base64Data), (c) => c.charCodeAt(0))
|
||||
|
||||
setTtsStreamingState((prevState) => {
|
||||
const newState = {
|
||||
...prevState,
|
||||
chunkQueue: [...prevState.chunkQueue, audioBuffer]
|
||||
}
|
||||
|
||||
if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) {
|
||||
setTimeout(() => processChunkQueue(), 0)
|
||||
}
|
||||
|
||||
return newState
|
||||
})
|
||||
} catch (error) {
|
||||
console.error('Error handling TTS data chunk:', error)
|
||||
}
|
||||
}
|
||||
|
||||
const handleTTSEnd = () => {
|
||||
setTtsStreamingState((prevState) => {
|
||||
if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') {
|
||||
try {
|
||||
if (prevState.sourceBuffer && prevState.chunkQueue.length > 0 && !prevState.sourceBuffer.updating) {
|
||||
const remainingChunks = [...prevState.chunkQueue]
|
||||
remainingChunks.forEach((chunk, index) => {
|
||||
setTimeout(() => {
|
||||
if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) {
|
||||
try {
|
||||
prevState.sourceBuffer.appendBuffer(chunk)
|
||||
if (index === remainingChunks.length - 1) {
|
||||
setTimeout(() => {
|
||||
if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') {
|
||||
prevState.mediaSource.endOfStream()
|
||||
}
|
||||
}, 100)
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error appending remaining chunk:', error)
|
||||
}
|
||||
}
|
||||
}, index * 50)
|
||||
})
|
||||
return {
|
||||
...prevState,
|
||||
chunkQueue: []
|
||||
}
|
||||
}
|
||||
|
||||
if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) {
|
||||
prevState.mediaSource.endOfStream()
|
||||
} else if (prevState.sourceBuffer) {
|
||||
prevState.sourceBuffer.addEventListener(
|
||||
'updateend',
|
||||
() => {
|
||||
if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') {
|
||||
prevState.mediaSource.endOfStream()
|
||||
}
|
||||
},
|
||||
{ once: true }
|
||||
)
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error ending TTS stream:', error)
|
||||
}
|
||||
}
|
||||
return prevState
|
||||
})
|
||||
}
|
||||
|
||||
const handleTTSAbort = (data) => {
|
||||
const messageId = data.chatMessageId
|
||||
cleanupTTSForMessage(messageId)
|
||||
}
|
||||
|
||||
const handleTTSAbortAll = async () => {
|
||||
const activeTTSMessages = Object.keys(isTTSLoading).concat(Object.keys(isTTSPlaying))
|
||||
for (const messageId of activeTTSMessages) {
|
||||
await ttsApi.abortTTS({ chatflowId: chatflowid, chatId, chatMessageId: messageId })
|
||||
}
|
||||
}
|
||||
|
||||
useEffect(() => {
|
||||
if (ttsStreamingState.sourceBuffer) {
|
||||
const sourceBuffer = ttsStreamingState.sourceBuffer
|
||||
|
||||
const handleUpdateEnd = () => {
|
||||
setTtsStreamingState((prevState) => ({
|
||||
...prevState,
|
||||
isBuffering: false
|
||||
}))
|
||||
setTimeout(() => processChunkQueue(), 0)
|
||||
}
|
||||
|
||||
sourceBuffer.addEventListener('updateend', handleUpdateEnd)
|
||||
|
||||
return () => {
|
||||
sourceBuffer.removeEventListener('updateend', handleUpdateEnd)
|
||||
}
|
||||
}
|
||||
}, [ttsStreamingState.sourceBuffer])
|
||||
|
||||
useEffect(() => {
|
||||
return () => {
|
||||
cleanupTTSStreaming()
|
||||
// Cleanup TTS timeout on unmount
|
||||
if (ttsTimeoutRef.current) {
|
||||
clearTimeout(ttsTimeoutRef.current)
|
||||
ttsTimeoutRef.current = null
|
||||
}
|
||||
}
|
||||
}, [])
|
||||
|
||||
const getInputDisabled = () => {
|
||||
return (
|
||||
loading ||
|
||||
|
|
@ -2151,7 +2676,7 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
})}
|
||||
</div>
|
||||
)}
|
||||
{message.type === 'apiMessage' && message.id && chatFeedbackStatus ? (
|
||||
{message.type === 'apiMessage' && message.id ? (
|
||||
<>
|
||||
<Box
|
||||
sx={{
|
||||
|
|
@ -2161,25 +2686,62 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
|||
gap: 1
|
||||
}}
|
||||
>
|
||||
<CopyToClipboardButton onClick={() => copyMessageToClipboard(message.message)} />
|
||||
{!message.feedback ||
|
||||
message.feedback.rating === '' ||
|
||||
message.feedback.rating === 'THUMBS_UP' ? (
|
||||
<ThumbsUpButton
|
||||
isDisabled={message.feedback && message.feedback.rating === 'THUMBS_UP'}
|
||||
rating={message.feedback ? message.feedback.rating : ''}
|
||||
onClick={() => onThumbsUpClick(message.id)}
|
||||
/>
|
||||
) : null}
|
||||
{!message.feedback ||
|
||||
message.feedback.rating === '' ||
|
||||
message.feedback.rating === 'THUMBS_DOWN' ? (
|
||||
<ThumbsDownButton
|
||||
isDisabled={message.feedback && message.feedback.rating === 'THUMBS_DOWN'}
|
||||
rating={message.feedback ? message.feedback.rating : ''}
|
||||
onClick={() => onThumbsDownClick(message.id)}
|
||||
/>
|
||||
) : null}
|
||||
{isTTSEnabled && (
|
||||
<IconButton
|
||||
size='small'
|
||||
onClick={() =>
|
||||
isTTSPlaying[message.id]
|
||||
? handleTTSStop(message.id)
|
||||
: handleTTSClick(message.id, message.message)
|
||||
}
|
||||
disabled={isTTSLoading[message.id]}
|
||||
sx={{
|
||||
backgroundColor: ttsAudio[message.id] ? 'primary.main' : 'transparent',
|
||||
color: ttsAudio[message.id] ? 'white' : 'inherit',
|
||||
'&:hover': {
|
||||
backgroundColor: ttsAudio[message.id] ? 'primary.dark' : 'action.hover'
|
||||
}
|
||||
}}
|
||||
>
|
||||
{isTTSLoading[message.id] ? (
|
||||
<CircularProgress size={16} />
|
||||
) : isTTSPlaying[message.id] ? (
|
||||
<IconCircleDot style={{ width: '20px', height: '20px' }} color={'red'} />
|
||||
) : (
|
||||
<IconVolume
|
||||
style={{ width: '20px', height: '20px' }}
|
||||
color={customization.isDarkMode ? 'white' : '#1e88e5'}
|
||||
/>
|
||||
)}
|
||||
</IconButton>
|
||||
)}
|
||||
{chatFeedbackStatus && (
|
||||
<>
|
||||
<CopyToClipboardButton
|
||||
onClick={() => copyMessageToClipboard(message.message)}
|
||||
/>
|
||||
{!message.feedback ||
|
||||
message.feedback.rating === '' ||
|
||||
message.feedback.rating === 'THUMBS_UP' ? (
|
||||
<ThumbsUpButton
|
||||
isDisabled={message.feedback && message.feedback.rating === 'THUMBS_UP'}
|
||||
rating={message.feedback ? message.feedback.rating : ''}
|
||||
onClick={() => onThumbsUpClick(message.id)}
|
||||
/>
|
||||
) : null}
|
||||
{!message.feedback ||
|
||||
message.feedback.rating === '' ||
|
||||
message.feedback.rating === 'THUMBS_DOWN' ? (
|
||||
<ThumbsDownButton
|
||||
isDisabled={
|
||||
message.feedback && message.feedback.rating === 'THUMBS_DOWN'
|
||||
}
|
||||
rating={message.feedback ? message.feedback.rating : ''}
|
||||
onClick={() => onThumbsDownClick(message.id)}
|
||||
/>
|
||||
) : null}
|
||||
</>
|
||||
)}
|
||||
</Box>
|
||||
</>
|
||||
) : null}
|
||||
|
|
|
|||
77915
pnpm-lock.yaml
77915
pnpm-lock.yaml
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue