diff --git a/packages/components/src/Interface.ts b/packages/components/src/Interface.ts index 5e2ee383c..3e67fb241 100644 --- a/packages/components/src/Interface.ts +++ b/packages/components/src/Interface.ts @@ -441,6 +441,7 @@ export interface IServerSideEventStreamer { streamAbortEvent(chatId: string): void streamEndEvent(chatId: string): void streamUsageMetadataEvent(chatId: string, data: any): void + streamAudioEvent(chatId: string, audioData: string): void } export enum FollowUpPromptProvider { diff --git a/packages/server/src/Interface.ts b/packages/server/src/Interface.ts index 97f66d109..1db4dacec 100644 --- a/packages/server/src/Interface.ts +++ b/packages/server/src/Interface.ts @@ -64,6 +64,7 @@ export interface IChatFlow { apikeyid?: string analytic?: string speechToText?: string + textToSpeech?: string chatbotConfig?: string followUpPrompts?: string apiConfig?: string diff --git a/packages/server/src/queue/RedisEventPublisher.ts b/packages/server/src/queue/RedisEventPublisher.ts index f2b601508..200c7e9e7 100644 --- a/packages/server/src/queue/RedisEventPublisher.ts +++ b/packages/server/src/queue/RedisEventPublisher.ts @@ -393,6 +393,21 @@ export class RedisEventPublisher implements IServerSideEventStreamer { } } + streamAudioEvent(chatId: string, audioData: string): void { + try { + this.redisPublisher.publish( + chatId, + JSON.stringify({ + chatId, + eventType: 'audio', + data: audioData + }) + ) + } catch (error) { + console.error('Error streaming audio event:', error) + } + } + async disconnect() { if (this.redisPublisher) { await this.redisPublisher.quit() diff --git a/packages/server/src/utils/SSEStreamer.ts b/packages/server/src/utils/SSEStreamer.ts index 2b950579c..e762fc417 100644 --- a/packages/server/src/utils/SSEStreamer.ts +++ b/packages/server/src/utils/SSEStreamer.ts @@ -257,4 +257,15 @@ export class SSEStreamer implements IServerSideEventStreamer { client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') } } + + streamAudioEvent(chatId: string, audioData: string): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'audio', + data: audioData + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } } diff --git a/packages/server/src/utils/buildAgentflow.ts b/packages/server/src/utils/buildAgentflow.ts index b0d2eed64..f432691d4 100644 --- a/packages/server/src/utils/buildAgentflow.ts +++ b/packages/server/src/utils/buildAgentflow.ts @@ -11,7 +11,8 @@ import { IMessage, IServerSideEventStreamer, convertChatHistoryToText, - generateFollowUpPrompts + generateFollowUpPrompts, + convertTextToSpeech } from 'flowise-components' import { IncomingAgentflowInput, @@ -135,6 +136,59 @@ interface IExecuteNodeParams { subscriptionId: string } +// Helper function to check if auto-play TTS is enabled +const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => { + if (!textToSpeechConfig) return false + try { + const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig + // Check each provider to see if any has autoPlay enabled and status true + for (const providerKey in config) { + const provider = config[providerKey] + if (provider && provider.status === true && provider.autoPlay === true) { + return true + } + } + return false + } catch (error) { + return false + } +} + +// Helper function to generate TTS for response +const generateTTSForResponse = async ( + responseText: string, + textToSpeechConfig: string | undefined, + options: ICommonObject +): Promise => { + try { + if (!textToSpeechConfig) return null + const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig + + // Find the active provider configuration + let activeProviderConfig = null + for (const providerKey in config) { + const provider = config[providerKey] + if (provider && provider.status === true) { + activeProviderConfig = { + name: providerKey, + credentialId: provider.credentialId, + voice: provider.voice, + model: provider.model + } + break + } + } + + if (!activeProviderConfig) return null + + const audioBuffer = await convertTextToSpeech(responseText, activeProviderConfig, options) + return audioBuffer + } catch (error) { + logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`) + return null + } +} + interface IExecuteAgentFlowParams extends Omit { incomingInput: IncomingAgentflowInput } @@ -2038,5 +2092,26 @@ export const executeAgentFlow = async ({ if (sessionId) result.sessionId = sessionId + /*** Auto-play TTS Logic ***/ + if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) { + const options = { + orgId, + chatflowid, + chatId, + appDataSource, + databaseEntities + } + + const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options) + if (audioBuffer) { + const audioBase64 = audioBuffer.toString('base64') + + // Agent flows are always streamed, so send audio via SSE + if (sseStreamer) { + sseStreamer.streamAudioEvent(chatId, audioBase64) + } + } + } + return result } diff --git a/packages/server/src/utils/buildChatflow.ts b/packages/server/src/utils/buildChatflow.ts index be0bb6a10..e705b3e98 100644 --- a/packages/server/src/utils/buildChatflow.ts +++ b/packages/server/src/utils/buildChatflow.ts @@ -6,6 +6,7 @@ import { omit } from 'lodash' import { IFileUpload, convertSpeechToText, + convertTextToSpeech, ICommonObject, addSingleFileToStorage, generateFollowUpPrompts, @@ -70,9 +71,59 @@ import { executeAgentFlow } from './buildAgentflow' import { Workspace } from '../enterprise/database/entities/workspace.entity' import { Organization } from '../enterprise/database/entities/organization.entity' -/* - * Initialize the ending node to be executed - */ +// Helper function to check if auto-play TTS is enabled +const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => { + if (!textToSpeechConfig) return false + try { + const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig + // Check each provider to see if any has autoPlay enabled and status true + for (const providerKey in config) { + const provider = config[providerKey] + if (provider && provider.status === true && provider.autoPlay === true) { + return true + } + } + return false + } catch (error) { + return false + } +} + +// Helper function to generate TTS for response +const generateTTSForResponse = async ( + responseText: string, + textToSpeechConfig: string | undefined, + options: ICommonObject +): Promise => { + try { + if (!textToSpeechConfig) return null + const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig + + // Find the active provider configuration + let activeProviderConfig = null + for (const providerKey in config) { + const provider = config[providerKey] + if (provider && provider.status === true) { + activeProviderConfig = { + name: providerKey, + credentialId: provider.credentialId, + voice: provider.voice, + model: provider.model + } + break + } + } + + if (!activeProviderConfig) return null + + const audioBuffer = await convertTextToSpeech(responseText, activeProviderConfig, options) + return audioBuffer + } catch (error) { + logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`) + return null + } +} + const initEndingNode = async ({ endingNodeIds, componentNodes, @@ -828,6 +879,29 @@ export const executeFlow = async ({ if (memoryType) result.memoryType = memoryType if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput + if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) { + logger.info('[server]: Generating TTS for response') + logger.info(`[server/executeFlow]: TTS config: ${JSON.stringify(chatflow.textToSpeech)}`) + const options = { + orgId, + chatflowid, + chatId, + appDataSource, + databaseEntities + } + + const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options) + if (audioBuffer) { + const audioBase64 = audioBuffer.toString('base64') + + if (streaming && sseStreamer) { + sseStreamer.streamAudioEvent(chatId, audioBase64) + } else { + result.audioData = audioBase64 + } + } + } + return result } } diff --git a/packages/ui/src/ui-component/extended/TextToSpeech.jsx b/packages/ui/src/ui-component/extended/TextToSpeech.jsx index d52b56a49..1e90e42b5 100644 --- a/packages/ui/src/ui-component/extended/TextToSpeech.jsx +++ b/packages/ui/src/ui-component/extended/TextToSpeech.jsx @@ -455,6 +455,24 @@ const TextToSpeech = ({ dialogProps }) => { )} ))} + + {/* Auto-play Toggle */} + +
+ + Automatically play audio + + +
+ setValue(newValue, selectedProvider, 'autoPlay')} + value={textToSpeech[selectedProvider] ? textToSpeech[selectedProvider].autoPlay ?? false : false} + /> +
+ {/* Test TTS Button */} { inputRef.current?.focus() scrollToBottom() @@ -1033,6 +1039,9 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP abortMessage(payload.data) closeResponse() break + case 'audio': + handleAutoPlayAudio(payload.data) + break case 'end': setLocalStorageChatflow(chatflowid, chatId) closeResponse() @@ -1631,6 +1640,37 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP } } + const handleAutoPlayAudio = async (audioData) => { + try { + // Convert base64 audio data to blob and play + const audioBuffer = Uint8Array.from(atob(audioData), (c) => c.charCodeAt(0)) + const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' }) + const audioUrl = URL.createObjectURL(audioBlob) + const audio = new Audio(audioUrl) + + audio.addEventListener('ended', () => { + URL.revokeObjectURL(audioUrl) + }) + + await audio.play() + } catch (error) { + console.error('Error playing auto TTS audio:', error) + // Fallback: Use manual TTS API call + const lastMessage = messages[messages.length - 1] + if (lastMessage && lastMessage.type === 'apiMessage' && lastMessage.message) { + try { + await handleTTSClick(lastMessage.id, lastMessage.message) + } catch (fallbackError) { + console.error('TTS fallback also failed:', fallbackError) + enqueueSnackbar({ + message: 'Auto-play audio failed', + options: { variant: 'error' } + }) + } + } + } + } + const getInputDisabled = () => { return ( loading ||