Add option to autoplay TTS audio after prediction completes

2025-08-14 18:06:57 +05:30 · 2025-08-14 18:06:57 +05:30 · ef1b0dc856
parent 1902701e64
commit ef1b0dc856
8 changed files with 239 additions and 4 deletions
--- a/packages/components/src/Interface.ts
+++ b/packages/components/src/Interface.ts
@ -441,6 +441,7 @@ export interface IServerSideEventStreamer {
    streamAbortEvent(chatId: string): void
    streamEndEvent(chatId: string): void
    streamUsageMetadataEvent(chatId: string, data: any): void
    streamAudioEvent(chatId: string, audioData: string): void
 }
 export enum FollowUpPromptProvider {
--- a/packages/server/src/Interface.ts
+++ b/packages/server/src/Interface.ts
@ -64,6 +64,7 @@ export interface IChatFlow {
    apikeyid?: string
    analytic?: string
    speechToText?: string
    textToSpeech?: string
    chatbotConfig?: string
    followUpPrompts?: string
    apiConfig?: string
--- a/packages/server/src/queue/RedisEventPublisher.ts
+++ b/packages/server/src/queue/RedisEventPublisher.ts
@ -393,6 +393,21 @@ export class RedisEventPublisher implements IServerSideEventStreamer {
        }
    }
    /**
     * Publishes an audio event for a chat on the Redis channel named after `chatId`.
     *
     * The published message is a JSON string with `chatId`, `eventType: 'audio'`
     * and `data` set to `audioData`. Failures are logged and never thrown.
     *
     * @param chatId - Channel to publish on; also embedded in the message.
     * @param audioData - Audio payload forwarded unchanged (presumably base64 text; confirm against callers).
     */
    streamAudioEvent(chatId: string, audioData: string): void {
        const reportFailure = (error: unknown): void => {
            console.error('Error streaming audio event:', error)
        }
        try {
            const message = JSON.stringify({
                chatId,
                eventType: 'audio',
                data: audioData
            })
            // The publisher is used as a promise-based client elsewhere in this class
            // (quit() is awaited), so a failed publish would reject asynchronously and
            // bypass the surrounding try/catch. Attach a handler so it is logged instead
            // of surfacing as an unhandled rejection.
            void Promise.resolve(this.redisPublisher.publish(chatId, message)).catch(reportFailure)
        } catch (error) {
            // Synchronous failures (e.g. serialization) still land here.
            reportFailure(error)
        }
    }
    async disconnect() {
        if (this.redisPublisher) {
            await this.redisPublisher.quit()
--- a/packages/server/src/utils/SSEStreamer.ts
+++ b/packages/server/src/utils/SSEStreamer.ts
@ -257,4 +257,15 @@ export class SSEStreamer implements IServerSideEventStreamer {
            client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
        }
    }
    /**
     * Writes an `audio` event carrying `audioData` to the response registered
     * for `chatId`. Does nothing when no client is registered under that id.
     *
     * @param chatId - Key used to look up the client in `this.clients`.
     * @param audioData - Payload placed in the event's `data` field as-is.
     */
    streamAudioEvent(chatId: string, audioData: string): void {
        const target = this.clients[chatId]
        if (!target) return

        const serialized = JSON.stringify({ event: 'audio', data: audioData })
        target.response.write('message:\ndata:' + serialized + '\n\n')
    }
 }
--- a/packages/server/src/utils/buildAgentflow.ts
+++ b/packages/server/src/utils/buildAgentflow.ts
@ -11,7 +11,8 @@ import {
    IMessage,
    IServerSideEventStreamer,
    convertChatHistoryToText,
-    generateFollowUpPrompts
+    generateFollowUpPrompts,
    convertTextToSpeech
 } from 'flowise-components'
 import {
    IncomingAgentflowInput,
@ -135,6 +136,59 @@ interface IExecuteNodeParams {
    subscriptionId: string
 }
 /**
  * Reports whether the text-to-speech config contains at least one provider
  * whose `status` and `autoPlay` flags are both strictly `true`.
  *
  * @param textToSpeechConfig - Provider map, normally a JSON string keyed by provider name.
  * @returns `true` when such a provider exists; `false` for a missing config,
  *          unparsable JSON, or when no provider qualifies.
  */
 const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false

    try {
        const providers = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // Object.values throws on a parsed `null`; the catch below maps that to false,
        // which is the same answer an empty iteration would give.
        return Object.values(providers).some((entry: any) => Boolean(entry) && entry.status === true && entry.autoPlay === true)
    } catch (error) {
        return false
    }
 }
 /**
  * Converts `responseText` to speech using the first provider in the config
  * whose `status` is strictly `true`.
  *
  * @param responseText - Text handed to `convertTextToSpeech`.
  * @param textToSpeechConfig - Provider map, normally a JSON string keyed by provider name.
  * @param options - Passed through to `convertTextToSpeech` unchanged.
  * @returns The value resolved by `convertTextToSpeech`, or `null` when the config is
  *          missing, no provider is active, or any step throws (the error is logged).
  */
 const generateTTSForResponse = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject
 ): Promise<Buffer | null> => {
    try {
        if (!textToSpeechConfig) return null

        const providers = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        // First provider, in key order, that is switched on. `?? {}` keeps a parsed
        // `null` on the quiet "no active provider" path instead of throwing.
        const activeName = Object.keys(providers ?? {}).find((name) => Boolean(providers[name]) && providers[name].status === true)
        if (activeName === undefined) return null

        const { credentialId, voice, model } = providers[activeName]
        // Awaited inside the try so a rejected conversion is logged below.
        return await convertTextToSpeech(responseText, { name: activeName, credentialId, voice, model }, options)
    } catch (error) {
        logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`)
        return null
    }
 }
 interface IExecuteAgentFlowParams extends Omit<IExecuteFlowParams, 'incomingInput'> {
    incomingInput: IncomingAgentflowInput
 }
@ -2038,5 +2092,26 @@ export const executeAgentFlow = async ({
    if (sessionId) result.sessionId = sessionId
    /*** Auto-play TTS Logic ***/
    if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
        const options = {
            orgId,
            chatflowid,
            chatId,
            appDataSource,
            databaseEntities
        }
        const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options)
        if (audioBuffer) {
            const audioBase64 = audioBuffer.toString('base64')
            // Agent flows are always streamed, so send audio via SSE
            if (sseStreamer) {
                sseStreamer.streamAudioEvent(chatId, audioBase64)
            }
        }
    }
    return result
 }
--- a/packages/server/src/utils/buildChatflow.ts
+++ b/packages/server/src/utils/buildChatflow.ts
@ -6,6 +6,7 @@ import { omit } from 'lodash'
 import {
    IFileUpload,
    convertSpeechToText,
    convertTextToSpeech,
    ICommonObject,
    addSingleFileToStorage,
    generateFollowUpPrompts,
@ -70,9 +71,59 @@ import { executeAgentFlow } from './buildAgentflow'
 import { Workspace } from '../enterprise/database/entities/workspace.entity'
 import { Organization } from '../enterprise/database/entities/organization.entity'
-/*
+// Helper function to check if auto-play TTS is enabled
- * Initialize the ending node to be executed
+const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
- */
+    if (!textToSpeechConfig) return false
    try {
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // Check each provider to see if any has autoPlay enabled and status true
        for (const providerKey in config) {
            const provider = config[providerKey]
            if (provider && provider.status === true && provider.autoPlay === true) {
                return true
            }
        }
        return false
    } catch (error) {
        return false
    }
 }
 /**
  * Produces speech audio for `responseText` with the first provider in the
  * text-to-speech config whose `status` is strictly `true`.
  *
  * @param responseText - Text handed to `convertTextToSpeech`.
  * @param textToSpeechConfig - Provider map, normally a JSON string keyed by provider name.
  * @param options - Passed through to `convertTextToSpeech` unchanged.
  * @returns The value resolved by `convertTextToSpeech`, or `null` when the config is
  *          missing, no provider is active, or any step throws (the error is logged).
  */
 const generateTTSForResponse = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject
 ): Promise<Buffer | null> => {
    try {
        if (!textToSpeechConfig) return null

        const providerMap = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        // Pick the first enabled provider in key order; a parsed `null` yields no keys.
        const enabledKey = Object.keys(providerMap ?? {}).find((key) => Boolean(providerMap[key]) && providerMap[key].status === true)
        if (enabledKey === undefined) return null

        const chosen = providerMap[enabledKey]
        const activeProviderConfig = {
            name: enabledKey,
            credentialId: chosen.credentialId,
            voice: chosen.voice,
            model: chosen.model
        }
        // Awaited inside the try so a rejected conversion is logged below.
        return await convertTextToSpeech(responseText, activeProviderConfig, options)
    } catch (error) {
        logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`)
        return null
    }
 }
 const initEndingNode = async ({
    endingNodeIds,
    componentNodes,
@ -828,6 +879,29 @@ export const executeFlow = async ({
        if (memoryType) result.memoryType = memoryType
        if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
        if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
            logger.info('[server]: Generating TTS for response')
            logger.info(`[server/executeFlow]: TTS config: ${JSON.stringify(chatflow.textToSpeech)}`)
            const options = {
                orgId,
                chatflowid,
                chatId,
                appDataSource,
                databaseEntities
            }
            const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options)
            if (audioBuffer) {
                const audioBase64 = audioBuffer.toString('base64')
                if (streaming && sseStreamer) {
                    sseStreamer.streamAudioEvent(chatId, audioBase64)
                } else {
                    result.audioData = audioBase64
                }
            }
        }
        return result
    }
 }
--- a/packages/ui/src/ui-component/extended/TextToSpeech.jsx
+++ b/packages/ui/src/ui-component/extended/TextToSpeech.jsx
@ -455,6 +455,24 @@ const TextToSpeech = ({ dialogProps }) => {
                            )}
                        </Box>
                    ))}
                    {/* Auto-play Toggle */}
                    <Box sx={{ p: 2 }}>
                        <div style={{ display: 'flex', flexDirection: 'row', alignItems: 'center' }}>
                            <Typography>
                                Automatically play audio
                                <TooltipWithParser
                                    style={{ marginLeft: 10 }}
                                    title='When enabled, bot responses will be automatically converted to speech and played'
                                />
                            </Typography>
                        </div>
                        <SwitchInput
                            onChange={(newValue) => setValue(newValue, selectedProvider, 'autoPlay')}
                            value={textToSpeech[selectedProvider] ? textToSpeech[selectedProvider].autoPlay ?? false : false}
                        />
                    </Box>
                    {/* Test TTS Button */}
                    <Box sx={{ p: 2 }}>
                        <StyledButton
--- a/packages/ui/src/views/chatmessage/ChatMessage.jsx
+++ b/packages/ui/src/views/chatmessage/ChatMessage.jsx
@ -955,6 +955,12 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
                    setLoading(false)
                    setUserInput('')
                    setUploadedFiles([])
                    // Handle auto-play audio for non-streaming responses
                    if (data.audioData) {
                        handleAutoPlayAudio(data.audioData)
                    }
                    setTimeout(() => {
                        inputRef.current?.focus()
                        scrollToBottom()
@ -1033,6 +1039,9 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
                        abortMessage(payload.data)
                        closeResponse()
                        break
                    case 'audio':
                        handleAutoPlayAudio(payload.data)
                        break
                    case 'end':
                        setLocalStorageChatflow(chatflowid, chatId)
                        closeResponse()
@ -1631,6 +1640,37 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
        }
    }
    /**
     * Decodes base64 audio and starts playing it immediately. If playback cannot
     * start, falls back to the manual TTS request for the most recent API message
     * and shows an error snackbar when that fails as well.
     *
     * @param audioData - Base64-encoded audio bytes; they are wrapped in a Blob typed 'audio/mpeg'.
     */
    const handleAutoPlayAudio = async (audioData) => {
        // Track the blob URL so it is released on every exit path, not only on 'ended';
        // otherwise a blocked or failed playback leaks the blob for the page's lifetime.
        let audioUrl = ''
        const releaseAudioUrl = () => {
            if (audioUrl) {
                URL.revokeObjectURL(audioUrl)
                audioUrl = ''
            }
        }

        try {
            // Convert base64 audio data to blob and play
            const audioBuffer = Uint8Array.from(atob(audioData), (c) => c.charCodeAt(0))
            const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' })
            audioUrl = URL.createObjectURL(audioBlob)
            const audio = new Audio(audioUrl)

            audio.addEventListener('ended', releaseAudioUrl, { once: true })
            // A load/decode failure never reaches 'ended', so release there too.
            audio.addEventListener('error', releaseAudioUrl, { once: true })

            await audio.play()
        } catch (error) {
            // play() rejected (e.g. autoplay blocked) or decoding threw: nothing will play this URL.
            releaseAudioUrl()
            console.error('Error playing auto TTS audio:', error)

            // Fallback: Use manual TTS API call
            // NOTE(review): `messages` comes from the enclosing scope; confirm it already holds
            // the latest API message when this runs from a streaming callback.
            const lastMessage = messages[messages.length - 1]
            if (lastMessage && lastMessage.type === 'apiMessage' && lastMessage.message) {
                try {
                    await handleTTSClick(lastMessage.id, lastMessage.message)
                } catch (fallbackError) {
                    console.error('TTS fallback also failed:', fallbackError)
                    enqueueSnackbar({
                        message: 'Auto-play audio failed',
                        options: { variant: 'error' }
                    })
                }
            }
        }
    }
    const getInputDisabled = () => {
        return (
            loading ||