Use existing sse streamer to stream tts audio before sse client is removed

2025-08-20 13:12:59 +05:30 · 2025-08-20 13:12:59 +05:30 · 2b5554aafe
parent 2247646182
commit 2b5554aafe
3 changed files with 62 additions and 89 deletions
--- a/packages/server/src/utils/SSEStreamer.ts
+++ b/packages/server/src/utils/SSEStreamer.ts
@ -268,4 +268,30 @@ export class SSEStreamer implements IServerSideEventStreamer {
            client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
        }
    }
    /**
     * Sends one chunk of text-to-speech audio to the SSE client registered for a chat.
     *
     * The chunk is wrapped as `{ event: 'tts_data', data: audioChunk }` and written to the
     * client's response using the same `message:\ndata:<json>\n\n` framing as the other
     * stream events in this class. If no client is registered under `chatId`, nothing is written.
     *
     * @param chatId - Key used to look up the client in `this.clients`.
     * @param audioChunk - Audio payload, forwarded verbatim as the event's `data` field.
     */
    streamTTSDataEvent(chatId: string, audioChunk: string): void {
        // Intentionally no logging here: this runs once per audio chunk, and the client
        // map holds live response objects that must not be dumped to the console.
        const client = this.clients[chatId]
        if (client) {
            const clientResponse = {
                event: 'tts_data',
                data: audioChunk
            }
            client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
        }
    }
    /**
     * Tells the SSE client registered for a chat that text-to-speech streaming has finished.
     *
     * Writes `{ event: 'tts_end', data: {} }` to the client's response using the
     * `message:\ndata:<json>\n\n` framing. If no client is registered under `chatId`,
     * nothing is written.
     *
     * @param chatId - Key used to look up the client in `this.clients`.
     */
    streamTTSEndEvent(chatId: string): void {
        // Intentionally no logging here: the client map holds live response objects
        // that must not be dumped to the console.
        const client = this.clients[chatId]
        if (client) {
            const clientResponse = {
                event: 'tts_end',
                data: {}
            }
            client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
        }
    }
 }
--- a/packages/server/src/utils/buildAgentflow.ts
+++ b/packages/server/src/utils/buildAgentflow.ts
@ -11,8 +11,7 @@ import {
    IMessage,
    IServerSideEventStreamer,
    convertChatHistoryToText,
-    generateFollowUpPrompts,
+    generateFollowUpPrompts
    convertTextToSpeech
 } from 'flowise-components'
 import {
    IncomingAgentflowInput,
@ -58,6 +57,7 @@ import { ChatMessage } from '../database/entities/ChatMessage'
 import { Telemetry } from './telemetry'
 import { getWorkspaceSearchOptions } from '../enterprise/utils/ControllerServiceUtils'
 import { UsageCacheManager } from '../UsageCacheManager'
 import { generateTTSForResponseStream, shouldAutoPlayTTS } from './buildChatflow'
 interface IWaitingNode {
    nodeId: string
@ -136,59 +136,6 @@ interface IExecuteNodeParams {
    subscriptionId: string
 }
 /**
  * Reports whether at least one text-to-speech provider is both enabled and set to auto-play.
  *
  * The configuration maps provider keys to settings objects. A provider counts only when its
  * `status` and `autoPlay` fields are both strictly `true`.
  *
  * @param textToSpeechConfig - JSON-encoded provider configuration (an already-parsed value is
  *   used as-is).
  * @returns `true` when a qualifying provider exists; `false` when the configuration is empty,
  *   cannot be parsed, or has no qualifying provider.
  */
 const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false

    try {
        const providers = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // A parsed `null` configuration simply has no providers to inspect.
        return Object.values(providers ?? {}).some((settings) => settings?.status === true && settings?.autoPlay === true)
    } catch (error) {
        // Malformed JSON is treated the same as "auto-play disabled".
        return false
    }
 }
 /**
  * Converts a response text to speech using the first enabled provider in the configuration.
  *
  * The configuration maps provider keys to settings objects. The first entry (in key order)
  * whose `status` is strictly `true` is selected, and its key, `credentialId`, `voice` and
  * `model` are passed to `convertTextToSpeech`.
  *
  * @param responseText - Text to synthesize.
  * @param textToSpeechConfig - JSON-encoded provider configuration (an already-parsed value is
  *   used as-is).
  * @param options - Passed through unchanged to `convertTextToSpeech`.
  * @returns The value resolved by `convertTextToSpeech`, or `null` when the configuration is
  *   missing, no provider is enabled, or any step throws (the failure is logged).
  */
 const generateTTSForResponse = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject
 ): Promise<Buffer | null> => {
    try {
        if (!textToSpeechConfig) return null

        const providers = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        // A parsed `null` configuration has no entries, so it falls through to the "no provider" case.
        const enabledEntry = Object.entries(providers ?? {}).find(([, settings]) => settings?.status === true)
        if (!enabledEntry) return null

        const [providerName, settings] = enabledEntry
        const activeProviderConfig = {
            name: providerName,
            credentialId: settings.credentialId,
            voice: settings.voice,
            model: settings.model
        }

        // Awaited inside the try so that a rejected conversion is logged below instead of propagating.
        return await convertTextToSpeech(responseText, activeProviderConfig, options)
    } catch (error) {
        logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`)
        return null
    }
 }
 /**
  * Same shape as IExecuteFlowParams, except that `incomingInput` is replaced by the
  * agentflow-specific IncomingAgentflowInput type.
  */
 interface IExecuteAgentFlowParams extends Omit<IExecuteFlowParams, 'incomingInput'> {
    incomingInput: IncomingAgentflowInput
 }
@ -2092,7 +2039,6 @@ export const executeAgentFlow = async ({
    if (sessionId) result.sessionId = sessionId
    /*** Auto-play TTS Logic ***/
    if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
        const options = {
            orgId,
@ -2102,14 +2048,8 @@ export const executeAgentFlow = async ({
            databaseEntities
        }
        const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options)
        if (audioBuffer) {
            const audioBase64 = audioBuffer.toString('base64')
            // Agent flows are always streamed, so send audio via SSE
        if (sseStreamer) {
-                sseStreamer.streamAudioEvent(chatId, audioBase64)
+            await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, sseStreamer)
            }
        }
    }
--- a/packages/server/src/utils/buildChatflow.ts
+++ b/packages/server/src/utils/buildChatflow.ts
@ -7,6 +7,7 @@ import {
    IFileUpload,
    convertSpeechToText,
    convertTextToSpeech,
    convertTextToSpeechStream,
    ICommonObject,
    addSingleFileToStorage,
    generateFollowUpPrompts,
@ -17,7 +18,8 @@ import {
    getFileFromUpload,
    removeSpecificFileFromUpload,
    EvaluationRunner,
-    handleEscapeCharacters
+    handleEscapeCharacters,
    IServerSideEventStreamer
 } from 'flowise-components'
 import { StatusCodes } from 'http-status-codes'
 import {
@ -71,12 +73,10 @@ import { executeAgentFlow } from './buildAgentflow'
 import { Workspace } from '../enterprise/database/entities/workspace.entity'
 import { Organization } from '../enterprise/database/entities/organization.entity'
 // Helper function to check if auto-play TTS is enabled
 const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false
    try {
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // Check each provider to see if any has autoPlay enabled and status true
        for (const providerKey in config) {
            const provider = config[providerKey]
            if (provider && provider.status === true && provider.autoPlay === true) {
@ -85,21 +85,22 @@ const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boole
        }
        return false
    } catch (error) {
        logger.error(`Error parsing textToSpeechConfig: ${getErrorMessage(error)}`)
        return false
    }
 }
-// Helper function to generate TTS for response
+const generateTTSForResponseStream = async (
 const generateTTSForResponse = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
-    options: ICommonObject
+    options: ICommonObject,
-): Promise<Buffer | null> => {
+    chatId: string,
    sseStreamer: IServerSideEventStreamer
 ): Promise<void> => {
    try {
-        if (!textToSpeechConfig) return null
+        if (!textToSpeechConfig) return
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // Find the active provider configuration
        let activeProviderConfig = null
        for (const providerKey in config) {
            const provider = config[providerKey]
@ -114,13 +115,24 @@ const generateTTSForResponse = async (
            }
        }
-        if (!activeProviderConfig) return null
+        if (!activeProviderConfig) return
-        const audioBuffer = await convertTextToSpeech(responseText, activeProviderConfig, options)
+        await convertTextToSpeechStream(
-        return audioBuffer
+            responseText,
            activeProviderConfig,
            options,
            (chunk: Buffer) => {
                const audioBase64 = chunk.toString('base64')
                logger.info(`Received TTS chunk: ${audioBase64}`)
                sseStreamer.streamTTSDataEvent(chatId, audioBase64)
            },
            () => {
                sseStreamer.streamTTSEndEvent(chatId)
            }
        )
    } catch (error) {
-        logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`)
+        logger.error(`[server]: TTS streaming failed: ${getErrorMessage(error)}`)
-        return null
+        sseStreamer.streamTTSEndEvent(chatId)
    }
 }
@ -880,8 +892,6 @@ export const executeFlow = async ({
        if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
        if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
            logger.info('[server]: Generating TTS for response')
            logger.info(`[server/executeFlow]: TTS config: ${JSON.stringify(chatflow.textToSpeech)}`)
            const options = {
                orgId,
                chatflowid,
@ -890,15 +900,10 @@ export const executeFlow = async ({
                databaseEntities
            }
            const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options)
            if (audioBuffer) {
                const audioBase64 = audioBuffer.toString('base64')
            if (streaming && sseStreamer) {
-                    sseStreamer.streamAudioEvent(chatId, audioBase64)
+                await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, sseStreamer)
-                } else {
+            } else if (sseStreamer) {
-                    result.audioData = audioBase64
+                await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, sseStreamer)
                }
            }
        }
@ -1129,3 +1134,5 @@ const incrementFailedMetricCounter = (metricsProvider: IMetricsProvider, isInter
        )
    }
 }
 export { shouldAutoPlayTTS, generateTTSForResponseStream }