Add option to auto-play TTS audio after prediction completes

This commit is contained in:
Ilango Rajagopal 2025-08-14 18:06:57 +05:30
parent 1902701e64
commit ef1b0dc856
8 changed files with 239 additions and 4 deletions

View File

@ -441,6 +441,7 @@ export interface IServerSideEventStreamer {
streamAbortEvent(chatId: string): void
streamEndEvent(chatId: string): void
streamUsageMetadataEvent(chatId: string, data: any): void
streamAudioEvent(chatId: string, audioData: string): void
}
export enum FollowUpPromptProvider {

View File

@ -64,6 +64,7 @@ export interface IChatFlow {
apikeyid?: string
analytic?: string
speechToText?: string
textToSpeech?: string
chatbotConfig?: string
followUpPrompts?: string
apiConfig?: string

View File

@ -393,6 +393,21 @@ export class RedisEventPublisher implements IServerSideEventStreamer {
}
}
streamAudioEvent(chatId: string, audioData: string): void {
    // Publish a base64-encoded audio payload on the chat's Redis channel.
    // Failures are logged and swallowed so streaming the reply is never interrupted.
    try {
        const payload = JSON.stringify({ chatId, eventType: 'audio', data: audioData })
        this.redisPublisher.publish(chatId, payload)
    } catch (error) {
        console.error('Error streaming audio event:', error)
    }
}
async disconnect() {
if (this.redisPublisher) {
await this.redisPublisher.quit()

View File

@ -257,4 +257,15 @@ export class SSEStreamer implements IServerSideEventStreamer {
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
}
}
streamAudioEvent(chatId: string, audioData: string): void {
const client = this.clients[chatId]
if (client) {
const clientResponse = {
event: 'audio',
data: audioData
}
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
}
}
}

View File

@ -11,7 +11,8 @@ import {
IMessage,
IServerSideEventStreamer,
convertChatHistoryToText,
generateFollowUpPrompts
generateFollowUpPrompts,
convertTextToSpeech
} from 'flowise-components'
import {
IncomingAgentflowInput,
@ -135,6 +136,59 @@ interface IExecuteNodeParams {
subscriptionId: string
}
// Returns true when the chatflow's text-to-speech configuration contains at
// least one provider that is both enabled (status === true) and configured to
// auto-play (autoPlay === true). Unparseable or empty config yields false.
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false
    try {
        const parsed = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        return Object.values(parsed as Record<string, any>).some(
            (provider: any) => provider?.status === true && provider?.autoPlay === true
        )
    } catch (error) {
        // Malformed JSON: treat as "auto-play disabled" rather than failing the request
        return false
    }
}
// Converts the response text to speech using the first enabled provider found
// in the chatflow's TTS configuration. Returns the generated audio as a
// Buffer, or null when no provider is active, the config is missing/invalid,
// or conversion fails (errors are logged, never thrown to the caller).
const generateTTSForResponse = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject
): Promise<Buffer | null> => {
    try {
        if (!textToSpeechConfig) return null
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // Pick the first provider entry whose status flag is enabled
        const activeEntry = Object.entries(config as Record<string, any>).find(
            ([, provider]: [string, any]) => provider?.status === true
        )
        if (!activeEntry) return null
        const [name, provider] = activeEntry
        return await convertTextToSpeech(
            responseText,
            { name, credentialId: provider.credentialId, voice: provider.voice, model: provider.model },
            options
        )
    } catch (error) {
        logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`)
        return null
    }
}
interface IExecuteAgentFlowParams extends Omit<IExecuteFlowParams, 'incomingInput'> {
incomingInput: IncomingAgentflowInput
}
@ -2038,5 +2092,26 @@ export const executeAgentFlow = async ({
if (sessionId) result.sessionId = sessionId
/*** Auto-play TTS Logic ***/
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
const options = {
orgId,
chatflowid,
chatId,
appDataSource,
databaseEntities
}
const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options)
if (audioBuffer) {
const audioBase64 = audioBuffer.toString('base64')
// Agent flows are always streamed, so send audio via SSE
if (sseStreamer) {
sseStreamer.streamAudioEvent(chatId, audioBase64)
}
}
}
return result
}

View File

@ -6,6 +6,7 @@ import { omit } from 'lodash'
import {
IFileUpload,
convertSpeechToText,
convertTextToSpeech,
ICommonObject,
addSingleFileToStorage,
generateFollowUpPrompts,
@ -70,9 +71,59 @@ import { executeAgentFlow } from './buildAgentflow'
import { Workspace } from '../enterprise/database/entities/workspace.entity'
import { Organization } from '../enterprise/database/entities/organization.entity'
/*
* Initialize the ending node to be executed
*/
// Returns true when the chatflow's text-to-speech configuration contains at
// least one provider that is both enabled (status === true) and configured to
// auto-play (autoPlay === true). Unparseable or empty config yields false.
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false
    try {
        const parsed = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        return Object.values(parsed as Record<string, any>).some(
            (provider: any) => provider?.status === true && provider?.autoPlay === true
        )
    } catch (error) {
        // Malformed JSON: treat as "auto-play disabled" rather than failing the request
        return false
    }
}
// Converts the response text to speech using the first enabled provider found
// in the chatflow's TTS configuration. Returns the generated audio as a
// Buffer, or null when no provider is active, the config is missing/invalid,
// or conversion fails (errors are logged, never thrown to the caller).
const generateTTSForResponse = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject
): Promise<Buffer | null> => {
    try {
        if (!textToSpeechConfig) return null
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // Pick the first provider entry whose status flag is enabled
        const activeEntry = Object.entries(config as Record<string, any>).find(
            ([, provider]: [string, any]) => provider?.status === true
        )
        if (!activeEntry) return null
        const [name, provider] = activeEntry
        return await convertTextToSpeech(
            responseText,
            { name, credentialId: provider.credentialId, voice: provider.voice, model: provider.model },
            options
        )
    } catch (error) {
        logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`)
        return null
    }
}
const initEndingNode = async ({
endingNodeIds,
componentNodes,
@ -828,6 +879,29 @@ export const executeFlow = async ({
if (memoryType) result.memoryType = memoryType
if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
logger.info('[server]: Generating TTS for response')
logger.info(`[server/executeFlow]: TTS config: ${JSON.stringify(chatflow.textToSpeech)}`)
const options = {
orgId,
chatflowid,
chatId,
appDataSource,
databaseEntities
}
const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options)
if (audioBuffer) {
const audioBase64 = audioBuffer.toString('base64')
if (streaming && sseStreamer) {
sseStreamer.streamAudioEvent(chatId, audioBase64)
} else {
result.audioData = audioBase64
}
}
}
return result
}
}

View File

@ -455,6 +455,24 @@ const TextToSpeech = ({ dialogProps }) => {
)}
</Box>
))}
{/* Auto-play Toggle */}
<Box sx={{ p: 2 }}>
<div style={{ display: 'flex', flexDirection: 'row', alignItems: 'center' }}>
<Typography>
Automatically play audio
<TooltipWithParser
style={{ marginLeft: 10 }}
title='When enabled, bot responses will be automatically converted to speech and played'
/>
</Typography>
</div>
<SwitchInput
onChange={(newValue) => setValue(newValue, selectedProvider, 'autoPlay')}
value={textToSpeech[selectedProvider] ? textToSpeech[selectedProvider].autoPlay ?? false : false}
/>
</Box>
{/* Test TTS Button */}
<Box sx={{ p: 2 }}>
<StyledButton

View File

@ -955,6 +955,12 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
setLoading(false)
setUserInput('')
setUploadedFiles([])
// Handle auto-play audio for non-streaming responses
if (data.audioData) {
handleAutoPlayAudio(data.audioData)
}
setTimeout(() => {
inputRef.current?.focus()
scrollToBottom()
@ -1033,6 +1039,9 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
abortMessage(payload.data)
closeResponse()
break
case 'audio':
handleAutoPlayAudio(payload.data)
break
case 'end':
setLocalStorageChatflow(chatflowid, chatId)
closeResponse()
@ -1631,6 +1640,37 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
}
}
// Decodes a base64 audio payload (received via SSE or the prediction response)
// into a Blob and plays it immediately. On failure (e.g. the browser blocks
// autoplay), falls back to the manual per-message TTS flow and surfaces an
// error snackbar if that also fails.
const handleAutoPlayAudio = async (audioData) => {
    let audioUrl = null
    try {
        // Convert base64 audio data to a blob and play it
        const audioBuffer = Uint8Array.from(atob(audioData), (c) => c.charCodeAt(0))
        const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' })
        audioUrl = URL.createObjectURL(audioBlob)
        const audio = new Audio(audioUrl)
        // Release the object URL once playback completes to free blob memory
        audio.addEventListener('ended', () => {
            URL.revokeObjectURL(audioUrl)
        })
        await audio.play()
    } catch (error) {
        console.error('Error playing auto TTS audio:', error)
        // Bug fix: if play() rejects, 'ended' never fires, so the object URL
        // created above must be revoked here or it leaks for the page lifetime.
        if (audioUrl) {
            URL.revokeObjectURL(audioUrl)
        }
        // Fallback: use the manual TTS API call on the last bot message
        const lastMessage = messages[messages.length - 1]
        if (lastMessage && lastMessage.type === 'apiMessage' && lastMessage.message) {
            try {
                await handleTTSClick(lastMessage.id, lastMessage.message)
            } catch (fallbackError) {
                console.error('TTS fallback also failed:', fallbackError)
                enqueueSnackbar({
                    message: 'Auto-play audio failed',
                    options: { variant: 'error' }
                })
            }
        }
    }
}
const getInputDisabled = () => {
return (
loading ||