Add option to autoplay tts audio after prediction completes
This commit is contained in:
parent
1902701e64
commit
ef1b0dc856
|
|
@ -441,6 +441,7 @@ export interface IServerSideEventStreamer {
|
||||||
streamAbortEvent(chatId: string): void
|
streamAbortEvent(chatId: string): void
|
||||||
streamEndEvent(chatId: string): void
|
streamEndEvent(chatId: string): void
|
||||||
streamUsageMetadataEvent(chatId: string, data: any): void
|
streamUsageMetadataEvent(chatId: string, data: any): void
|
||||||
|
streamAudioEvent(chatId: string, audioData: string): void
|
||||||
}
|
}
|
||||||
|
|
||||||
export enum FollowUpPromptProvider {
|
export enum FollowUpPromptProvider {
|
||||||
|
|
|
||||||
|
|
@ -64,6 +64,7 @@ export interface IChatFlow {
|
||||||
apikeyid?: string
|
apikeyid?: string
|
||||||
analytic?: string
|
analytic?: string
|
||||||
speechToText?: string
|
speechToText?: string
|
||||||
|
textToSpeech?: string
|
||||||
chatbotConfig?: string
|
chatbotConfig?: string
|
||||||
followUpPrompts?: string
|
followUpPrompts?: string
|
||||||
apiConfig?: string
|
apiConfig?: string
|
||||||
|
|
|
||||||
|
|
@ -393,6 +393,21 @@ export class RedisEventPublisher implements IServerSideEventStreamer {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Publishes an audio payload for a chat on its Redis channel.
 *
 * The message is the JSON text of `{ chatId, eventType: 'audio', data }` and is
 * published on the channel named after `chatId`. Failures are logged and never
 * propagated to the caller.
 *
 * @param chatId - Chat identifier; used as the channel name and echoed in the payload.
 * @param audioData - Audio content to forward (the visible callers pass base64 text).
 */
streamAudioEvent(chatId: string, audioData: string): void {
    try {
        const message = JSON.stringify({
            chatId,
            eventType: 'audio',
            data: audioData
        })
        // This method is synchronous, so a promise returned by publish() would reject
        // outside the surrounding try/catch and surface as an unhandled rejection.
        // Promise.resolve() makes the handler safe whether publish() returns a promise
        // or a plain value.
        Promise.resolve(this.redisPublisher.publish(chatId, message)).catch((error: unknown) => {
            console.error('Error streaming audio event:', error)
        })
    } catch (error) {
        // Synchronous failures (e.g. while serializing or calling publish) are logged too.
        console.error('Error streaming audio event:', error)
    }
}
|
||||||
|
|
||||||
async disconnect() {
|
async disconnect() {
|
||||||
if (this.redisPublisher) {
|
if (this.redisPublisher) {
|
||||||
await this.redisPublisher.quit()
|
await this.redisPublisher.quit()
|
||||||
|
|
|
||||||
|
|
@ -257,4 +257,15 @@ export class SSEStreamer implements IServerSideEventStreamer {
|
||||||
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Writes an audio event to the SSE client registered under `chatId`.
 * Nothing is written when no client is registered for that id.
 *
 * @param chatId - Key used to look up the client in `this.clients`.
 * @param audioData - Audio content placed in the event's `data` field.
 */
streamAudioEvent(chatId: string, audioData: string): void {
    const target = this.clients[chatId]
    if (!target) return

    const serialized = JSON.stringify({ event: 'audio', data: audioData })
    target.response.write('message:\ndata:' + serialized + '\n\n')
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,8 @@ import {
|
||||||
IMessage,
|
IMessage,
|
||||||
IServerSideEventStreamer,
|
IServerSideEventStreamer,
|
||||||
convertChatHistoryToText,
|
convertChatHistoryToText,
|
||||||
generateFollowUpPrompts
|
generateFollowUpPrompts,
|
||||||
|
convertTextToSpeech
|
||||||
} from 'flowise-components'
|
} from 'flowise-components'
|
||||||
import {
|
import {
|
||||||
IncomingAgentflowInput,
|
IncomingAgentflowInput,
|
||||||
|
|
@ -135,6 +136,59 @@ interface IExecuteNodeParams {
|
||||||
subscriptionId: string
|
subscriptionId: string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Reports whether auto-play text-to-speech is switched on.
 *
 * @param textToSpeechConfig - JSON text mapping provider names to their settings
 *   (a value that is already an object is used as-is at runtime).
 * @returns `true` when at least one provider entry has both `status === true` and
 *   `autoPlay === true`; `false` for missing, empty or unparsable configuration.
 */
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false

    try {
        const providers = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        for (const name in providers) {
            const entry = providers[name]
            // Skip null/empty entries before reading their flags.
            if (!entry) continue
            if (entry.status === true && entry.autoPlay === true) return true
        }
    } catch (error) {
        // Unparsable configuration counts as "auto-play off".
    }
    return false
}
|
||||||
|
|
||||||
|
/**
 * Generates speech audio for a response using the text-to-speech configuration.
 *
 * Provider selection: among entries with `status === true`, the first one that also
 * has `autoPlay === true` is used, so the provider that enabled auto-play is the one
 * that speaks; when no active entry has auto-play set, the first active entry is used.
 *
 * @param responseText - Text handed to `convertTextToSpeech`.
 * @param textToSpeechConfig - JSON text mapping provider names to their settings
 *   (a value that is already an object is used as-is at runtime).
 * @param options - Passed through unchanged to `convertTextToSpeech`.
 * @returns The value resolved by `convertTextToSpeech`, or `null` when the configuration
 *   is missing, has no active provider, or any step throws (the error is logged).
 */
const generateTTSForResponse = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject
): Promise<Buffer | null> => {
    try {
        if (!textToSpeechConfig) return null
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        let autoPlayProviderKey: string | undefined
        let firstActiveProviderKey: string | undefined
        for (const providerKey in config) {
            const provider = config[providerKey]
            if (!provider || provider.status !== true) continue
            if (provider.autoPlay === true) {
                autoPlayProviderKey = providerKey
                break
            }
            // Remember the first active provider as a fallback.
            if (firstActiveProviderKey === undefined) firstActiveProviderKey = providerKey
        }

        const selectedKey = autoPlayProviderKey !== undefined ? autoPlayProviderKey : firstActiveProviderKey
        if (selectedKey === undefined) return null

        const selected = config[selectedKey]
        const activeProviderConfig = {
            name: selectedKey,
            credentialId: selected.credentialId,
            voice: selected.voice,
            model: selected.model
        }

        const audioBuffer = await convertTextToSpeech(responseText, activeProviderConfig, options)
        return audioBuffer
    } catch (error) {
        logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`)
        return null
    }
}
|
||||||
|
|
||||||
interface IExecuteAgentFlowParams extends Omit<IExecuteFlowParams, 'incomingInput'> {
|
interface IExecuteAgentFlowParams extends Omit<IExecuteFlowParams, 'incomingInput'> {
|
||||||
incomingInput: IncomingAgentflowInput
|
incomingInput: IncomingAgentflowInput
|
||||||
}
|
}
|
||||||
|
|
@ -2038,5 +2092,26 @@ export const executeAgentFlow = async ({
|
||||||
|
|
||||||
if (sessionId) result.sessionId = sessionId
|
if (sessionId) result.sessionId = sessionId
|
||||||
|
|
||||||
|
/*** Auto-play TTS Logic ***/
|
||||||
|
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
|
||||||
|
const options = {
|
||||||
|
orgId,
|
||||||
|
chatflowid,
|
||||||
|
chatId,
|
||||||
|
appDataSource,
|
||||||
|
databaseEntities
|
||||||
|
}
|
||||||
|
|
||||||
|
const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options)
|
||||||
|
if (audioBuffer) {
|
||||||
|
const audioBase64 = audioBuffer.toString('base64')
|
||||||
|
|
||||||
|
// Agent flows are always streamed, so send audio via SSE
|
||||||
|
if (sseStreamer) {
|
||||||
|
sseStreamer.streamAudioEvent(chatId, audioBase64)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ import { omit } from 'lodash'
|
||||||
import {
|
import {
|
||||||
IFileUpload,
|
IFileUpload,
|
||||||
convertSpeechToText,
|
convertSpeechToText,
|
||||||
|
convertTextToSpeech,
|
||||||
ICommonObject,
|
ICommonObject,
|
||||||
addSingleFileToStorage,
|
addSingleFileToStorage,
|
||||||
generateFollowUpPrompts,
|
generateFollowUpPrompts,
|
||||||
|
|
@ -70,9 +71,59 @@ import { executeAgentFlow } from './buildAgentflow'
|
||||||
import { Workspace } from '../enterprise/database/entities/workspace.entity'
|
import { Workspace } from '../enterprise/database/entities/workspace.entity'
|
||||||
import { Organization } from '../enterprise/database/entities/organization.entity'
|
import { Organization } from '../enterprise/database/entities/organization.entity'
|
||||||
|
|
||||||
/*
|
// Helper function to check if auto-play TTS is enabled
|
||||||
* Initialize the ending node to be executed
|
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
// Returns true when any provider entry in the config has both status === true and autoPlay === true.
|
||||||
*/
|
if (!textToSpeechConfig) return false
// A missing or empty config means auto-play is off.
|
||||||
|
try {
|
||||||
|
const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
// String input is parsed as JSON; any other truthy value is used as-is.
|
||||||
|
// Check each provider to see if any has autoPlay enabled and status true
|
||||||
|
for (const providerKey in config) {
|
||||||
|
const provider = config[providerKey]
|
||||||
|
if (provider && provider.status === true && provider.autoPlay === true) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
} catch (error) {
// Reached when JSON.parse (or the scan above) throws; treated as auto-play off.
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Generates speech audio for a response using the text-to-speech configuration.
 *
 * Provider selection: among entries with `status === true`, the first one that also
 * has `autoPlay === true` is used, so the provider that enabled auto-play is the one
 * that speaks; when no active entry has auto-play set, the first active entry is used.
 *
 * @param responseText - Text handed to `convertTextToSpeech`.
 * @param textToSpeechConfig - JSON text mapping provider names to their settings
 *   (a value that is already an object is used as-is at runtime).
 * @param options - Passed through unchanged to `convertTextToSpeech`.
 * @returns The value resolved by `convertTextToSpeech`, or `null` when the configuration
 *   is missing, has no active provider, or any step throws (the error is logged).
 */
const generateTTSForResponse = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject
): Promise<Buffer | null> => {
    try {
        if (!textToSpeechConfig) return null
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig

        let autoPlayProviderKey: string | undefined
        let firstActiveProviderKey: string | undefined
        for (const providerKey in config) {
            const provider = config[providerKey]
            if (!provider || provider.status !== true) continue
            if (provider.autoPlay === true) {
                autoPlayProviderKey = providerKey
                break
            }
            // Remember the first active provider as a fallback.
            if (firstActiveProviderKey === undefined) firstActiveProviderKey = providerKey
        }

        const selectedKey = autoPlayProviderKey !== undefined ? autoPlayProviderKey : firstActiveProviderKey
        if (selectedKey === undefined) return null

        const selected = config[selectedKey]
        const activeProviderConfig = {
            name: selectedKey,
            credentialId: selected.credentialId,
            voice: selected.voice,
            model: selected.model
        }

        const audioBuffer = await convertTextToSpeech(responseText, activeProviderConfig, options)
        return audioBuffer
    } catch (error) {
        logger.error(`[server]: TTS generation failed: ${getErrorMessage(error)}`)
        return null
    }
}
|
||||||
|
|
||||||
const initEndingNode = async ({
|
const initEndingNode = async ({
|
||||||
endingNodeIds,
|
endingNodeIds,
|
||||||
componentNodes,
|
componentNodes,
|
||||||
|
|
@ -828,6 +879,29 @@ export const executeFlow = async ({
|
||||||
if (memoryType) result.memoryType = memoryType
|
if (memoryType) result.memoryType = memoryType
|
||||||
if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
|
if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
|
||||||
|
|
||||||
|
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
|
||||||
|
logger.info('[server]: Generating TTS for response')
|
||||||
|
logger.info(`[server/executeFlow]: TTS config: ${JSON.stringify(chatflow.textToSpeech)}`)
|
||||||
|
const options = {
|
||||||
|
orgId,
|
||||||
|
chatflowid,
|
||||||
|
chatId,
|
||||||
|
appDataSource,
|
||||||
|
databaseEntities
|
||||||
|
}
|
||||||
|
|
||||||
|
const audioBuffer = await generateTTSForResponse(result.text, chatflow.textToSpeech, options)
|
||||||
|
if (audioBuffer) {
|
||||||
|
const audioBase64 = audioBuffer.toString('base64')
|
||||||
|
|
||||||
|
if (streaming && sseStreamer) {
|
||||||
|
sseStreamer.streamAudioEvent(chatId, audioBase64)
|
||||||
|
} else {
|
||||||
|
result.audioData = audioBase64
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -455,6 +455,24 @@ const TextToSpeech = ({ dialogProps }) => {
|
||||||
)}
|
)}
|
||||||
</Box>
|
</Box>
|
||||||
))}
|
))}
|
||||||
|
|
||||||
|
{/* Auto-play Toggle */}
|
||||||
|
<Box sx={{ p: 2 }}>
|
||||||
|
<div style={{ display: 'flex', flexDirection: 'row', alignItems: 'center' }}>
|
||||||
|
<Typography>
|
||||||
|
Automatically play audio
|
||||||
|
<TooltipWithParser
|
||||||
|
style={{ marginLeft: 10 }}
|
||||||
|
title='When enabled, bot responses will be automatically converted to speech and played'
|
||||||
|
/>
|
||||||
|
</Typography>
|
||||||
|
</div>
|
||||||
|
<SwitchInput
|
||||||
|
onChange={(newValue) => setValue(newValue, selectedProvider, 'autoPlay')}
|
||||||
|
value={textToSpeech[selectedProvider] ? textToSpeech[selectedProvider].autoPlay ?? false : false}
|
||||||
|
/>
|
||||||
|
</Box>
|
||||||
|
|
||||||
{/* Test TTS Button */}
|
{/* Test TTS Button */}
|
||||||
<Box sx={{ p: 2 }}>
|
<Box sx={{ p: 2 }}>
|
||||||
<StyledButton
|
<StyledButton
|
||||||
|
|
|
||||||
|
|
@ -955,6 +955,12 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
||||||
setLoading(false)
|
setLoading(false)
|
||||||
setUserInput('')
|
setUserInput('')
|
||||||
setUploadedFiles([])
|
setUploadedFiles([])
|
||||||
|
|
||||||
|
// Handle auto-play audio for non-streaming responses
|
||||||
|
if (data.audioData) {
|
||||||
|
handleAutoPlayAudio(data.audioData)
|
||||||
|
}
|
||||||
|
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
inputRef.current?.focus()
|
inputRef.current?.focus()
|
||||||
scrollToBottom()
|
scrollToBottom()
|
||||||
|
|
@ -1033,6 +1039,9 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
||||||
abortMessage(payload.data)
|
abortMessage(payload.data)
|
||||||
closeResponse()
|
closeResponse()
|
||||||
break
|
break
|
||||||
|
case 'audio':
|
||||||
|
handleAutoPlayAudio(payload.data)
|
||||||
|
break
|
||||||
case 'end':
|
case 'end':
|
||||||
setLocalStorageChatflow(chatflowid, chatId)
|
setLocalStorageChatflow(chatflowid, chatId)
|
||||||
closeResponse()
|
closeResponse()
|
||||||
|
|
@ -1631,6 +1640,37 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Plays base64-encoded audio received with a bot response.
 *
 * The data is decoded into an `audio/mpeg` blob and played through a temporary
 * object URL. The URL is released when playback ends, when the media element
 * reports an error, or when starting playback fails, so it never outlives the audio.
 * If playback cannot start, the manual TTS path (`handleTTSClick`) is tried for the
 * last message when it is a non-empty `apiMessage`; a snackbar error is shown only
 * if that fallback also throws.
 *
 * NOTE(review): the fallback reads `messages` from the enclosing scope; confirm it
 * already contains the new bot message at the time this runs.
 *
 * @param audioData - Base64 text of the audio to play.
 */
const handleAutoPlayAudio = async (audioData) => {
    // Holds the temporary object URL so every exit path can release it exactly once.
    let audioUrl = ''
    const releaseAudioUrl = () => {
        if (audioUrl) {
            URL.revokeObjectURL(audioUrl)
            audioUrl = ''
        }
    }

    try {
        // Convert base64 audio data to blob and play
        const audioBuffer = Uint8Array.from(atob(audioData), (c) => c.charCodeAt(0))
        const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' })
        audioUrl = URL.createObjectURL(audioBlob)
        const audio = new Audio(audioUrl)

        audio.addEventListener('ended', releaseAudioUrl)
        // Decoding/network failures never fire 'ended', so release on 'error' as well.
        audio.addEventListener('error', releaseAudioUrl)

        await audio.play()
    } catch (error) {
        // play() can reject (e.g. autoplay blocked); the URL would otherwise leak.
        releaseAudioUrl()
        console.error('Error playing auto TTS audio:', error)
        // Fallback: Use manual TTS API call
        const lastMessage = messages[messages.length - 1]
        if (lastMessage && lastMessage.type === 'apiMessage' && lastMessage.message) {
            try {
                await handleTTSClick(lastMessage.id, lastMessage.message)
            } catch (fallbackError) {
                console.error('TTS fallback also failed:', fallbackError)
                enqueueSnackbar({
                    message: 'Auto-play audio failed',
                    options: { variant: 'error' }
                })
            }
        }
    }
}
|
||||||
|
|
||||||
const getInputDisabled = () => {
|
const getInputDisabled = () => {
|
||||||
return (
|
return (
|
||||||
loading ||
|
loading ||
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue