From 2c39b51cff9828b1f1523762e2a2f7d63f682e30 Mon Sep 17 00:00:00 2001 From: Ilango Rajagopal Date: Wed, 13 Aug 2025 08:34:55 +0530 Subject: [PATCH] Add tts UI --- packages/ui/src/api/tts.js | 10 + packages/ui/src/assets/images/elevenlabs.svg | 7 + .../dialog/ChatflowConfigurationDialog.jsx | 10 +- .../ui-component/extended/TextToSpeech.jsx | 500 ++++++++++++++++++ .../ui/src/views/chatmessage/ChatMessage.jsx | 203 ++++++- 5 files changed, 707 insertions(+), 23 deletions(-) create mode 100644 packages/ui/src/api/tts.js create mode 100644 packages/ui/src/assets/images/elevenlabs.svg create mode 100644 packages/ui/src/ui-component/extended/TextToSpeech.jsx diff --git a/packages/ui/src/api/tts.js b/packages/ui/src/api/tts.js new file mode 100644 index 000000000..89598e5e8 --- /dev/null +++ b/packages/ui/src/api/tts.js @@ -0,0 +1,10 @@ +import client from './client' + +const generateVoice = (body) => client.post('/text-to-speech/generate', body) + +const listVoices = (params) => client.get('/text-to-speech/voices', { params }) + +export default { + generateVoice, + listVoices +} diff --git a/packages/ui/src/assets/images/elevenlabs.svg b/packages/ui/src/assets/images/elevenlabs.svg new file mode 100644 index 000000000..5e3632613 --- /dev/null +++ b/packages/ui/src/assets/images/elevenlabs.svg @@ -0,0 +1,7 @@ + + + + + + + diff --git a/packages/ui/src/ui-component/dialog/ChatflowConfigurationDialog.jsx b/packages/ui/src/ui-component/dialog/ChatflowConfigurationDialog.jsx index cfa10cfbe..96624dfe8 100644 --- a/packages/ui/src/ui-component/dialog/ChatflowConfigurationDialog.jsx +++ b/packages/ui/src/ui-component/dialog/ChatflowConfigurationDialog.jsx @@ -4,6 +4,7 @@ import { createPortal } from 'react-dom' import { Box, Dialog, DialogContent, DialogTitle, Tabs, Tab } from '@mui/material' import { tabsClasses } from '@mui/material/Tabs' import SpeechToText from '@/ui-component/extended/SpeechToText' +import TextToSpeech from '@/ui-component/extended/TextToSpeech' import 
Security from '@/ui-component/extended/Security' import ChatFeedback from '@/ui-component/extended/ChatFeedback' import AnalyseFlow from '@/ui-component/extended/AnalyseFlow' @@ -30,6 +31,10 @@ const CHATFLOW_CONFIGURATION_TABS = [ label: 'Speech to Text', id: 'speechToText' }, + { + label: 'Text to Speech', + id: 'textToSpeech' + }, { label: 'Chat Feedback', id: 'chatFeedback' @@ -125,18 +130,19 @@ const ChatflowConfigurationDialog = ({ show, isAgentCanvas, dialogProps, onCance alignItems: 'center', mb: 1 }} - key={index} + key={item.id} label={item.label} {...a11yProps(index)} > ))} {filteredTabs.map((item, index) => ( - + {item.id === 'security' && } {item.id === 'conversationStarters' ? : null} {item.id === 'followUpPrompts' ? : null} {item.id === 'speechToText' ? : null} + {item.id === 'textToSpeech' ? : null} {item.id === 'chatFeedback' ? : null} {item.id === 'analyseChatflow' ? : null} {item.id === 'leads' ? : null} diff --git a/packages/ui/src/ui-component/extended/TextToSpeech.jsx b/packages/ui/src/ui-component/extended/TextToSpeech.jsx new file mode 100644 index 000000000..0615f41c0 --- /dev/null +++ b/packages/ui/src/ui-component/extended/TextToSpeech.jsx @@ -0,0 +1,500 @@ +import { useDispatch } from 'react-redux' +import { useState, useEffect } from 'react' +import PropTypes from 'prop-types' +import { enqueueSnackbar as enqueueSnackbarAction, closeSnackbar as closeSnackbarAction, SET_CHATFLOW } from '@/store/actions' + +// material-ui +import { + Typography, + Box, + Button, + FormControl, + ListItem, + ListItemAvatar, + ListItemText, + MenuItem, + Select, + CircularProgress +} from '@mui/material' +import { IconX, IconVolume } from '@tabler/icons-react' +import { useTheme } from '@mui/material/styles' + +// Project import +import CredentialInputHandler from '@/views/canvas/CredentialInputHandler' +import { TooltipWithParser } from '@/ui-component/tooltip/TooltipWithParser' +import { SwitchInput } from '@/ui-component/switch/Switch' +import { Input } 
from '@/ui-component/input/Input' +import { StyledButton } from '@/ui-component/button/StyledButton' +import { Dropdown } from '@/ui-component/dropdown/Dropdown' +import openAISVG from '@/assets/images/openai.svg' +import elevenLabsSVG from '@/assets/images/elevenlabs.svg' + +// store +import useNotifier from '@/utils/useNotifier' + +// API +import useApi from '@/hooks/useApi' +import chatflowsApi from '@/api/chatflows' +import ttsApi from '@/api/tts' + +const TextToSpeechType = { + OPENAI_TTS: 'openai', + ELEVEN_LABS_TTS: 'elevenlabs' +} + +// Weird quirk - the key must match the name property value. +const textToSpeechProviders = { + [TextToSpeechType.OPENAI_TTS]: { + label: 'OpenAI TTS', + name: TextToSpeechType.OPENAI_TTS, + icon: openAISVG, + url: 'https://platform.openai.com/docs/guides/text-to-speech', + inputs: [ + { + label: 'Connect Credential', + name: 'credential', + type: 'credential', + credentialNames: ['openAIApi'] + }, + { + label: 'Voice', + name: 'voice', + type: 'options', + description: 'The voice to use when generating the audio', + options: [ + { label: 'Alloy', name: 'alloy' }, + { label: 'Echo', name: 'echo' }, + { label: 'Fable', name: 'fable' }, + { label: 'Onyx', name: 'onyx' }, + { label: 'Nova', name: 'nova' }, + { label: 'Shimmer', name: 'shimmer' } + ], + default: 'alloy', + optional: true + }, + { + label: 'Model', + name: 'model', + type: 'options', + description: 'The TTS model to use', + options: [ + { label: 'TTS-1', name: 'tts-1' }, + { label: 'TTS-1 HD', name: 'tts-1-hd' } + ], + default: 'tts-1', + optional: true + } + ] + }, + [TextToSpeechType.ELEVEN_LABS_TTS]: { + label: 'Eleven Labs TTS', + name: TextToSpeechType.ELEVEN_LABS_TTS, + icon: elevenLabsSVG, + url: 'https://elevenlabs.io/', + inputs: [ + { + label: 'Connect Credential', + name: 'credential', + type: 'credential', + credentialNames: ['elevenLabsApi'] + }, + { + label: 'Voice', + name: 'voice', + type: 'voice_select', + description: 'The voice to use for 
text-to-speech', + default: '21m00Tcm4TlvDq8ikWAM', + optional: true + } + ] + } +} + +const TextToSpeech = ({ dialogProps }) => { + const dispatch = useDispatch() + + useNotifier() + const theme = useTheme() + + const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args)) + const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args)) + + const [textToSpeech, setTextToSpeech] = useState({}) + const [selectedProvider, setSelectedProvider] = useState('none') + const [voices, setVoices] = useState([]) + const [loadingVoices, setLoadingVoices] = useState(false) + + const listVoicesApi = useApi(ttsApi.listVoices) + + const onSave = async () => { + const textToSpeechConfig = setValue(true, selectedProvider, 'status') + try { + const saveResp = await chatflowsApi.updateChatflow(dialogProps.chatflow.id, { + textToSpeech: JSON.stringify(textToSpeechConfig) + }) + if (saveResp.data) { + enqueueSnackbar({ + message: 'Text To Speech Configuration Saved', + options: { + key: Date.now() + Math.random(), + variant: 'success', + action: (key) => ( + + ) + } + }) + dispatch({ type: SET_CHATFLOW, chatflow: saveResp.data }) + } + } catch (error) { + enqueueSnackbar({ + message: `Failed to save Text To Speech Configuration: ${ + typeof error.response.data === 'object' ? 
error.response.data.message : error.response.data
+                }`,
+                options: {
+                    key: Date.now() + Math.random(),
+                    variant: 'error',
+                    persist: true,
+                    action: (key) => (
+                    )
+                }
+            })
+        }
+    }
+
+    /**
+     * Set `inputParamName` to `value` on the config entry of `providerName`, store the resulting
+     * configuration in component state and return it (onSave serialises the returned object).
+     *
+     * Switching a provider's `status` to true switches every provider listed in
+     * `textToSpeechProviders` off, as well as the `none` entry when one exists.
+     *
+     * The object currently held in state is never written to: every provider entry that
+     * changes is copied first, so React sees a new reference for each modified entry.
+     */
+    const setValue = (value, providerName, inputParamName) => {
+        const newVal = {
+            ...textToSpeech,
+            // copy the nested entry instead of assigning into the object that lives in state
+            [providerName]: { ...textToSpeech[providerName], [inputParamName]: value }
+        }
+
+        if (inputParamName === 'status' && value === true) {
+            // ensure that the others are turned off
+            Object.keys(textToSpeechProviders).forEach((key) => {
+                const provider = textToSpeechProviders[key]
+                if (provider.name !== providerName) {
+                    newVal[provider.name] = { ...textToSpeech[provider.name], status: false }
+                }
+            })
+            if (providerName !== 'none' && newVal['none']) {
+                newVal['none'] = { ...newVal['none'], status: false }
+            }
+        }
+        setTextToSpeech(newVal)
+        return newVal
+    }
+
+    /**
+     * Provider dropdown handler: remember the chosen provider, drop the voices of the previous
+     * one and, when the chosen provider already has a credential stored, fetch its voices.
+     */
+    const handleProviderChange = (event) => {
+        const provider = event.target.value
+        setSelectedProvider(provider)
+        setVoices([])
+        if (provider !== 'none') {
+            const credentialId = textToSpeech[provider]?.credentialId
+            if (credentialId) {
+                loadVoicesForProvider(provider, credentialId)
+            }
+        }
+    }
+
+    /**
+     * Fetch the voice list for `provider` using `credentialId` and store it in `voices`.
+     * Does nothing for the 'none' provider or when no credential is given. Any failure is
+     * logged and leaves an empty list; `loadingVoices` is always reset.
+     */
+    const loadVoicesForProvider = async (provider, credentialId) => {
+        if (provider === 'none' || !credentialId) return
+
+        setLoadingVoices(true)
+        try {
+            const params = new URLSearchParams({ provider, credentialId })
+            const response = await ttsApi.listVoices(params)
+
+            // response.data is a plain value (no await needed); only keep it when it is a list,
+            // because the voices are mapped into { label, name } dropdown options
+            setVoices(Array.isArray(response.data) ? response.data : [])
+        } catch (error) {
+            console.error('Error loading voices:', error)
+            setVoices([])
+        } finally {
+            setLoadingVoices(false)
+        }
+    }
+
+    const testTTS = async () => {
+        if (selectedProvider === 'none' || !textToSpeech[selectedProvider]?.credentialId) {
+            enqueueSnackbar({
+                message: 'Please select a provider and configure 
credentials first',
+                options: { variant: 'warning' }
+            })
+            return
+        }
+
+        try {
+            const providerConfig = textToSpeech[selectedProvider]
+            const body = {
+                text: 'Today is a wonderful day to build something with Flowise!',
+                provider: selectedProvider,
+                credentialId: providerConfig.credentialId,
+                voice: providerConfig.voice,
+                model: providerConfig.model
+            }
+            const response = await fetch('/api/v1/text-to-speech/generate', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'x-request-from': 'internal'
+                },
+                credentials: 'include',
+                body: JSON.stringify(body)
+            })
+            if (!response.ok) {
+                // handled by the catch below, which reports the failure to the user
+                throw new Error(`TTS request failed: ${response.status}`)
+            }
+
+            const audioBuffer = await response.arrayBuffer()
+            const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' })
+            const audioUrl = URL.createObjectURL(audioBlob)
+            const audio = new Audio(audioUrl)
+
+            // Clean up URL after audio finishes (registered before play() so it cannot be missed)
+            audio.addEventListener('ended', () => {
+                URL.revokeObjectURL(audioUrl)
+            })
+
+            audio
+                .play()
+                .then(() => {
+                    enqueueSnackbar({
+                        message: 'Test audio played successfully',
+                        options: { variant: 'success' }
+                    })
+                })
+                .catch((error) => {
+                    // playback never started, so 'ended' will not fire: release the blob URL here
+                    URL.revokeObjectURL(audioUrl)
+                    console.error('Error playing audio:', error)
+                    enqueueSnackbar({
+                        message: 'Error playing test audio',
+                        options: { variant: 'error' }
+                    })
+                })
+        } catch (error) {
+            console.error('Error testing TTS:', error)
+            enqueueSnackbar({
+                message: `TTS test failed: ${error.message}`,
+                options: { variant: 'error' }
+            })
+        }
+    }
+
+    useEffect(() => {
+        if (dialogProps.chatflow && dialogProps.chatflow.textToSpeech) {
+            try {
+                const textToSpeechConfig = JSON.parse(dialogProps.chatflow.textToSpeech)
+                let selectedProvider = 'none'
+                Object.keys(textToSpeechProviders).forEach((key) => {
+                    const providerConfig = textToSpeechConfig[key]
+                    if (providerConfig && providerConfig.status) {
+                        selectedProvider = key
+                    }
+                })
+                setSelectedProvider(selectedProvider)
+                setTextToSpeech(textToSpeechConfig)
+
+                // Voices will 
be loaded when credentials are configured + } catch { + setTextToSpeech({}) + setSelectedProvider('none') + } + } + + return () => { + setTextToSpeech({}) + setSelectedProvider('none') + setVoices([]) + } + }, [dialogProps]) + + return ( + <> + + Providers + + + + + {selectedProvider !== 'none' && ( + <> + + +
+ TTS Provider +
+
+ + {textToSpeechProviders[selectedProvider].url} + + } + /> +
+ {textToSpeechProviders[selectedProvider].inputs.map((inputParam) => ( + +
+ + {inputParam.label} + {!inputParam.optional &&  *} + {inputParam.description && ( + + )} + +
+ {inputParam.type === 'credential' && ( + { + setValue(newValue, selectedProvider, 'credentialId') + // Load voices when credential is updated + if (newValue && selectedProvider !== 'none') { + setTimeout(() => loadVoicesForProvider(selectedProvider, newValue), 100) + } + }} + /> + )} + {inputParam.type === 'boolean' && ( + setValue(newValue, selectedProvider, inputParam.name)} + value={ + textToSpeech[selectedProvider] + ? textToSpeech[selectedProvider][inputParam.name] + : inputParam.default ?? false + } + /> + )} + {(inputParam.type === 'string' || inputParam.type === 'password' || inputParam.type === 'number') && ( + setValue(newValue, selectedProvider, inputParam.name)} + value={ + textToSpeech[selectedProvider] + ? textToSpeech[selectedProvider][inputParam.name] + : inputParam.default ?? '' + } + /> + )} + {inputParam.type === 'options' && ( + setValue(newValue, selectedProvider, inputParam.name)} + value={ + textToSpeech[selectedProvider] + ? textToSpeech[selectedProvider][inputParam.name] + : inputParam.default ?? 'choose an option' + } + /> + )} + {inputParam.type === 'voice_select' && ( + + {loadingVoices ? ( + + + Loading voices... + + ) : ( + ({ label: voice.name, name: voice.id }))} + onSelect={(newValue) => setValue(newValue, selectedProvider, inputParam.name)} + value={ + textToSpeech[selectedProvider] + ? textToSpeech[selectedProvider][inputParam.name] + : inputParam.default ?? 'choose a voice' + } + /> + )} + + )} +
+ ))} + {/* Test TTS Button */} + + } + onClick={testTTS} + disabled={!textToSpeech[selectedProvider]?.credentialId} + > + Test Voice + + + + )} + + Save + + + ) +} + +TextToSpeech.propTypes = { + dialogProps: PropTypes.object +} + +export default TextToSpeech diff --git a/packages/ui/src/views/chatmessage/ChatMessage.jsx b/packages/ui/src/views/chatmessage/ChatMessage.jsx index b3eb5ed27..2438e97b2 100644 --- a/packages/ui/src/views/chatmessage/ChatMessage.jsx +++ b/packages/ui/src/views/chatmessage/ChatMessage.jsx @@ -38,7 +38,8 @@ import { IconSquareFilled, IconCheck, IconPaperclip, - IconSparkles + IconSparkles, + IconVolume } from '@tabler/icons-react' import robotPNG from '@/assets/images/robot.png' import userPNG from '@/assets/images/account.png' @@ -251,6 +252,11 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP const [isConfigLoading, setIsConfigLoading] = useState(true) + // TTS state + const [ttsLoading, setTtsLoading] = useState({}) + const [ttsAudio, setTtsAudio] = useState({}) + const [isTTSEnabled, setIsTTSEnabled] = useState(false) + const isFileAllowedForUpload = (file) => { const constraints = getAllowChatFlowUploads.data /** @@ -1293,9 +1299,35 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP } } } + + // Check if TTS is configured + if (getChatflowConfig.data && getChatflowConfig.data.textToSpeech) { + try { + const ttsConfig = + typeof getChatflowConfig.data.textToSpeech === 'string' + ? 
JSON.parse(getChatflowConfig.data.textToSpeech)
+                        : getChatflowConfig.data.textToSpeech
+
+                let isEnabled = false
+                if (ttsConfig) {
+                    Object.keys(ttsConfig).forEach((provider) => {
+                        if (ttsConfig[provider] && ttsConfig[provider].status && ttsConfig[provider].credentialId) {
+                            isEnabled = true
+                        }
+                    })
+                }
+                setIsTTSEnabled(isEnabled)
+            } catch (error) {
+                setIsTTSEnabled(false)
+            }
+        } else {
+            setIsTTSEnabled(false)
+        }
         // eslint-disable-next-line react-hooks/exhaustive-deps
     }, [getChatflowConfig.data])
     useEffect(() => {
         if (getChatflowConfig.error) {
             setIsConfigLoading(false)
@@ -1497,9 +1529,110 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
                 return allMessages
             })
         }
+
         setIsLeadSaving(false)
     }
+    /**
+     * Toggle text-to-speech playback for a single message.
+     *
+     * - Ignored while audio for `messageId` is still being generated.
+     * - If audio for `messageId` is registered in `ttsAudio`, it is stopped and forgotten.
+     * - Otherwise the provider whose `status` is set in the chatflow's `textToSpeech` config is
+     *   used to request audio for `messageText`, which is then played.
+     *
+     * The blob URL created for the audio is released when playback ends, when the user stops
+     * it, or when starting playback fails. Failures are logged and shown in a snackbar.
+     */
+    const handleTTSClick = async (messageId, messageText) => {
+        if (ttsLoading[messageId]) return
+
+        // drops the entry for this message from the ttsAudio map
+        const forgetAudio = () =>
+            setTtsAudio((prev) => {
+                const newState = { ...prev }
+                delete newState[messageId]
+                return newState
+            })
+
+        const currentAudio = ttsAudio[messageId]
+        if (currentAudio) {
+            currentAudio.pause()
+            currentAudio.currentTime = 0
+            // 'ended' does not fire for a clip that is stopped by hand, so free its blob URL here
+            URL.revokeObjectURL(currentAudio.src)
+            forgetAudio()
+            return
+        }
+
+        setTtsLoading((prev) => ({ ...prev, [messageId]: true }))
+
+        // declared outside the try so the catch can release the URL if playback cannot start
+        let audioUrl = null
+        try {
+            let ttsConfig = null
+            if (getChatflowConfig.data && getChatflowConfig.data.textToSpeech) {
+                try {
+                    ttsConfig =
+                        typeof getChatflowConfig.data.textToSpeech === 'string'
+                            ? JSON.parse(getChatflowConfig.data.textToSpeech)
+                            : getChatflowConfig.data.textToSpeech
+                } catch (error) {
+                    console.error('Error parsing TTS config:', error)
+                }
+            }
+
+            let activeProvider = null
+            let providerConfig = null
+            if (ttsConfig) {
+                Object.keys(ttsConfig).forEach((provider) => {
+                    if (ttsConfig[provider] && ttsConfig[provider].status) {
+                        activeProvider = provider
+                        providerConfig = ttsConfig[provider]
+                    }
+                })
+            }
+
+            if (!activeProvider || !providerConfig || !providerConfig.credentialId) {
+                enqueueSnackbar({
+                    message: 'Text-to-speech is not configured for this chatflow',
+                    options: { variant: 'warning' }
+                })
+                return
+            }
+
+            const response = await fetch('/api/v1/text-to-speech/generate', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'x-request-from': 'internal'
+                },
+                credentials: 'include',
+                body: JSON.stringify({
+                    text: messageText,
+                    provider: activeProvider,
+                    credentialId: providerConfig.credentialId,
+                    voice: providerConfig.voice,
+                    model: providerConfig.model
+                })
+            })
+
+            if (!response.ok) {
+                throw new Error(`TTS request failed: ${response.status}`)
+            }
+
+            const audioBuffer = await response.arrayBuffer()
+            const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' })
+            audioUrl = URL.createObjectURL(audioBlob)
+            const audio = new Audio(audioUrl)
+
+            setTtsAudio((prev) => ({ ...prev, [messageId]: audio }))
+
+            audio.addEventListener('ended', () => {
+                forgetAudio()
+                URL.revokeObjectURL(audioUrl)
+            })
+
+            await audio.play()
+        } catch (error) {
+            // A blob URL only exists once the audio was created, i.e. play() is what failed:
+            // unregister the clip so the button is not left in the "playing" state, and free the URL
+            if (audioUrl) {
+                URL.revokeObjectURL(audioUrl)
+                forgetAudio()
+            }
+            console.error('Error with TTS:', error)
+            enqueueSnackbar({
+                message: `TTS failed: ${error.message}`,
+                options: { variant: 'error' }
+            })
+        } finally {
+            setTtsLoading((prev) => {
+                const newState = { ...prev }
+                delete newState[messageId]
+                return newState
+            })
+        }
+    }
+
     const getInputDisabled = () => {
         return (
             loading ||
@@ -2151,7 +2284,7 @@ const ChatMessage = 
({ open, chatflowid, isAgentCanvas, isDialog, previews, setP })} )} - {message.type === 'apiMessage' && message.id && chatFeedbackStatus ? ( + {message.type === 'apiMessage' && message.id ? ( <> - copyMessageToClipboard(message.message)} /> - {!message.feedback || - message.feedback.rating === '' || - message.feedback.rating === 'THUMBS_UP' ? ( - onThumbsUpClick(message.id)} - /> - ) : null} - {!message.feedback || - message.feedback.rating === '' || - message.feedback.rating === 'THUMBS_DOWN' ? ( - onThumbsDownClick(message.id)} - /> - ) : null} + {isTTSEnabled && ( + handleTTSClick(message.id, message.message)} + disabled={ttsLoading[message.id]} + sx={{ + backgroundColor: ttsAudio[message.id] ? 'primary.main' : 'transparent', + color: ttsAudio[message.id] ? 'white' : 'inherit', + '&:hover': { + backgroundColor: ttsAudio[message.id] ? 'primary.dark' : 'action.hover' + } + }} + > + {ttsLoading[message.id] ? ( + + ) : ( + + )} + + )} + {chatFeedbackStatus && ( + <> + copyMessageToClipboard(message.message)} + /> + {!message.feedback || + message.feedback.rating === '' || + message.feedback.rating === 'THUMBS_UP' ? ( + onThumbsUpClick(message.id)} + /> + ) : null} + {!message.feedback || + message.feedback.rating === '' || + message.feedback.rating === 'THUMBS_DOWN' ? ( + onThumbsDownClick(message.id)} + /> + ) : null} + + )} ) : null}