import { useDispatch } from 'react-redux' import { useState, useEffect } from 'react' import PropTypes from 'prop-types' import { enqueueSnackbar as enqueueSnackbarAction, closeSnackbar as closeSnackbarAction, SET_CHATFLOW } from '@/store/actions' // material-ui import { Typography, Box, Button, FormControl, ListItem, ListItemAvatar, ListItemText, MenuItem, Select, CircularProgress } from '@mui/material' import { IconX, IconVolume } from '@tabler/icons-react' import { useTheme } from '@mui/material/styles' // Project import import CredentialInputHandler from '@/views/canvas/CredentialInputHandler' import { TooltipWithParser } from '@/ui-component/tooltip/TooltipWithParser' import { SwitchInput } from '@/ui-component/switch/Switch' import { Input } from '@/ui-component/input/Input' import { StyledButton } from '@/ui-component/button/StyledButton' import { Dropdown } from '@/ui-component/dropdown/Dropdown' import openAISVG from '@/assets/images/openai.svg' import elevenLabsSVG from '@/assets/images/elevenlabs.svg' // store import useNotifier from '@/utils/useNotifier' // API import chatflowsApi from '@/api/chatflows' import ttsApi from '@/api/tts' const TextToSpeechType = { OPENAI_TTS: 'openai', ELEVEN_LABS_TTS: 'elevenlabs' } // Weird quirk - the key must match the name property value. const textToSpeechProviders = { [TextToSpeechType.OPENAI_TTS]: { label: 'OpenAI TTS', name: TextToSpeechType.OPENAI_TTS, icon: openAISVG, url: 'https://platform.openai.com/docs/guides/text-to-speech', inputs: [ { label: 'Connect Credential', name: 'credential', type: 'credential', credentialNames: ['openAIApi'] }, { label: 'Voice', name: 'voice', type: 'voice_select', description: 'The voice to use when generating the audio', default: 'alloy', optional: true } ] }, [TextToSpeechType.ELEVEN_LABS_TTS]: { label: 'Eleven Labs TTS', name: TextToSpeechType.ELEVEN_LABS_TTS, icon: elevenLabsSVG, url: 'https://elevenlabs.io/', inputs: [ { label: 'Connect Credential', name: 'credential', type: 'credential', credentialNames: ['elevenLabsApi'] }, { label: 'Voice', name: 'voice', type: 'voice_select', description: 'The voice to use for text-to-speech', default: '21m00Tcm4TlvDq8ikWAM', optional: true } ] } } const TextToSpeech = ({ dialogProps }) => { const dispatch = useDispatch() useNotifier() const theme = useTheme() const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args)) const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args)) const [textToSpeech, setTextToSpeech] = useState(null) const [selectedProvider, setSelectedProvider] = useState('none') const [voices, setVoices] = useState([]) const [loadingVoices, setLoadingVoices] = useState(false) const onSave = async () => { const textToSpeechConfig = setValue(true, selectedProvider, 'status') try { const saveResp = await chatflowsApi.updateChatflow(dialogProps.chatflow.id, { textToSpeech: JSON.stringify(textToSpeechConfig) }) if (saveResp.data) { enqueueSnackbar({ message: 'Text To Speech Configuration Saved', options: { key: Date.now() + Math.random(), variant: 'success', action: (key) => ( ) } }) dispatch({ type: SET_CHATFLOW, chatflow: saveResp.data }) } } catch (error) { enqueueSnackbar({ message: `Failed to save Text To Speech Configuration: ${ typeof error.response.data === 'object' ? error.response.data.message : error.response.data }`, options: { key: Date.now() + Math.random(), variant: 'error', persist: true, action: (key) => ( ) } }) } } const setValue = (value, providerName, inputParamName) => { let newVal = {} if (!textToSpeech || !Object.hasOwn(textToSpeech, providerName)) { newVal = { ...(textToSpeech || {}), [providerName]: {} } } else { newVal = { ...textToSpeech } } newVal[providerName][inputParamName] = value if (inputParamName === 'status' && value === true) { // ensure that the others are turned off Object.keys(textToSpeechProviders).forEach((key) => { const provider = textToSpeechProviders[key] if (provider.name !== providerName) { newVal[provider.name] = { ...(textToSpeech?.[provider.name] || {}), status: false } } }) if (providerName !== 'none' && newVal['none']) { newVal['none'].status = false } } setTextToSpeech(newVal) return newVal } const handleProviderChange = (provider, configOverride = null) => { setSelectedProvider(() => provider) setVoices([]) if (provider !== 'none') { const config = configOverride || textToSpeech?.[provider] const credentialId = config?.credentialId if (credentialId) { loadVoicesForProvider(provider, credentialId) } } } const loadVoicesForProvider = async (provider, credentialId) => { if (provider === 'none' || !credentialId) return setLoadingVoices(true) try { const params = new URLSearchParams({ provider }) params.append('credentialId', credentialId) const response = await ttsApi.listVoices(params) if (response.data) { const voicesData = await response.data setVoices(voicesData) } else { setVoices([]) } } catch (error) { console.error('Error loading voices:', error) setVoices([]) } finally { setLoadingVoices(false) } } const testTTS = async () => { if (selectedProvider === 'none' || !textToSpeech?.[selectedProvider]?.credentialId) { enqueueSnackbar({ message: 'Please select a provider and configure credentials first', options: { variant: 'warning' } }) return } try { const providerConfig = textToSpeech?.[selectedProvider] || {} const body = { text: 'Today is a wonderful day to build something with Flowise!', provider: selectedProvider, credentialId: providerConfig.credentialId, voice: providerConfig.voice, model: providerConfig.model } // Use streaming approach like in ChatMessage.jsx const mediaSource = new MediaSource() const audio = new Audio() audio.src = URL.createObjectURL(mediaSource) const streamingState = { mediaSource, sourceBuffer: null, audio, chunkQueue: [], isBuffering: false, abortController: new AbortController(), streamEnded: false } mediaSource.addEventListener('sourceopen', () => { try { const mimeType = 'audio/mpeg' streamingState.sourceBuffer = mediaSource.addSourceBuffer(mimeType) streamingState.sourceBuffer.addEventListener('updateend', () => { streamingState.isBuffering = false if (streamingState.chunkQueue.length > 0 && !streamingState.sourceBuffer.updating) { const chunk = streamingState.chunkQueue.shift() try { streamingState.sourceBuffer.appendBuffer(chunk) streamingState.isBuffering = true } catch (error) { console.error('Error appending chunk:', error) } } else if (streamingState.streamEnded && streamingState.chunkQueue.length === 0) { // All chunks processed and stream ended, now we can safely end the stream try { if (streamingState.mediaSource.readyState === 'open') { streamingState.mediaSource.endOfStream() } } catch (error) { console.error('Error ending MediaSource stream:', error) } } }) audio.play().catch((playError) => { console.error('Error starting audio playback:', playError) }) } catch (error) { console.error('Error setting up source buffer:', error) } }) audio.addEventListener('playing', () => { enqueueSnackbar({ message: 'Test audio playing...', options: { variant: 'info' } }) }) audio.addEventListener('ended', () => { enqueueSnackbar({ message: 'Test audio completed successfully', options: { variant: 'success' } }) // Cleanup if (streamingState.audio.src) { URL.revokeObjectURL(streamingState.audio.src) } }) const response = await fetch('/api/v1/text-to-speech/generate', { method: 'POST', headers: { 'Content-Type': 'application/json', 'x-request-from': 'internal' }, credentials: 'include', body: JSON.stringify(body), signal: streamingState.abortController.signal }) if (!response.ok) { throw new Error(`HTTP error! status: ${response.status}`) } const reader = response.body.getReader() let buffer = '' let done = false while (!done) { if (streamingState.abortController.signal.aborted) { reader.cancel() break } const result = await reader.read() done = result.done if (done) break const chunk = new TextDecoder().decode(result.value, { stream: true }) buffer += chunk const lines = buffer.split('\n\n') buffer = lines.pop() || '' for (const eventBlock of lines) { if (eventBlock.trim()) { const event = parseSSEEvent(eventBlock) if (event) { switch (event.event) { case 'tts_data': if (event.data?.audioChunk) { const audioBuffer = Uint8Array.from(atob(event.data.audioChunk), (c) => c.charCodeAt(0)) streamingState.chunkQueue.push(audioBuffer) if (streamingState.sourceBuffer && !streamingState.sourceBuffer.updating) { const chunk = streamingState.chunkQueue.shift() try { streamingState.sourceBuffer.appendBuffer(chunk) streamingState.isBuffering = true } catch (error) { console.error('Error appending initial chunk:', error) } } } break case 'tts_end': streamingState.streamEnded = true // Check if we can end the stream immediately (no chunks queued and not updating) if ( streamingState.sourceBuffer && streamingState.chunkQueue.length === 0 && !streamingState.sourceBuffer.updating && streamingState.mediaSource.readyState === 'open' ) { try { streamingState.mediaSource.endOfStream() } catch (error) { console.error('Error ending MediaSource stream:', error) } } break } } } } } } catch (error) { console.error('Error testing TTS:', error) enqueueSnackbar({ message: `TTS test failed: ${error.message}`, options: { variant: 'error' } }) } } const parseSSEEvent = (eventBlock) => { const lines = eventBlock.trim().split('\n') const event = { event: null, data: null } for (const line of lines) { if (line.startsWith('event:')) { event.event = line.substring(6).trim() } else if (line.startsWith('data:')) { const dataStr = line.substring(5).trim() try { const parsed = JSON.parse(dataStr) if (parsed.data) { event.data = parsed.data } } catch (e) { console.error('Error parsing SSE data:', e) } } } return event.event ? event : null } useEffect(() => { if (dialogProps.chatflow && dialogProps.chatflow.textToSpeech) { try { const textToSpeechConfig = JSON.parse(dialogProps.chatflow.textToSpeech) let selectedProvider = 'none' Object.keys(textToSpeechProviders).forEach((key) => { const providerConfig = textToSpeechConfig[key] if (providerConfig && providerConfig.status) { selectedProvider = key } }) setSelectedProvider(selectedProvider) setTextToSpeech(textToSpeechConfig) handleProviderChange(selectedProvider, textToSpeechConfig) } catch { setTextToSpeech(null) setSelectedProvider('none') } } return () => { setTextToSpeech(null) setSelectedProvider('none') setVoices([]) } // eslint-disable-next-line react-hooks/exhaustive-deps }, [dialogProps]) return ( <> Providers {selectedProvider !== 'none' && ( <>

{textToSpeechProviders[selectedProvider].url} } /> {textToSpeechProviders[selectedProvider].inputs.map((inputParam) => (

{inputParam.label} {!inputParam.optional && *} {inputParam.description && ( )}

{inputParam.type === 'credential' && ( { setValue(newValue, selectedProvider, 'credentialId') // Load voices when credential is updated if (newValue && selectedProvider !== 'none') { setTimeout(() => loadVoicesForProvider(selectedProvider, newValue), 100) } }} /> )} {inputParam.type === 'boolean' && ( setValue(newValue, selectedProvider, inputParam.name)} value={ textToSpeech?.[selectedProvider] ? textToSpeech[selectedProvider][inputParam.name] : inputParam.default ?? false } /> )} {(inputParam.type === 'string' || inputParam.type === 'password' || inputParam.type === 'number') && ( setValue(newValue, selectedProvider, inputParam.name)} value={ textToSpeech?.[selectedProvider] ? textToSpeech[selectedProvider][inputParam.name] : inputParam.default ?? '' } /> )} {inputParam.type === 'options' && ( setValue(newValue, selectedProvider, inputParam.name)} value={ textToSpeech?.[selectedProvider] ? textToSpeech[selectedProvider][inputParam.name] : inputParam.default ?? 'choose an option' } /> )} {inputParam.type === 'voice_select' && ( {loadingVoices ? ( Loading voices... ) : ( ({ label: voice.name, name: voice.id }))} onSelect={(newValue) => setValue(newValue, selectedProvider, inputParam.name)} value={ textToSpeech?.[selectedProvider] ? textToSpeech[selectedProvider][inputParam.name] : inputParam.default ?? 'choose a voice' } /> )} )} ))} {/* Auto-play Toggle */}

Automatically play audio

setValue(newValue, selectedProvider, 'autoPlay')} value={textToSpeech?.[selectedProvider] ? textToSpeech[selectedProvider].autoPlay ?? false : false} /> {/* Test TTS Button */} } onClick={testTTS} disabled={!textToSpeech?.[selectedProvider]?.credentialId} > Test Voice )} Save ) } TextToSpeech.propTypes = { dialogProps: PropTypes.object } export default TextToSpeech