Add tts UI

2025-08-13 08:34:55 +05:30 · 2025-08-13 08:34:55 +05:30 · 2c39b51cff
parent 141c49013a
commit 2c39b51cff
5 changed files with 707 additions and 23 deletions
--- a/packages/ui/src/api/tts.js
+++ b/packages/ui/src/api/tts.js
@ -0,0 +1,10 @@
+import client from './client'
+
+/**
+ * Request synthesized speech.
+ * POSTs `body` to the text-to-speech generate endpoint and returns whatever `client.post` returns.
+ */
+const generateVoice = (body) => {
+    return client.post('/text-to-speech/generate', body)
+}
+
+/**
+ * List the voices of a text-to-speech provider.
+ * `params` is forwarded as the `params` option of the GET request.
+ */
+const listVoices = (params) => {
+    return client.get('/text-to-speech/voices', { params })
+}
+
+const ttsApi = { generateVoice, listVoices }
+
+export default ttsApi
--- a/packages/ui/src/assets/images/elevenlabs.svg
+++ b/packages/ui/src/assets/images/elevenlabs.svg
@ -0,0 +1,7 @@
+<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
+  <rect width="24" height="24" rx="4" fill="#000000"/>
+  <path d="M6 8h3v8H6V8zm5-2h3v12h-3V6zm5 4h3v4h-3v-4z" fill="#ffffff"/>
+  <circle cx="7.5" cy="12" r="1" fill="#00ff88"/>
+  <circle cx="12.5" cy="12" r="1" fill="#00ff88"/>
+  <circle cx="17.5" cy="12" r="1" fill="#00ff88"/>
+</svg>
--- a/packages/ui/src/ui-component/dialog/ChatflowConfigurationDialog.jsx
+++ b/packages/ui/src/ui-component/dialog/ChatflowConfigurationDialog.jsx
@ -4,6 +4,7 @@ import { createPortal } from 'react-dom'
 import { Box, Dialog, DialogContent, DialogTitle, Tabs, Tab } from '@mui/material'
 import { tabsClasses } from '@mui/material/Tabs'
 import SpeechToText from '@/ui-component/extended/SpeechToText'
+import TextToSpeech from '@/ui-component/extended/TextToSpeech'
 import Security from '@/ui-component/extended/Security'
 import ChatFeedback from '@/ui-component/extended/ChatFeedback'
 import AnalyseFlow from '@/ui-component/extended/AnalyseFlow'
@ -30,6 +31,10 @@ const CHATFLOW_CONFIGURATION_TABS = [
        label: 'Speech to Text',
        id: 'speechToText'
    },
+    {
+        label: 'Text to Speech',
+        id: 'textToSpeech'
+    },
    {
        label: 'Chat Feedback',
        id: 'chatFeedback'
@ -125,18 +130,19 @@ const ChatflowConfigurationDialog = ({ show, isAgentCanvas, dialogProps, onCance
                                alignItems: 'center',
                                mb: 1
                            }}
-                            key={index}
+                            key={item.id}
                            label={item.label}
                            {...a11yProps(index)}
                        ></Tab>
                    ))}
                </Tabs>
                {filteredTabs.map((item, index) => (
-                    <TabPanel key={index} value={tabValue} index={index}>
+                    <TabPanel key={item.id} value={tabValue} index={index}>
                        {item.id === 'security' && <Security dialogProps={dialogProps} />}
                        {item.id === 'conversationStarters' ? <StarterPrompts dialogProps={dialogProps} /> : null}
                        {item.id === 'followUpPrompts' ? <FollowUpPrompts dialogProps={dialogProps} /> : null}
                        {item.id === 'speechToText' ? <SpeechToText dialogProps={dialogProps} /> : null}
+                        {item.id === 'textToSpeech' ? <TextToSpeech dialogProps={dialogProps} /> : null}
                        {item.id === 'chatFeedback' ? <ChatFeedback dialogProps={dialogProps} /> : null}
                        {item.id === 'analyseChatflow' ? <AnalyseFlow dialogProps={dialogProps} /> : null}
                        {item.id === 'leads' ? <Leads dialogProps={dialogProps} /> : null}
--- a/packages/ui/src/ui-component/extended/TextToSpeech.jsx
+++ b/packages/ui/src/ui-component/extended/TextToSpeech.jsx
@ -0,0 +1,500 @@
+import { useDispatch } from 'react-redux'
+import { useState, useEffect } from 'react'
+import PropTypes from 'prop-types'
+import { enqueueSnackbar as enqueueSnackbarAction, closeSnackbar as closeSnackbarAction, SET_CHATFLOW } from '@/store/actions'
+
+// material-ui
+import {
+    Typography,
+    Box,
+    Button,
+    FormControl,
+    ListItem,
+    ListItemAvatar,
+    ListItemText,
+    MenuItem,
+    Select,
+    CircularProgress
+} from '@mui/material'
+import { IconX, IconVolume } from '@tabler/icons-react'
+import { useTheme } from '@mui/material/styles'
+
+// Project import
+import CredentialInputHandler from '@/views/canvas/CredentialInputHandler'
+import { TooltipWithParser } from '@/ui-component/tooltip/TooltipWithParser'
+import { SwitchInput } from '@/ui-component/switch/Switch'
+import { Input } from '@/ui-component/input/Input'
+import { StyledButton } from '@/ui-component/button/StyledButton'
+import { Dropdown } from '@/ui-component/dropdown/Dropdown'
+import openAISVG from '@/assets/images/openai.svg'
+import elevenLabsSVG from '@/assets/images/elevenlabs.svg'
+
+// store
+import useNotifier from '@/utils/useNotifier'
+
+// API
+import useApi from '@/hooks/useApi'
+import chatflowsApi from '@/api/chatflows'
+import ttsApi from '@/api/tts'
+
+// Identifiers of the supported text-to-speech providers.
+const TextToSpeechType = {
+    OPENAI_TTS: 'openai',
+    ELEVEN_LABS_TTS: 'elevenlabs'
+}
+
+// Builds the credential picker input that every provider definition starts with.
+const buildCredentialInput = (credentialName) => ({
+    label: 'Connect Credential',
+    name: 'credential',
+    type: 'credential',
+    credentialNames: [credentialName]
+})
+
+// OpenAI voice choices; each option name is the lower-cased label.
+const openAIVoiceOptions = ['Alloy', 'Echo', 'Fable', 'Onyx', 'Nova', 'Shimmer'].map((label) => ({
+    label,
+    name: label.toLowerCase()
+}))
+
+// OpenAI model choices.
+const openAIModelOptions = [
+    { label: 'TTS-1', name: 'tts-1' },
+    { label: 'TTS-1 HD', name: 'tts-1-hd' }
+]
+
+const openAIProvider = {
+    label: 'OpenAI TTS',
+    name: TextToSpeechType.OPENAI_TTS,
+    icon: openAISVG,
+    url: 'https://platform.openai.com/docs/guides/text-to-speech',
+    inputs: [
+        buildCredentialInput('openAIApi'),
+        {
+            label: 'Voice',
+            name: 'voice',
+            type: 'options',
+            description: 'The voice to use when generating the audio',
+            options: openAIVoiceOptions,
+            default: 'alloy',
+            optional: true
+        },
+        {
+            label: 'Model',
+            name: 'model',
+            type: 'options',
+            description: 'The TTS model to use',
+            options: openAIModelOptions,
+            default: 'tts-1',
+            optional: true
+        }
+    ]
+}
+
+const elevenLabsProvider = {
+    label: 'Eleven Labs TTS',
+    name: TextToSpeechType.ELEVEN_LABS_TTS,
+    icon: elevenLabsSVG,
+    url: 'https://elevenlabs.io/',
+    inputs: [
+        buildCredentialInput('elevenLabsApi'),
+        {
+            label: 'Voice',
+            name: 'voice',
+            // 'voice_select' inputs are rendered from the voice list fetched at runtime
+            type: 'voice_select',
+            description: 'The voice to use for text-to-speech',
+            default: '21m00Tcm4TlvDq8ikWAM',
+            optional: true
+        }
+    ]
+}
+
+// Provider definitions that drive the configuration form.
+// Weird quirk - the key must match the name property value.
+const textToSpeechProviders = {
+    [openAIProvider.name]: openAIProvider,
+    [elevenLabsProvider.name]: elevenLabsProvider
+}
+
+const TextToSpeech = ({ dialogProps }) => {
+    const dispatch = useDispatch()
+
+    useNotifier()
+    const theme = useTheme()
+
+    const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args))
+    const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args))
+
+    const [textToSpeech, setTextToSpeech] = useState({})
+    const [selectedProvider, setSelectedProvider] = useState('none')
+    const [voices, setVoices] = useState([])
+    const [loadingVoices, setLoadingVoices] = useState(false)
+
+    const listVoicesApi = useApi(ttsApi.listVoices)
+
+    /**
+     * Persist the text-to-speech configuration on the chatflow.
+     * Marks the currently selected provider as active (via setValue), sends the serialized
+     * configuration to the chatflow update API and reports the outcome through a snackbar.
+     */
+    const onSave = async () => {
+        const textToSpeechConfig = setValue(true, selectedProvider, 'status')
+        try {
+            const saveResp = await chatflowsApi.updateChatflow(dialogProps.chatflow.id, {
+                textToSpeech: JSON.stringify(textToSpeechConfig)
+            })
+            if (saveResp.data) {
+                enqueueSnackbar({
+                    message: 'Text To Speech Configuration Saved',
+                    options: {
+                        key: Date.now() + Math.random(),
+                        variant: 'success',
+                        action: (key) => (
+                            <Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
+                                <IconX />
+                            </Button>
+                        )
+                    }
+                })
+                // Publish the updated chatflow to the store
+                dispatch({ type: SET_CHATFLOW, chatflow: saveResp.data })
+            }
+        } catch (error) {
+            // Errors without an HTTP response (e.g. network failures) have no `response`, and a
+            // response body may be null; fall back to the error's own message in those cases
+            // instead of throwing from inside the catch block.
+            const responseData = error?.response?.data
+            let errorDetail = error?.message
+            if (responseData !== undefined && responseData !== null) {
+                errorDetail = typeof responseData === 'object' ? responseData.message : responseData
+            }
+            enqueueSnackbar({
+                message: `Failed to save Text To Speech Configuration: ${errorDetail}`,
+                options: {
+                    key: Date.now() + Math.random(),
+                    variant: 'error',
+                    persist: true,
+                    action: (key) => (
+                        <Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
+                            <IconX />
+                        </Button>
+                    )
+                }
+            })
+        }
+    }
+
+    /**
+     * Set one field of a provider's configuration.
+     * Stores the resulting configuration in state and also returns it, so callers that need the
+     * new value right away (onSave) do not have to wait for the state update.
+     * Setting `status` to true switches every other provider off.
+     */
+    const setValue = (value, providerName, inputParamName) => {
+        // Copy the touched provider entry rather than writing into the object held in state;
+        // a missing (or null) entry simply spreads to an empty object.
+        const newVal = {
+            ...textToSpeech,
+            [providerName]: { ...textToSpeech[providerName], [inputParamName]: value }
+        }
+
+        if (inputParamName === 'status' && value === true) {
+            // ensure that the others are turned off
+            Object.values(textToSpeechProviders).forEach((provider) => {
+                if (provider.name !== providerName) {
+                    newVal[provider.name] = { ...textToSpeech[provider.name], status: false }
+                }
+            })
+            if (providerName !== 'none' && newVal['none']) {
+                newVal['none'] = { ...newVal['none'], status: false }
+            }
+        }
+        setTextToSpeech(newVal)
+        return newVal
+    }
+
+    // Select change handler: switch the active provider, drop the previous voice list and,
+    // when the new provider already has a credential in the local config, fetch its voices.
+    const handleProviderChange = (event) => {
+        const { value: nextProvider } = event.target
+        setSelectedProvider(nextProvider)
+        setVoices([])
+
+        if (nextProvider === 'none') return
+
+        const savedCredentialId = textToSpeech[nextProvider]?.credentialId
+        if (!savedCredentialId) return
+
+        loadVoicesForProvider(nextProvider, savedCredentialId)
+    }
+
+    /**
+     * Fetch the voice list for a provider/credential pair into the `voices` state.
+     * Does nothing for the 'none' provider or when no credential is given. Any failure, or a
+     * response that is not an array, results in an empty list.
+     */
+    const loadVoicesForProvider = async (provider, credentialId) => {
+        if (provider === 'none' || !credentialId) return
+
+        setLoadingVoices(true)
+        try {
+            const params = new URLSearchParams({ provider })
+            params.append('credentialId', credentialId)
+
+            const response = await ttsApi.listVoices(params)
+
+            // The voice dropdown calls `voices.map(...)` while rendering, so only an array may be stored
+            setVoices(Array.isArray(response.data) ? response.data : [])
+        } catch (error) {
+            console.error('Error loading voices:', error)
+            setVoices([])
+        } finally {
+            setLoadingVoices(false)
+        }
+    }
+
+    /**
+     * Generate a short sample with the selected provider's current settings and play it.
+     * Requires a provider other than 'none' with a credential; otherwise only a warning is shown.
+     */
+    const testTTS = async () => {
+        if (selectedProvider === 'none' || !textToSpeech[selectedProvider]?.credentialId) {
+            enqueueSnackbar({
+                message: 'Please select a provider and configure credentials first',
+                options: { variant: 'warning' }
+            })
+            return
+        }
+
+        // Blob URL of the generated sample; released on every exit path, not only when playback ends
+        let audioUrl = null
+        const releaseAudioUrl = () => {
+            if (audioUrl) {
+                URL.revokeObjectURL(audioUrl)
+                audioUrl = null
+            }
+        }
+
+        try {
+            const providerConfig = textToSpeech[selectedProvider]
+            const body = {
+                text: 'Today is a wonderful day to build something with Flowise!',
+                provider: selectedProvider,
+                credentialId: providerConfig.credentialId,
+                voice: providerConfig.voice,
+                model: providerConfig.model
+            }
+            const response = await fetch('/api/v1/text-to-speech/generate', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'x-request-from': 'internal'
+                },
+                credentials: 'include',
+                body: JSON.stringify(body)
+            })
+            if (!response.ok) {
+                throw new Error(`TTS request failed: ${response.status}`)
+            }
+
+            const audioBuffer = await response.arrayBuffer()
+            const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' })
+            audioUrl = URL.createObjectURL(audioBlob)
+            const audio = new Audio(audioUrl)
+
+            // Clean up URL after audio finishes
+            audio.addEventListener('ended', releaseAudioUrl, { once: true })
+
+            audio
+                .play()
+                .then(() => {
+                    enqueueSnackbar({
+                        message: 'Test audio played successfully',
+                        options: { variant: 'success' }
+                    })
+                })
+                .catch((error) => {
+                    console.error('Error playing audio:', error)
+                    // Playback never started, so 'ended' will not fire; release the URL here
+                    releaseAudioUrl()
+                    enqueueSnackbar({
+                        message: 'Error playing test audio',
+                        options: { variant: 'error' }
+                    })
+                })
+        } catch (error) {
+            console.error('Error testing TTS:', error)
+            releaseAudioUrl()
+            enqueueSnackbar({
+                message: `TTS test failed: ${error.message}`,
+                options: { variant: 'error' }
+            })
+        }
+    }
+
+    // Load the saved text-to-speech configuration whenever the dialog props change;
+    // the cleanup resets the local form state.
+    useEffect(() => {
+        if (dialogProps.chatflow && dialogProps.chatflow.textToSpeech) {
+            try {
+                const textToSpeechConfig = JSON.parse(dialogProps.chatflow.textToSpeech)
+                // The provider whose saved status is truthy becomes the selected one
+                let activeProvider = 'none'
+                Object.keys(textToSpeechProviders).forEach((key) => {
+                    const providerConfig = textToSpeechConfig[key]
+                    if (providerConfig && providerConfig.status) {
+                        activeProvider = key
+                    }
+                })
+                setSelectedProvider(activeProvider)
+                setTextToSpeech(textToSpeechConfig)
+
+                // A saved provider already has its credential, so fetch its voices right away;
+                // otherwise the voice dropdown stays empty until the credential is re-selected.
+                const savedCredentialId = textToSpeechConfig[activeProvider]?.credentialId
+                if (activeProvider !== 'none' && savedCredentialId) {
+                    loadVoicesForProvider(activeProvider, savedCredentialId)
+                }
+            } catch {
+                // Unparseable or malformed configuration: fall back to an empty form
+                setTextToSpeech({})
+                setSelectedProvider('none')
+            }
+        }
+
+        return () => {
+            setTextToSpeech({})
+            setSelectedProvider('none')
+            setVoices([])
+        }
+        // eslint-disable-next-line react-hooks/exhaustive-deps
+    }, [dialogProps])
+
+    // Provider picker, the selected provider's inputs (driven by textToSpeechProviders),
+    // a "Test Voice" button and the Save button.
+    return (
+        <>
+            {/* Box has no fullWidth prop (it would be forwarded to the DOM element); the column flex layout already stretches its children */}
+            <Box sx={{ mb: 1, display: 'flex', flexDirection: 'column', gap: 1 }}>
+                <Typography>Providers</Typography>
+                <FormControl fullWidth>
+                    <Select
+                        size='small'
+                        value={selectedProvider}
+                        onChange={handleProviderChange}
+                        sx={{
+                            '& .MuiSvgIcon-root': {
+                                color: theme?.customization?.isDarkMode ? '#fff' : 'inherit'
+                            }
+                        }}
+                    >
+                        <MenuItem value='none'>None</MenuItem>
+                        {Object.values(textToSpeechProviders).map((provider) => (
+                            <MenuItem key={provider.name} value={provider.name}>
+                                {provider.label}
+                            </MenuItem>
+                        ))}
+                    </Select>
+                </FormControl>
+            </Box>
+            {selectedProvider !== 'none' && (
+                <>
+                    <ListItem sx={{ mt: 3 }} alignItems='center'>
+                        <ListItemAvatar>
+                            <div
+                                style={{
+                                    width: 50,
+                                    height: 50,
+                                    borderRadius: '50%',
+                                    backgroundColor: 'white',
+                                    flexShrink: 0,
+                                    display: 'flex',
+                                    alignItems: 'center',
+                                    justifyContent: 'center'
+                                }}
+                            >
+                                <img
+                                    style={{
+                                        width: '100%',
+                                        height: '100%',
+                                        padding: 10,
+                                        objectFit: 'contain'
+                                    }}
+                                    alt='TTS Provider'
+                                    src={textToSpeechProviders[selectedProvider].icon}
+                                />
+                            </div>
+                        </ListItemAvatar>
+                        <ListItemText
+                            sx={{ ml: 1 }}
+                            primary={textToSpeechProviders[selectedProvider].label}
+                            secondary={
+                                <a target='_blank' rel='noreferrer' href={textToSpeechProviders[selectedProvider].url}>
+                                    {textToSpeechProviders[selectedProvider].url}
+                                </a>
+                            }
+                        />
+                    </ListItem>
+                    {/* One form row per input; a stored value wins, otherwise the input's declared default is shown */}
+                    {textToSpeechProviders[selectedProvider].inputs.map((inputParam) => (
+                        <Box key={`${selectedProvider}-${inputParam.name}`} sx={{ p: 2 }}>
+                            <div style={{ display: 'flex', flexDirection: 'row' }}>
+                                <Typography>
+                                    {inputParam.label}
+                                    {!inputParam.optional && <span style={{ color: 'red' }}>&nbsp;*</span>}
+                                    {inputParam.description && (
+                                        <TooltipWithParser style={{ marginLeft: 10 }} title={inputParam.description} />
+                                    )}
+                                </Typography>
+                            </div>
+                            {inputParam.type === 'credential' && (
+                                <CredentialInputHandler
+                                    key={textToSpeech[selectedProvider]?.credentialId}
+                                    data={
+                                        textToSpeech[selectedProvider]?.credentialId
+                                            ? { credential: textToSpeech[selectedProvider].credentialId }
+                                            : {}
+                                    }
+                                    inputParam={inputParam}
+                                    onSelect={(newValue) => {
+                                        setValue(newValue, selectedProvider, 'credentialId')
+                                        // Load voices when credential is updated
+                                        if (newValue && selectedProvider !== 'none') {
+                                            setTimeout(() => loadVoicesForProvider(selectedProvider, newValue), 100)
+                                        }
+                                    }}
+                                />
+                            )}
+                            {inputParam.type === 'boolean' && (
+                                <SwitchInput
+                                    onChange={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
+                                    value={textToSpeech[selectedProvider]?.[inputParam.name] ?? inputParam.default ?? false}
+                                />
+                            )}
+                            {(inputParam.type === 'string' || inputParam.type === 'password' || inputParam.type === 'number') && (
+                                <Input
+                                    inputParam={inputParam}
+                                    onChange={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
+                                    value={textToSpeech[selectedProvider]?.[inputParam.name] ?? inputParam.default ?? ''}
+                                />
+                            )}
+                            {inputParam.type === 'options' && (
+                                <Dropdown
+                                    name={inputParam.name}
+                                    options={inputParam.options}
+                                    onSelect={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
+                                    value={textToSpeech[selectedProvider]?.[inputParam.name] ?? inputParam.default ?? 'choose an option'}
+                                />
+                            )}
+                            {inputParam.type === 'voice_select' && (
+                                <Box>
+                                    {loadingVoices ? (
+                                        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
+                                            <CircularProgress size={16} />
+                                            <Typography variant='body2'>Loading voices...</Typography>
+                                        </Box>
+                                    ) : (
+                                        <Dropdown
+                                            name={inputParam.name}
+                                            options={voices.map((voice) => ({ label: voice.name, name: voice.id }))}
+                                            onSelect={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
+                                            value={
+                                                textToSpeech[selectedProvider]?.[inputParam.name] ?? inputParam.default ?? 'choose a voice'
+                                            }
+                                        />
+                                    )}
+                                </Box>
+                            )}
+                        </Box>
+                    ))}
+                    {/* Test TTS Button */}
+                    <Box sx={{ p: 2 }}>
+                        <StyledButton
+                            variant='outlined'
+                            size='small'
+                            startIcon={<IconVolume />}
+                            onClick={testTTS}
+                            disabled={!textToSpeech[selectedProvider]?.credentialId}
+                        >
+                            Test Voice
+                        </StyledButton>
+                    </Box>
+                </>
+            )}
+            {/* Saving is blocked while a provider is selected without a credential */}
+            <StyledButton
+                style={{ marginBottom: 10, marginTop: 10 }}
+                disabled={selectedProvider !== 'none' && !textToSpeech[selectedProvider]?.credentialId}
+                variant='contained'
+                onClick={onSave}
+            >
+                Save
+            </StyledButton>
+        </>
+    )
+}
+
+TextToSpeech.propTypes = {
+    dialogProps: PropTypes.object
+}
+
+export default TextToSpeech
--- a/packages/ui/src/views/chatmessage/ChatMessage.jsx
+++ b/packages/ui/src/views/chatmessage/ChatMessage.jsx
@ -38,7 +38,8 @@ import {
    IconSquareFilled,
    IconCheck,
    IconPaperclip,
-    IconSparkles
+    IconSparkles,
+    IconVolume
 } from '@tabler/icons-react'
 import robotPNG from '@/assets/images/robot.png'
 import userPNG from '@/assets/images/account.png'
@ -251,6 +252,11 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP

    const [isConfigLoading, setIsConfigLoading] = useState(true)

+    // TTS state
+    const [ttsLoading, setTtsLoading] = useState({})
+    const [ttsAudio, setTtsAudio] = useState({})
+    const [isTTSEnabled, setIsTTSEnabled] = useState(false)
+
    const isFileAllowedForUpload = (file) => {
        const constraints = getAllowChatFlowUploads.data
        /**
@ -1293,9 +1299,35 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
                }
            }
        }
+
+        // Check if TTS is configured
+        if (getChatflowConfig.data && getChatflowConfig.data.textToSpeech) {
+            try {
+                const ttsConfig =
+                    typeof getChatflowConfig.data.textToSpeech === 'string'
+                        ? JSON.parse(getChatflowConfig.data.textToSpeech)
+                        : getChatflowConfig.data.textToSpeech
+
+                let isEnabled = false
+                if (ttsConfig) {
+                    Object.keys(ttsConfig).forEach((provider) => {
+                        if (ttsConfig[provider] && ttsConfig[provider].status && ttsConfig[provider].credentialId) {
+                            isEnabled = true
+                        }
+                    })
+                }
+                setIsTTSEnabled(isEnabled)
+            } catch (error) {
+                setIsTTSEnabled(false)
+            }
+        } else {
+            setIsTTSEnabled(false)
+        }
        // eslint-disable-next-line react-hooks/exhaustive-deps
    }, [getChatflowConfig.data])

+    // When the chatflow config request fails, clear the config loading flag
    useEffect(() => {
        if (getChatflowConfig.error) {
            setIsConfigLoading(false)
@ -1497,9 +1529,110 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
                return allMessages
            })
        }
+
        setIsLeadSaving(false)
    }

+    /**
+     * Toggle text-to-speech playback for a single message.
+     * - Ignored while audio for that message is still being requested.
+     * - If audio for the message is already registered, stops it and releases its resources.
+     * - Otherwise requests speech for `messageText` from the active provider in the chatflow's
+     *   text-to-speech config and plays it.
+     */
+    const handleTTSClick = async (messageId, messageText) => {
+        if (ttsLoading[messageId]) return
+
+        // Removes this message's audio entry from state
+        const clearAudioEntry = () =>
+            setTtsAudio((prev) => {
+                const newState = { ...prev }
+                delete newState[messageId]
+                return newState
+            })
+
+        const currentAudio = ttsAudio[messageId]
+        if (currentAudio) {
+            currentAudio.pause()
+            currentAudio.currentTime = 0
+            // 'ended' never fires for a clip stopped by hand, so release its blob URL
+            // (the element's src, set when it was created below) here
+            URL.revokeObjectURL(currentAudio.src)
+            clearAudioEntry()
+            return
+        }
+
+        setTtsLoading((prev) => ({ ...prev, [messageId]: true }))
+
+        // Blob URL of the generated audio, kept outside the try so the catch block can release it
+        let audioUrl = null
+
+        try {
+            let ttsConfig = null
+            if (getChatflowConfig.data && getChatflowConfig.data.textToSpeech) {
+                try {
+                    // The config may arrive as a JSON string or as an already parsed object
+                    ttsConfig =
+                        typeof getChatflowConfig.data.textToSpeech === 'string'
+                            ? JSON.parse(getChatflowConfig.data.textToSpeech)
+                            : getChatflowConfig.data.textToSpeech
+                } catch (error) {
+                    console.error('Error parsing TTS config:', error)
+                }
+            }
+
+            // The provider whose status is truthy is the active one (the last match wins)
+            let activeProvider = null
+            let providerConfig = null
+            if (ttsConfig) {
+                Object.keys(ttsConfig).forEach((provider) => {
+                    if (ttsConfig[provider] && ttsConfig[provider].status) {
+                        activeProvider = provider
+                        providerConfig = ttsConfig[provider]
+                    }
+                })
+            }
+
+            if (!activeProvider || !providerConfig || !providerConfig.credentialId) {
+                enqueueSnackbar({
+                    message: 'Text-to-speech is not configured for this chatflow',
+                    options: { variant: 'warning' }
+                })
+                return
+            }
+
+            const response = await fetch('/api/v1/text-to-speech/generate', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                    'x-request-from': 'internal'
+                },
+                credentials: 'include',
+                body: JSON.stringify({
+                    text: messageText,
+                    provider: activeProvider,
+                    credentialId: providerConfig.credentialId,
+                    voice: providerConfig.voice,
+                    model: providerConfig.model
+                })
+            })
+
+            if (!response.ok) {
+                throw new Error(`TTS request failed: ${response.status}`)
+            }
+
+            const audioBuffer = await response.arrayBuffer()
+            const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' })
+            audioUrl = URL.createObjectURL(audioBlob)
+            const playbackUrl = audioUrl
+            const audio = new Audio(playbackUrl)
+
+            setTtsAudio((prev) => ({ ...prev, [messageId]: audio }))
+
+            audio.addEventListener('ended', () => {
+                clearAudioEntry()
+                URL.revokeObjectURL(playbackUrl)
+            })
+
+            await audio.play()
+        } catch (error) {
+            console.error('Error with TTS:', error)
+            if (audioUrl) {
+                // Playback failed after the audio was registered: drop the dead entry and free
+                // its URL so the next click retries instead of "stopping" audio that never played
+                URL.revokeObjectURL(audioUrl)
+                clearAudioEntry()
+            }
+            enqueueSnackbar({
+                message: `TTS failed: ${error.message}`,
+                options: { variant: 'error' }
+            })
+        } finally {
+            setTtsLoading((prev) => {
+                const newState = { ...prev }
+                delete newState[messageId]
+                return newState
+            })
+        }
+    }
+
    const getInputDisabled = () => {
        return (
            loading ||
@ -2151,7 +2284,7 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
                                                })}
                                            </div>
                                        )}
-                                        {message.type === 'apiMessage' && message.id && chatFeedbackStatus ? (
+                                        {message.type === 'apiMessage' && message.id ? (
                                            <>
                                                <Box
                                                    sx={{
@ -2161,25 +2294,53 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
                                                        gap: 1
                                                    }}
                                                >
-                                                    <CopyToClipboardButton onClick={() => copyMessageToClipboard(message.message)} />
-                                                    {!message.feedback ||
-                                                    message.feedback.rating === '' ||
-                                                    message.feedback.rating === 'THUMBS_UP' ? (
-                                                        <ThumbsUpButton
-                                                            isDisabled={message.feedback && message.feedback.rating === 'THUMBS_UP'}
-                                                            rating={message.feedback ? message.feedback.rating : ''}
-                                                            onClick={() => onThumbsUpClick(message.id)}
-                                                        />
-                                                    ) : null}
-                                                    {!message.feedback ||
-                                                    message.feedback.rating === '' ||
-                                                    message.feedback.rating === 'THUMBS_DOWN' ? (
-                                                        <ThumbsDownButton
-                                                            isDisabled={message.feedback && message.feedback.rating === 'THUMBS_DOWN'}
-                                                            rating={message.feedback ? message.feedback.rating : ''}
-                                                            onClick={() => onThumbsDownClick(message.id)}
-                                                        />
-                                                    ) : null}
+                                                    {isTTSEnabled && (
+                                                        <IconButton
+                                                            size='small'
+                                                            onClick={() => handleTTSClick(message.id, message.message)}
+                                                            disabled={ttsLoading[message.id]}
+                                                            sx={{
+                                                                backgroundColor: ttsAudio[message.id] ? 'primary.main' : 'transparent',
+                                                                color: ttsAudio[message.id] ? 'white' : 'inherit',
+                                                                '&:hover': {
+                                                                    backgroundColor: ttsAudio[message.id] ? 'primary.dark' : 'action.hover'
+                                                                }
+                                                            }}
+                                                        >
+                                                            {ttsLoading[message.id] ? (
+                                                                <CircularProgress size={16} />
+                                                            ) : (
+                                                                <IconVolume size={16} />
+                                                            )}
+                                                        </IconButton>
+                                                    )}
+                                                    {chatFeedbackStatus && (
+                                                        <>
+                                                            <CopyToClipboardButton
+                                                                onClick={() => copyMessageToClipboard(message.message)}
+                                                            />
+                                                            {!message.feedback ||
+                                                            message.feedback.rating === '' ||
+                                                            message.feedback.rating === 'THUMBS_UP' ? (
+                                                                <ThumbsUpButton
+                                                                    isDisabled={message.feedback && message.feedback.rating === 'THUMBS_UP'}
+                                                                    rating={message.feedback ? message.feedback.rating : ''}
+                                                                    onClick={() => onThumbsUpClick(message.id)}
+                                                                />
+                                                            ) : null}
+                                                            {!message.feedback ||
+                                                            message.feedback.rating === '' ||
+                                                            message.feedback.rating === 'THUMBS_DOWN' ? (
+                                                                <ThumbsDownButton
+                                                                    isDisabled={
+                                                                        message.feedback && message.feedback.rating === 'THUMBS_DOWN'
+                                                                    }
+                                                                    rating={message.feedback ? message.feedback.rating : ''}
+                                                                    onClick={() => onThumbsDownClick(message.id)}
+                                                                />
+                                                            ) : null}
+                                                        </>
+                                                    )}
                                                </Box>
                                            </>
                                        ) : null}