Compare commits

...

41 Commits

Author SHA1 Message Date
Henry 53622d95e4 Merge branch 'main' into feature/tts 2025-10-02 11:16:12 +01:00
Ilango Rajagopal 634d8fe282 Remove redundant condition for tts autoplay in chatflow 2025-10-01 13:37:59 +05:30
Ilango Rajagopal 87b54bec9e Remove elevenlabs sdk from root package.json 2025-10-01 13:25:25 +05:30
Henry b71844a174 Merge branch 'main' into feature/tts
# Conflicts:
#	pnpm-lock.yaml
2025-09-30 13:27:01 +01:00
Henry 6aabcee1de Merge branch 'feature/tts' of https://github.com/FlowiseAI/Flowise into feature/tts 2025-09-30 13:23:30 +01:00
Ilango Rajagopal 3db27e28fe Fix UI issue 2025-09-30 13:50:45 +05:30
Henry 4be176d55a Merge branch 'main' into feature/tts 2025-09-27 14:59:57 +01:00
Ilango Rajagopal 3f89d82292 Fix merge conflicts 2025-09-26 13:57:11 +05:30
Ilango Rajagopal 50b5bb911a Send metadata event when aborting autoplay TTS 2025-09-26 13:49:51 +05:30
Ilango Rajagopal d2a177db59 Fix abort not working for TTS autoplay 2025-09-24 22:06:27 +05:30
Ilango Rajagopal 27da0b62bd TTS abort controller 2025-09-22 16:05:54 +05:30
Ilango Rajagopal 3198e7817e update 2025-09-21 16:29:21 +05:30
Ilango Rajagopal b5718c3fdb Fix tts audio not playing when clicking speaker button 2025-09-19 13:11:51 +05:30
Ilango Rajagopal f2da015dce Remove logger 2025-09-19 13:11:05 +05:30
Ilango Rajagopal 4cd1c4a22b Add ability to abort audio streaming in TTS and release lock on chat input 2025-09-18 14:32:25 +05:30
Ilango Rajagopal b514a82728 Remove unnecessary code 2025-09-18 13:28:16 +05:30
Henry f64900b8b8 Updated the condition for enabling TTS providers to exclude the 'none' provider, ensuring only valid providers are considered for text-to-speech functionality. 2025-09-16 23:52:06 +01:00
Henry 72ccf2e7b0 - Implemented stopAllTTS function calls to halt existing TTS audio before playing new audio or starting a new TTS stream 2025-09-16 19:05:51 +01:00
Henry 123ab3c85e Refactor Text-to-Speech Provider Selection and Enhance UI Components
- Updated the text-to-speech controller to select the active provider based on status instead of the first available provider
- Added audio waveform controls and test audio functionality in the TextToSpeech component, allowing users to play and pause test audio
- Integrated Autocomplete for voice selection in the TextToSpeech component
- Implemented TTS action management in ChatMessage to prevent auto-scrolling during TTS actions
2025-09-16 18:54:47 +01:00
Ilango Rajagopal 5ea714098d Fix merge conflicts 2025-09-16 13:55:04 +05:30
Ilango Rajagopal c1553d12ff Whitelist tts generate endpoint 2025-08-26 13:46:00 +05:30
Ilango Rajagopal 95a63fa609 Update generate tts endpoint and its usage in internal chat 2025-08-26 12:27:44 +05:30
Ilango Rajagopal 2e33a00667 Fix issue with tts voices not loading 2025-08-26 07:18:13 +05:30
Ilango Rajagopal eb07a42f1d Fix issue with test voice feature 2025-08-25 23:56:33 +05:30
Ilango Rajagopal 4aad293685 Fix merge conflicts 2025-08-25 09:23:07 +05:30
Ilango Rajagopal 45917a34e3 Update SSE handling for TTS 2025-08-25 04:31:44 +05:30
Ilango Rajagopal d42c096164 Abort TTS SSE when clicking the stop button 2025-08-25 03:42:38 +05:30
Ilango Rajagopal 55b6be24df Refactor TTS - fix issues with tts loading and stop audio buttons 2025-08-25 02:38:09 +05:30
Ilango Rajagopal 8de200ee15 Refactor 2025-08-22 12:42:06 +05:30
Ilango Rajagopal ad44c7b661 Fix issues with TTS - openai voices, streaming audio, rate limiting, speed of speech 2025-08-22 12:35:33 +05:30
Ilango Rajagopal aa357c8373 Add tts sse to redis publisher 2025-08-20 15:36:11 +05:30
Ilango Rajagopal 2b5554aafe Use existing sse streamer to stream tts audio before sse client is removed 2025-08-20 13:12:59 +05:30
Ilango Rajagopal 2247646182 Update controllers - fix issue with sse client getting removed before tts events are sent 2025-08-20 13:00:10 +05:30
Ilango Rajagopal b30e4a9da8 Set up streaming response for text to speech audio 2025-08-20 12:52:36 +05:30
Ilango Rajagopal 08f0d7597d Fix crash issue when first changing tts provider 2025-08-18 13:50:40 +05:30
Ilango Rajagopal ef1b0dc856 Add option to autoplay tts audio after prediction completes 2025-08-14 18:06:57 +05:30
Ilango Rajagopal 1902701e64 Fix issue with text to speech tab not showing correct saved voice 2025-08-14 13:42:44 +05:30
Ilango Rajagopal eca7d175fd Fix issue with fetching eleven labs voices 2025-08-14 13:30:07 +05:30
Ilango Rajagopal 47dd721177 Add description to eleven labs credentials 2025-08-13 13:03:46 +05:30
Ilango Rajagopal 3364539a2e Add tts backend 2025-08-13 08:35:20 +05:30
Ilango Rajagopal 2c39b51cff Add tts UI 2025-08-13 08:34:55 +05:30
34 changed files with 41358 additions and 39056 deletions

View File

@ -0,0 +1,26 @@
import { INodeParams, INodeCredential } from '../src/Interface'

/**
 * Credential definition for the Eleven Labs text-to-speech API.
 * Exposes a single password-type input holding the Eleven Labs API key.
 */
class ElevenLabsApi implements INodeCredential {
    label: string
    name: string
    version: number
    description: string
    inputs: INodeParams[]

    constructor() {
        this.label = 'Eleven Labs API'
        this.name = 'elevenLabsApi'
        this.version = 1.0
        // Fix: "a Eleven Labs" -> "an Eleven Labs" in the user-facing description
        this.description =
            'Sign up for an Eleven Labs account and <a target="_blank" href="https://elevenlabs.io/app/settings/api-keys">create an API Key</a>.'
        this.inputs = [
            {
                label: 'Eleven Labs API Key',
                name: 'elevenLabsApiKey',
                type: 'password'
            }
        ]
    }
}

module.exports = { credClass: ElevenLabsApi }

View File

@ -33,6 +33,7 @@
"@dqbd/tiktoken": "^1.0.21",
"@e2b/code-interpreter": "^1.5.1",
"@elastic/elasticsearch": "^8.9.0",
"@elevenlabs/elevenlabs-js": "^2.8.0",
"@flowiseai/nodevm": "^3.9.25",
"@getzep/zep-cloud": "~1.0.7",
"@getzep/zep-js": "^0.9.0",

View File

@ -441,6 +441,9 @@ export interface IServerSideEventStreamer {
streamAbortEvent(chatId: string): void
streamEndEvent(chatId: string): void
streamUsageMetadataEvent(chatId: string, data: any): void
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void
streamTTSEndEvent(chatId: string, chatMessageId: string): void
}
export enum FollowUpPromptProvider {

View File

@ -7,6 +7,7 @@ dotenv.config({ path: envPath, override: true })
export * from './Interface'
export * from './utils'
export * from './speechToText'
export * from './textToSpeech'
export * from './storageUtils'
export * from './handler'
export * from '../evaluation/EvaluationRunner'

View File

@ -0,0 +1,240 @@
import { ICommonObject } from './Interface'
import { getCredentialData } from './utils'
import OpenAI from 'openai'
import { ElevenLabsClient } from '@elevenlabs/elevenlabs-js'
import { Readable } from 'node:stream'
import type { ReadableStream } from 'node:stream/web'
// Supported TTS provider identifiers; these match the keys stored in a
// chatflow's `textToSpeech` configuration.
const TextToSpeechType = {
    OPENAI_TTS: 'openai',
    ELEVEN_LABS_TTS: 'elevenlabs'
}
/**
 * Synthesize `text` with the configured TTS provider and stream the audio back
 * through the supplied callbacks.
 *
 * @param text               Text to synthesize.
 * @param textToSpeechConfig Provider config: `name` ('openai' | 'elevenlabs'),
 *                           `credentialId`, and optional `voice`. (`model` is
 *                           not read here — both providers use fixed models.)
 * @param options            Passed through to `getCredentialData` for the
 *                           credential lookup.
 * @param abortController    Aborting its signal cancels the provider request
 *                           and destroys the audio stream.
 * @param onStart            Called once with the audio format ('mp3') before
 *                           any audio chunk is delivered.
 * @param onChunk            Called for each rate-limited audio chunk.
 * @param onEnd              Called after the final chunk has been delivered.
 *
 * Rejects with 'TTS generation aborted' on abort, or with the provider error.
 * NOTE(review): `resolve`/`reject` are threaded into
 * `processStreamWithRateLimit`, and the abort listener below may race with
 * them; extra settles after the first are no-ops by Promise semantics.
 */
export const convertTextToSpeechStream = async (
    text: string,
    textToSpeechConfig: ICommonObject,
    options: ICommonObject,
    abortController: AbortController,
    onStart: (format: string) => void,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void
): Promise<void> => {
    return new Promise<void>((resolve, reject) => {
        // Set once the underlying stream has been destroyed by an abort, so the
        // outer abort listener does not issue a second rejection.
        let streamDestroyed = false
        // Bail out immediately if the caller aborted before we started
        if (abortController.signal.aborted) {
            reject(new Error('TTS generation aborted'))
            return
        }
        const processStream = async () => {
            try {
                if (textToSpeechConfig) {
                    const credentialId = textToSpeechConfig.credentialId as string
                    const credentialData = await getCredentialData(credentialId ?? '', options)
                    switch (textToSpeechConfig.name) {
                        case TextToSpeechType.OPENAI_TTS: {
                            // Announce the format before any audio is produced
                            onStart('mp3')
                            const openai = new OpenAI({
                                apiKey: credentialData.openAIApiKey
                            })
                            const response = await openai.audio.speech.create(
                                {
                                    model: 'gpt-4o-mini-tts',
                                    voice: (textToSpeechConfig.voice || 'alloy') as
                                        | 'alloy'
                                        | 'ash'
                                        | 'ballad'
                                        | 'coral'
                                        | 'echo'
                                        | 'fable'
                                        | 'nova'
                                        | 'onyx'
                                        | 'sage'
                                        | 'shimmer',
                                    input: text,
                                    response_format: 'mp3'
                                },
                                {
                                    signal: abortController.signal
                                }
                            )
                            const stream = response.body as unknown as Readable
                            if (!stream) {
                                throw new Error('Failed to get response stream')
                            }
                            // 640-byte chunks every 20ms — paces SSE delivery to the client
                            await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 20, abortController, () => {
                                streamDestroyed = true
                            })
                            break
                        }
                        case TextToSpeechType.ELEVEN_LABS_TTS: {
                            onStart('mp3')
                            const client = new ElevenLabsClient({
                                apiKey: credentialData.elevenLabsApiKey
                            })
                            const response = await client.textToSpeech.stream(
                                // Default voice id ('Rachel') when none configured
                                textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM',
                                {
                                    text: text,
                                    modelId: 'eleven_multilingual_v2'
                                },
                                { abortSignal: abortController.signal }
                            )
                            // Eleven Labs returns a web ReadableStream; adapt to node Readable
                            const stream = Readable.fromWeb(response as unknown as ReadableStream)
                            if (!stream) {
                                throw new Error('Failed to get response stream')
                            }
                            // Slower pacing (40ms) than OpenAI for this provider
                            await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 40, abortController, () => {
                                streamDestroyed = true
                            })
                            break
                        }
                    }
                } else {
                    reject(new Error('Text to speech is not selected. Please configure TTS in the chatflow.'))
                }
            } catch (error) {
                reject(error)
            }
        }
        // Reject promptly on abort unless the stream teardown already handled it
        abortController.signal.addEventListener('abort', () => {
            if (!streamDestroyed) {
                reject(new Error('TTS generation aborted'))
            }
        })
        processStream()
    })
}
/**
 * Drain `stream` and deliver its bytes through `onChunk` in fixed-size chunks,
 * sleeping between emissions so the client receives audio at a steady rate.
 * Calls `onEnd` then `resolve` once the source has ended and the buffer is
 * fully flushed; calls `reject` on stream error or abort.
 *
 * NOTE(review): the required `abortController` parameter is declared AFTER the
 * two defaulted parameters, so the defaults only apply if a caller passes
 * `undefined` explicitly; both call sites in this file pass every argument
 * positionally. Consider reordering in a follow-up (interface change).
 */
const processStreamWithRateLimit = async (
    stream: Readable,
    onChunk: (chunk: Buffer) => void,
    onEnd: () => void,
    resolve: () => void,
    reject: (error: any) => void,
    targetChunkSize: number = 640,
    rateLimitMs: number = 20,
    abortController: AbortController,
    onStreamDestroy?: () => void
) => {
    const TARGET_CHUNK_SIZE = targetChunkSize
    const RATE_LIMIT_MS = rateLimitMs
    // Accumulates bytes from 'data' events until a full chunk can be emitted
    let buffer: Buffer = Buffer.alloc(0)
    let isEnded = false
    const processChunks = async () => {
        // Polling loop: runs until the source has ended AND the buffer is drained
        while (!isEnded || buffer.length > 0) {
            // Abort check once per iteration: tear down the source and reject
            if (abortController.signal.aborted) {
                if (!stream.destroyed) {
                    stream.destroy()
                }
                onStreamDestroy?.()
                reject(new Error('TTS generation aborted'))
                return
            }
            if (buffer.length >= TARGET_CHUNK_SIZE) {
                const chunk = buffer.subarray(0, TARGET_CHUNK_SIZE)
                buffer = buffer.subarray(TARGET_CHUNK_SIZE)
                onChunk(chunk)
                // Pace delivery of full chunks
                await sleep(RATE_LIMIT_MS)
            } else if (isEnded && buffer.length > 0) {
                // Source finished: flush the final partial chunk immediately
                onChunk(buffer)
                buffer = Buffer.alloc(0)
            } else if (!isEnded) {
                // Not enough data buffered yet: wait for more
                await sleep(RATE_LIMIT_MS)
            } else {
                break
            }
        }
        onEnd()
        resolve()
    }
    stream.on('data', (chunk) => {
        // Drop incoming data once aborted; the loop above handles teardown
        if (!abortController.signal.aborted) {
            buffer = Buffer.concat([buffer, Buffer.from(chunk)])
        }
    })
    stream.on('end', () => {
        isEnded = true
    })
    stream.on('error', (error) => {
        reject(error)
    })
    // Abort may also fire between loop iterations; destroy + reject here too.
    // A duplicate reject after settle is a no-op by Promise semantics.
    abortController.signal.addEventListener('abort', () => {
        if (!stream.destroyed) {
            stream.destroy()
        }
        onStreamDestroy?.()
        reject(new Error('TTS generation aborted'))
    })
    processChunks().catch(reject)
}
/** Resolve after roughly `ms` milliseconds; used to pace audio chunk emission. */
const sleep = (ms: number): Promise<void> => new Promise<void>((done) => setTimeout(done, ms))
/**
 * List the selectable voices for a TTS provider.
 *
 * @param provider     'openai' or 'elevenlabs' (see TextToSpeechType)
 * @param credentialId Credential holding the provider API key (used by Eleven Labs only)
 * @param options      Passed through to getCredentialData for the credential lookup
 * @returns Array of { id, name } voice descriptors (Eleven Labs entries also carry `category`)
 * @throws Error for an unsupported provider
 */
export const getVoices = async (provider: string, credentialId: string, options: ICommonObject) => {
    switch (provider) {
        case TextToSpeechType.OPENAI_TTS:
            // Static catalogue — OpenAI voices need no credential lookup
            return [
                { id: 'alloy', name: 'Alloy' },
                { id: 'ash', name: 'Ash' },
                { id: 'ballad', name: 'Ballad' },
                { id: 'coral', name: 'Coral' },
                { id: 'echo', name: 'Echo' },
                { id: 'fable', name: 'Fable' },
                { id: 'nova', name: 'Nova' },
                { id: 'onyx', name: 'Onyx' },
                { id: 'sage', name: 'Sage' },
                { id: 'shimmer', name: 'Shimmer' }
            ]
        case TextToSpeechType.ELEVEN_LABS_TTS: {
            // Fix: the credential is only needed for the Eleven Labs API call, so
            // look it up here instead of unconditionally for every provider
            const credentialData = await getCredentialData(credentialId ?? '', options)
            const client = new ElevenLabsClient({
                apiKey: credentialData.elevenLabsApiKey
            })
            const voices = await client.voices.search({
                pageSize: 100,
                voiceType: 'default',
                category: 'premade'
            })
            return voices.voices.map((voice) => ({
                id: voice.voiceId,
                name: voice.name,
                category: voice.category
            }))
        }
        default:
            throw new Error(`Unsupported TTS provider: ${provider}`)
    }
}

View File

@ -64,6 +64,7 @@ export interface IChatFlow {
apikeyid?: string
analytic?: string
speechToText?: string
textToSpeech?: string
chatbotConfig?: string
followUpPrompts?: string
apiConfig?: string

View File

@ -0,0 +1,226 @@
import { Request, Response, NextFunction } from 'express'
import chatflowsService from '../../services/chatflows'
import textToSpeechService from '../../services/text-to-speech'
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
import { StatusCodes } from 'http-status-codes'
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { convertTextToSpeechStream } from 'flowise-components'
import { databaseEntities } from '../../utils'
const generateTextToSpeech = async (req: Request, res: Response) => {
try {
const {
chatId,
chatflowId,
chatMessageId,
text,
provider: bodyProvider,
credentialId: bodyCredentialId,
voice: bodyVoice,
model: bodyModel
} = req.body
if (!text) {
throw new InternalFlowiseError(
StatusCodes.BAD_REQUEST,
`Error: textToSpeechController.generateTextToSpeech - text not provided!`
)
}
let provider: string, credentialId: string, voice: string, model: string
if (chatflowId) {
// Get TTS config from chatflow
const chatflow = await chatflowsService.getChatflowById(chatflowId)
const ttsConfig = JSON.parse(chatflow.textToSpeech)
// Find the provider with status: true
const activeProviderKey = Object.keys(ttsConfig).find((key) => ttsConfig[key].status === true)
if (!activeProviderKey) {
throw new InternalFlowiseError(
StatusCodes.BAD_REQUEST,
`Error: textToSpeechController.generateTextToSpeech - no active TTS provider configured in chatflow!`
)
}
const providerConfig = ttsConfig[activeProviderKey]
provider = activeProviderKey
credentialId = providerConfig.credentialId
voice = providerConfig.voice
model = providerConfig.model
} else {
// Use TTS config from request body
provider = bodyProvider
credentialId = bodyCredentialId
voice = bodyVoice
model = bodyModel
}
if (!provider) {
throw new InternalFlowiseError(
StatusCodes.BAD_REQUEST,
`Error: textToSpeechController.generateTextToSpeech - provider not provided!`
)
}
if (!credentialId) {
throw new InternalFlowiseError(
StatusCodes.BAD_REQUEST,
`Error: textToSpeechController.generateTextToSpeech - credentialId not provided!`
)
}
res.setHeader('Content-Type', 'text/event-stream')
res.setHeader('Cache-Control', 'no-cache')
res.setHeader('Connection', 'keep-alive')
res.setHeader('Access-Control-Allow-Origin', '*')
res.setHeader('Access-Control-Allow-Headers', 'Cache-Control')
const appServer = getRunningExpressApp()
const options = {
orgId: '',
chatflowid: chatflowId || '',
chatId: chatId || '',
appDataSource: appServer.AppDataSource,
databaseEntities: databaseEntities
}
const textToSpeechConfig = {
name: provider,
credentialId: credentialId,
voice: voice,
model: model
}
// Create and store AbortController
const abortController = new AbortController()
const ttsAbortId = `tts_${chatId}_${chatMessageId}`
appServer.abortControllerPool.add(ttsAbortId, abortController)
try {
await convertTextToSpeechStream(
text,
textToSpeechConfig,
options,
abortController,
(format: string) => {
const startResponse = {
event: 'tts_start',
data: { chatMessageId, format }
}
res.write('event: tts_start\n')
res.write(`data: ${JSON.stringify(startResponse)}\n\n`)
},
(chunk: Buffer) => {
const audioBase64 = chunk.toString('base64')
const clientResponse = {
event: 'tts_data',
data: { chatMessageId, audioChunk: audioBase64 }
}
res.write('event: tts_data\n')
res.write(`data: ${JSON.stringify(clientResponse)}\n\n`)
},
async () => {
const endResponse = {
event: 'tts_end',
data: { chatMessageId }
}
res.write('event: tts_end\n')
res.write(`data: ${JSON.stringify(endResponse)}\n\n`)
res.end()
// Clean up from pool on successful completion
appServer.abortControllerPool.remove(ttsAbortId)
}
)
} catch (error) {
// Clean up from pool on error
appServer.abortControllerPool.remove(ttsAbortId)
throw error
}
} catch (error) {
if (!res.headersSent) {
res.setHeader('Content-Type', 'text/event-stream')
res.setHeader('Cache-Control', 'no-cache')
res.setHeader('Connection', 'keep-alive')
}
const errorResponse = {
event: 'tts_error',
data: { error: error instanceof Error ? error.message : 'TTS generation failed' }
}
res.write('event: tts_error\n')
res.write(`data: ${JSON.stringify(errorResponse)}\n\n`)
res.end()
}
}
const abortTextToSpeech = async (req: Request, res: Response) => {
try {
const { chatId, chatMessageId, chatflowId } = req.body
if (!chatId) {
throw new InternalFlowiseError(
StatusCodes.BAD_REQUEST,
`Error: textToSpeechController.abortTextToSpeech - chatId not provided!`
)
}
if (!chatMessageId) {
throw new InternalFlowiseError(
StatusCodes.BAD_REQUEST,
`Error: textToSpeechController.abortTextToSpeech - chatMessageId not provided!`
)
}
if (!chatflowId) {
throw new InternalFlowiseError(
StatusCodes.BAD_REQUEST,
`Error: textToSpeechController.abortTextToSpeech - chatflowId not provided!`
)
}
const appServer = getRunningExpressApp()
// Abort the TTS generation using existing pool
const ttsAbortId = `tts_${chatId}_${chatMessageId}`
appServer.abortControllerPool.abort(ttsAbortId)
// Also abort the main chat flow AbortController for auto-TTS
const chatFlowAbortId = `${chatflowId}_${chatId}`
if (appServer.abortControllerPool.get(chatFlowAbortId)) {
appServer.abortControllerPool.abort(chatFlowAbortId)
appServer.sseStreamer.streamMetadataEvent(chatId, { chatId, chatMessageId })
}
// Send abort event to client
appServer.sseStreamer.streamTTSAbortEvent(chatId, chatMessageId)
res.json({ message: 'TTS stream aborted successfully', chatId, chatMessageId })
} catch (error) {
res.status(500).json({
error: error instanceof Error ? error.message : 'Failed to abort TTS stream'
})
}
}
/**
 * GET /text-to-speech/voices — list the voices available for a provider.
 * Query params: `provider` (required) and `credentialId` (provider-dependent).
 */
const getVoices = async (req: Request, res: Response, next: NextFunction) => {
    try {
        const { provider, credentialId } = req.query
        if (!provider) {
            throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, `Error: textToSpeechController.getVoices - provider not provided!`)
        }
        const voiceList = await textToSpeechService.getVoices(provider as string, credentialId as string)
        return res.json(voiceList)
    } catch (error) {
        next(error)
    }
}

export default {
    generateTextToSpeech,
    abortTextToSpeech,
    getVoices
}

View File

@ -41,6 +41,9 @@ export class ChatFlow implements IChatFlow {
@Column({ nullable: true, type: 'text' })
speechToText?: string
@Column({ nullable: true, type: 'text' })
textToSpeech?: string
@Column({ nullable: true, type: 'text' })
followUpPrompts?: string

View File

@ -0,0 +1,12 @@
import { MigrationInterface, QueryRunner } from 'typeorm'

/** Adds the nullable `textToSpeech` TEXT column to `chat_flow` (MariaDB). */
export class AddTextToSpeechToChatFlow1754986457485 implements MigrationInterface {
    public async up(queryRunner: QueryRunner): Promise<void> {
        const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
        // Fix: await the ALTER TABLE so a failure surfaces here instead of the
        // migration reporting success before the column actually exists
        if (!columnExists) await queryRunner.query(`ALTER TABLE \`chat_flow\` ADD COLUMN \`textToSpeech\` TEXT;`)
    }

    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`ALTER TABLE \`chat_flow\` DROP COLUMN \`textToSpeech\`;`)
    }
}

View File

@ -36,6 +36,7 @@ import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEnt
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
import { ModifyExecutionDataColumnType1747902489801 } from './1747902489801-ModifyExecutionDataColumnType'
import { AddTextToSpeechToChatFlow1754986457485 } from './1754986457485-AddTextToSpeechToChatFlow'
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
@ -101,6 +102,7 @@ export const mariadbMigrations = [
AddErrorToEvaluationRun1744964560174,
ExecutionLinkWorkspaceId1746862866554,
ModifyExecutionDataColumnType1747902489801,
AddTextToSpeechToChatFlow1754986457485,
ModifyChatflowType1755066758601,
AddChatFlowNameIndex1755748356008
]

View File

@ -0,0 +1,12 @@
import { MigrationInterface, QueryRunner } from 'typeorm'

/** Adds the nullable `textToSpeech` TEXT column to `chat_flow` (MySQL). */
export class AddTextToSpeechToChatFlow1754986468397 implements MigrationInterface {
    public async up(queryRunner: QueryRunner): Promise<void> {
        const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
        // Fix: await the ALTER TABLE so a failure surfaces here instead of the
        // migration reporting success before the column actually exists
        if (!columnExists) await queryRunner.query(`ALTER TABLE \`chat_flow\` ADD COLUMN \`textToSpeech\` TEXT;`)
    }

    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`ALTER TABLE \`chat_flow\` DROP COLUMN \`textToSpeech\`;`)
    }
}

View File

@ -37,6 +37,7 @@ import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpe
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
import { FixErrorsColumnInEvaluationRun1746437114935 } from './1746437114935-FixErrorsColumnInEvaluationRun'
import { ModifyExecutionDataColumnType1747902489801 } from './1747902489801-ModifyExecutionDataColumnType'
import { AddTextToSpeechToChatFlow1754986468397 } from './1754986468397-AddTextToSpeechToChatFlow'
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
@ -103,6 +104,7 @@ export const mysqlMigrations = [
FixErrorsColumnInEvaluationRun1746437114935,
ExecutionLinkWorkspaceId1746862866554,
ModifyExecutionDataColumnType1747902489801,
AddTextToSpeechToChatFlow1754986468397,
ModifyChatflowType1755066758601,
AddChatFlowNameIndex1755748356008
]

View File

@ -0,0 +1,11 @@
import { MigrationInterface, QueryRunner } from 'typeorm'

/** Adds the nullable `textToSpeech` TEXT column to `chat_flow` (PostgreSQL). */
export class AddTextToSpeechToChatFlow1754986480347 implements MigrationInterface {
    public async up(queryRunner: QueryRunner): Promise<void> {
        // IF NOT EXISTS keeps the migration idempotent on re-run
        const addColumnSql = `ALTER TABLE "chat_flow" ADD COLUMN IF NOT EXISTS "textToSpeech" TEXT;`
        await queryRunner.query(addColumnSql)
    }

    public async down(queryRunner: QueryRunner): Promise<void> {
        const dropColumnSql = `ALTER TABLE "chat_flow" DROP COLUMN "textToSpeech";`
        await queryRunner.query(dropColumnSql)
    }
}

View File

@ -36,6 +36,7 @@ import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEnt
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
import { ModifyExecutionSessionIdFieldType1748450230238 } from './1748450230238-ModifyExecutionSessionIdFieldType'
import { AddTextToSpeechToChatFlow1754986480347 } from './1754986480347-AddTextToSpeechToChatFlow'
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
@ -101,6 +102,7 @@ export const postgresMigrations = [
AddErrorToEvaluationRun1744964560174,
ExecutionLinkWorkspaceId1746862866554,
ModifyExecutionSessionIdFieldType1748450230238,
AddTextToSpeechToChatFlow1754986480347,
ModifyChatflowType1755066758601,
AddChatFlowNameIndex1755748356008
]

View File

@ -0,0 +1,11 @@
import { MigrationInterface, QueryRunner } from 'typeorm'

/** Adds the nullable `textToSpeech` TEXT column to `chat_flow` (SQLite). */
export class AddTextToSpeechToChatFlow1754986486669 implements MigrationInterface {
    public async up(queryRunner: QueryRunner): Promise<void> {
        // Fix: SQLite has no ADD COLUMN IF NOT EXISTS, so guard with hasColumn
        // to keep the migration idempotent (matches the MySQL/MariaDB variants)
        const columnExists = await queryRunner.hasColumn('chat_flow', 'textToSpeech')
        if (!columnExists) await queryRunner.query(`ALTER TABLE "chat_flow" ADD COLUMN "textToSpeech" TEXT;`)
    }

    public async down(queryRunner: QueryRunner): Promise<void> {
        await queryRunner.query(`ALTER TABLE "chat_flow" DROP COLUMN "textToSpeech";`)
    }
}

View File

@ -34,6 +34,7 @@ import { AddSeqNoToDatasetRow1733752119696 } from './1733752119696-AddSeqNoToDat
import { AddExecutionEntity1738090872625 } from './1738090872625-AddExecutionEntity'
import { FixOpenSourceAssistantTable1743758056188 } from './1743758056188-FixOpenSourceAssistantTable'
import { AddErrorToEvaluationRun1744964560174 } from './1744964560174-AddErrorToEvaluationRun'
import { AddTextToSpeechToChatFlow1754986486669 } from './1754986486669-AddTextToSpeechToChatFlow'
import { ModifyChatflowType1755066758601 } from './1755066758601-ModifyChatflowType'
import { AddChatFlowNameIndex1755748356008 } from './1755748356008-AddChatFlowNameIndex'
@ -97,6 +98,7 @@ export const sqliteMigrations = [
FixOpenSourceAssistantTable1743758056188,
AddErrorToEvaluationRun1744964560174,
ExecutionLinkWorkspaceId1746862866554,
AddTextToSpeechToChatFlow1754986486669,
ModifyChatflowType1755066758601,
AddChatFlowNameIndex1755748356008
]

View File

@ -380,6 +380,70 @@ export class RedisEventPublisher implements IServerSideEventStreamer {
}
}
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
    // Fan the TTS start event out over Redis so the subscribing server
    // instance can relay it to the SSE client
    try {
        const payload = JSON.stringify({
            chatId,
            chatMessageId,
            eventType: 'tts_start',
            data: { format }
        })
        this.redisPublisher.publish(chatId, payload)
    } catch (error) {
        console.error('Error streaming TTS start event:', error)
    }
}
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
    // `data` carries the raw base64 audio chunk; the subscriber forwards it as-is
    try {
        const payload = JSON.stringify({
            chatId,
            chatMessageId,
            eventType: 'tts_data',
            data: audioChunk
        })
        this.redisPublisher.publish(chatId, payload)
    } catch (error) {
        console.error('Error streaming TTS data event:', error)
    }
}
streamTTSEndEvent(chatId: string, chatMessageId: string): void {
    // Signal over Redis that the TTS audio stream has completed
    try {
        const payload = JSON.stringify({
            chatId,
            chatMessageId,
            eventType: 'tts_end',
            data: {}
        })
        this.redisPublisher.publish(chatId, payload)
    } catch (error) {
        console.error('Error streaming TTS end event:', error)
    }
}
streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
    // Signal over Redis that the TTS stream was aborted by the user
    try {
        const payload = JSON.stringify({
            chatId,
            chatMessageId,
            eventType: 'tts_abort',
            data: {}
        })
        this.redisPublisher.publish(chatId, payload)
    } catch (error) {
        console.error('Error streaming TTS abort event:', error)
    }
}
async disconnect() {
if (this.redisPublisher) {
await this.redisPublisher.quit()

View File

@ -102,7 +102,7 @@ export class RedisEventSubscriber {
private handleEvent(message: string) {
// Parse the message from Redis
const event = JSON.parse(message)
const { eventType, chatId, data } = event
const { eventType, chatId, chatMessageId, data } = event
// Stream the event to the client
switch (eventType) {
@ -121,6 +121,9 @@ export class RedisEventSubscriber {
case 'usedTools':
this.sseStreamer.streamUsedToolsEvent(chatId, data)
break
case 'calledTools':
this.sseStreamer.streamCalledToolsEvent(chatId, data)
break
case 'fileAnnotations':
this.sseStreamer.streamFileAnnotationsEvent(chatId, data)
break
@ -154,6 +157,21 @@ export class RedisEventSubscriber {
case 'metadata':
this.sseStreamer.streamMetadataEvent(chatId, data)
break
case 'usageMetadata':
this.sseStreamer.streamUsageMetadataEvent(chatId, data)
break
case 'tts_start':
this.sseStreamer.streamTTSStartEvent(chatId, chatMessageId, data.format)
break
case 'tts_data':
this.sseStreamer.streamTTSDataEvent(chatId, chatMessageId, data)
break
case 'tts_end':
this.sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
break
case 'tts_abort':
this.sseStreamer.streamTTSAbortEvent(chatId, chatMessageId)
break
}
}

View File

@ -55,6 +55,7 @@ import nvidiaNimRouter from './nvidia-nim'
import executionsRouter from './executions'
import validationRouter from './validation'
import agentflowv2GeneratorRouter from './agentflowv2-generator'
import textToSpeechRouter from './text-to-speech'
import authRouter from '../enterprise/routes/auth'
import auditRouter from '../enterprise/routes/audit'
@ -124,6 +125,7 @@ router.use('/nvidia-nim', nvidiaNimRouter)
router.use('/executions', executionsRouter)
router.use('/validation', validationRouter)
router.use('/agentflowv2-generator', agentflowv2GeneratorRouter)
router.use('/text-to-speech', textToSpeechRouter)
router.use('/auth', authRouter)
router.use('/audit', IdentityManager.checkFeatureByPlan('feat:login-activity'), auditRouter)

View File

@ -0,0 +1,12 @@
import express from 'express'
import textToSpeechController from '../../controllers/text-to-speech'

// Text-to-speech endpoints: SSE audio generation, stream abort, voice listing
const router = express.Router()

router
    .post('/generate', textToSpeechController.generateTextToSpeech)
    .post('/abort', textToSpeechController.abortTextToSpeech)
    .get('/voices', textToSpeechController.getVoices)

export default router

View File

@ -363,7 +363,18 @@ const getSinglePublicChatbotConfig = async (chatflowId: string): Promise<any> =>
if (dbResponse.chatbotConfig || uploadsConfig) {
try {
const parsedConfig = dbResponse.chatbotConfig ? JSON.parse(dbResponse.chatbotConfig) : {}
return { ...parsedConfig, uploads: uploadsConfig, flowData: dbResponse.flowData }
const ttsConfig =
typeof dbResponse.textToSpeech === 'string' ? JSON.parse(dbResponse.textToSpeech) : dbResponse.textToSpeech
let isTTSEnabled = false
if (ttsConfig) {
Object.keys(ttsConfig).forEach((provider) => {
if (provider !== 'none' && ttsConfig?.[provider]?.status) {
isTTSEnabled = true
}
})
}
return { ...parsedConfig, uploads: uploadsConfig, flowData: dbResponse.flowData, isTTSEnabled }
} catch (e) {
throw new InternalFlowiseError(StatusCodes.INTERNAL_SERVER_ERROR, `Error parsing Chatbot Config for Chatflow ${chatflowId}`)
}

View File

@ -0,0 +1,52 @@
import { StatusCodes } from 'http-status-codes'
import { getRunningExpressApp } from '../../utils/getRunningExpressApp'
import { InternalFlowiseError } from '../../errors/internalFlowiseError'
import { getErrorMessage } from '../../errors/utils'
import { getVoices } from 'flowise-components'
import { databaseEntities } from '../../utils'
/** TTS providers supported by this service. */
export enum TextToSpeechProvider {
    OPENAI = 'openai',
    ELEVEN_LABS = 'elevenlabs'
}

// NOTE(review): TTSRequest/TTSResponse are exported but not referenced within
// this file — confirm external consumers before removing.

/** Shape of a text-to-speech synthesis request. */
export interface TTSRequest {
    text: string
    provider: TextToSpeechProvider
    credentialId: string
    voice?: string
    model?: string
}

/** Shape of a completed synthesis result. */
export interface TTSResponse {
    audioBuffer: Buffer
    contentType: string
}
/**
 * List available voices for `provider`, using `credentialId` to authenticate
 * against the provider API where required.
 *
 * @throws InternalFlowiseError BAD_REQUEST when credentialId is missing,
 *         INTERNAL_SERVER_ERROR for any downstream failure.
 */
const getVoicesForProvider = async (provider: string, credentialId?: string): Promise<any[]> => {
    try {
        if (!credentialId) {
            throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Credential ID required for this provider')
        }
        const appServer = getRunningExpressApp()
        const options = {
            orgId: '',
            chatflowid: '',
            chatId: '',
            appDataSource: appServer.AppDataSource,
            databaseEntities: databaseEntities
        }
        return await getVoices(provider, credentialId, options)
    } catch (error) {
        // Fix: preserve deliberate errors (e.g. the BAD_REQUEST above) instead of
        // re-wrapping everything as a 500
        if (error instanceof InternalFlowiseError) throw error
        throw new InternalFlowiseError(
            StatusCodes.INTERNAL_SERVER_ERROR,
            `Error: textToSpeechService.getVoices - ${getErrorMessage(error)}`
        )
    }
}

export default {
    getVoices: getVoicesForProvider
}

View File

@ -257,4 +257,50 @@ export class SSEStreamer implements IServerSideEventStreamer {
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
}
}
streamTTSStartEvent(chatId: string, chatMessageId: string, format: string): void {
const client = this.clients[chatId]
if (client) {
const clientResponse = {
event: 'tts_start',
data: { chatMessageId, format }
}
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
}
}
streamTTSDataEvent(chatId: string, chatMessageId: string, audioChunk: string): void {
const client = this.clients[chatId]
if (client) {
const clientResponse = {
event: 'tts_data',
data: { chatMessageId, audioChunk }
}
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
}
}
streamTTSEndEvent(chatId: string, chatMessageId: string): void {
const client = this.clients[chatId]
if (client) {
const clientResponse = {
event: 'tts_end',
data: { chatMessageId }
}
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
}
}
streamTTSAbortEvent(chatId: string, chatMessageId: string): void {
const client = this.clients[chatId]
if (client) {
const clientResponse = {
event: 'tts_abort',
data: { chatMessageId }
}
client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n')
client.response.end()
delete this.clients[chatId]
}
}
}

View File

@ -58,6 +58,7 @@ import { ChatMessage } from '../database/entities/ChatMessage'
import { Telemetry } from './telemetry'
import { getWorkspaceSearchOptions } from '../enterprise/utils/ControllerServiceUtils'
import { UsageCacheManager } from '../UsageCacheManager'
import { generateTTSForResponseStream, shouldAutoPlayTTS } from './buildChatflow'
interface IWaitingNode {
nodeId: string
@ -2208,5 +2209,27 @@ export const executeAgentFlow = async ({
if (sessionId) result.sessionId = sessionId
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
const options = {
orgId,
chatflowid,
chatId,
appDataSource,
databaseEntities
}
if (sseStreamer) {
await generateTTSForResponseStream(
result.text,
chatflow.textToSpeech,
options,
chatId,
chatMessage?.id,
sseStreamer,
abortController
)
}
}
return result
}

View File

@ -6,6 +6,7 @@ import { omit } from 'lodash'
import {
IFileUpload,
convertSpeechToText,
convertTextToSpeechStream,
ICommonObject,
addSingleFileToStorage,
generateFollowUpPrompts,
@ -16,7 +17,8 @@ import {
getFileFromUpload,
removeSpecificFileFromUpload,
EvaluationRunner,
handleEscapeCharacters
handleEscapeCharacters,
IServerSideEventStreamer
} from 'flowise-components'
import { StatusCodes } from 'http-status-codes'
import {
@ -70,9 +72,74 @@ import { executeAgentFlow } from './buildAgentflow'
import { Workspace } from '../enterprise/database/entities/workspace.entity'
import { Organization } from '../enterprise/database/entities/organization.entity'
/*
* Initialize the ending node to be executed
*/
/**
 * Returns true when the chatflow's text-to-speech configuration has at
 * least one provider that is both enabled (`status === true`) and set to
 * auto-play (`autoPlay === true`).
 *
 * Accepts the raw JSON string stored on the chatflow (or an already-parsed
 * object); any parse failure is logged and treated as "do not auto-play".
 */
const shouldAutoPlayTTS = (textToSpeechConfig: string | undefined | null): boolean => {
    if (!textToSpeechConfig) return false
    try {
        const parsed = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        // Guard non-object parse results (e.g. JSON null/number) — nothing to scan
        if (!parsed || typeof parsed !== 'object') return false
        for (const entry of Object.values(parsed)) {
            const provider = entry as any
            if (provider && provider.status === true && provider.autoPlay === true) return true
        }
        return false
    } catch (error) {
        logger.error(`Error parsing textToSpeechConfig: ${getErrorMessage(error)}`)
        return false
    }
}
/**
 * Streams TTS audio for a completed bot response over SSE.
 *
 * Parses the chatflow's TTS config, picks the first provider whose
 * `status` flag is enabled, and pipes the provider's audio stream to the
 * client as `tts_start` / `tts_data` (base64 chunks) / `tts_end` events.
 * Returns silently when TTS is unconfigured or no provider is active.
 * On failure, logs the error and still emits `tts_end` so the client UI
 * is not left waiting.
 */
const generateTTSForResponseStream = async (
    responseText: string,
    textToSpeechConfig: string | undefined,
    options: ICommonObject,
    chatId: string,
    chatMessageId: string,
    sseStreamer: IServerSideEventStreamer,
    abortController?: AbortController
): Promise<void> => {
    try {
        if (!textToSpeechConfig) return
        const config = typeof textToSpeechConfig === 'string' ? JSON.parse(textToSpeechConfig) : textToSpeechConfig
        if (!config || typeof config !== 'object') return

        // First provider with status === true wins
        let activeProviderConfig: { name: string; credentialId: any; voice: any; model: any } | null = null
        for (const [name, entry] of Object.entries(config)) {
            const provider = entry as any
            if (provider && provider.status === true) {
                activeProviderConfig = {
                    name,
                    credentialId: provider.credentialId,
                    voice: provider.voice,
                    model: provider.model
                }
                break
            }
        }
        if (!activeProviderConfig) return

        const onStart = (format: string) => sseStreamer.streamTTSStartEvent(chatId, chatMessageId, format)
        const onChunk = (chunk: Buffer) => sseStreamer.streamTTSDataEvent(chatId, chatMessageId, chunk.toString('base64'))
        const onEnd = () => sseStreamer.streamTTSEndEvent(chatId, chatMessageId)

        await convertTextToSpeechStream(
            responseText,
            activeProviderConfig,
            options,
            abortController ?? new AbortController(),
            onStart,
            onChunk,
            onEnd
        )
    } catch (error) {
        logger.error(`[server]: TTS streaming failed: ${getErrorMessage(error)}`)
        // Always close out the client-side TTS state, even on failure
        sseStreamer.streamTTSEndEvent(chatId, chatMessageId)
    }
}
const initEndingNode = async ({
endingNodeIds,
componentNodes,
@ -833,6 +900,17 @@ export const executeFlow = async ({
if (memoryType) result.memoryType = memoryType
if (Object.keys(setVariableNodesOutput).length) result.flowVariables = setVariableNodesOutput
if (shouldAutoPlayTTS(chatflow.textToSpeech) && result.text) {
const options = {
orgId,
chatflowid,
chatId,
appDataSource,
databaseEntities
}
await generateTTSForResponseStream(result.text, chatflow.textToSpeech, options, chatId, chatMessage?.id, sseStreamer, signal)
}
return result
}
}
@ -1064,3 +1142,5 @@ const incrementFailedMetricCounter = (metricsProvider: IMetricsProvider, isInter
)
}
}
export { shouldAutoPlayTTS, generateTTSForResponseStream }

View File

@ -41,6 +41,8 @@ export const WHITELIST_URLS = [
'/api/v1/user/test',
'/api/v1/oauth2-credential/callback',
'/api/v1/oauth2-credential/refresh',
'/api/v1/text-to-speech/generate',
'/api/v1/text-to-speech/abort',
AzureSSO.LOGIN_URI,
AzureSSO.LOGOUT_URI,
AzureSSO.CALLBACK_URI,

View File

@ -0,0 +1,16 @@
import client from './client'

// Abort an in-flight TTS generation for a chat session
function abortTTS(body) {
    return client.post('/text-to-speech/abort', body)
}

// Generate speech audio for the given text; the response body is raw audio bytes
function generateVoice(body) {
    return client.post('/text-to-speech/generate', body, {
        responseType: 'arraybuffer'
    })
}

// List available voices for the selected provider/credential
function listVoices(params) {
    return client.get('/text-to-speech/voices', { params })
}

export default {
    abortTTS,
    generateVoice,
    listVoices
}

View File

@ -0,0 +1,7 @@
<svg width="24" height="24" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg">
<rect width="24" height="24" rx="4" fill="#000000"/>
<path d="M6 8h3v8H6V8zm5-2h3v12h-3V6zm5 4h3v4h-3v-4z" fill="#ffffff"/>
<circle cx="7.5" cy="12" r="1" fill="#00ff88"/>
<circle cx="12.5" cy="12" r="1" fill="#00ff88"/>
<circle cx="17.5" cy="12" r="1" fill="#00ff88"/>
</svg>

After

Width:  |  Height:  |  Size: 383 B

View File

@ -4,6 +4,7 @@ import { createPortal } from 'react-dom'
import { Box, Dialog, DialogContent, DialogTitle, Tabs, Tab } from '@mui/material'
import { tabsClasses } from '@mui/material/Tabs'
import SpeechToText from '@/ui-component/extended/SpeechToText'
import TextToSpeech from '@/ui-component/extended/TextToSpeech'
import Security from '@/ui-component/extended/Security'
import ChatFeedback from '@/ui-component/extended/ChatFeedback'
import AnalyseFlow from '@/ui-component/extended/AnalyseFlow'
@ -30,6 +31,10 @@ const CHATFLOW_CONFIGURATION_TABS = [
label: 'Speech to Text',
id: 'speechToText'
},
{
label: 'Text to Speech',
id: 'textToSpeech'
},
{
label: 'Chat Feedback',
id: 'chatFeedback'
@ -125,18 +130,19 @@ const ChatflowConfigurationDialog = ({ show, isAgentCanvas, dialogProps, onCance
alignItems: 'center',
mb: 1
}}
key={index}
key={item.id}
label={item.label}
{...a11yProps(index)}
></Tab>
))}
</Tabs>
{filteredTabs.map((item, index) => (
<TabPanel key={index} value={tabValue} index={index}>
<TabPanel key={item.id} value={tabValue} index={index}>
{item.id === 'security' && <Security dialogProps={dialogProps} />}
{item.id === 'conversationStarters' ? <StarterPrompts dialogProps={dialogProps} /> : null}
{item.id === 'followUpPrompts' ? <FollowUpPrompts dialogProps={dialogProps} /> : null}
{item.id === 'speechToText' ? <SpeechToText dialogProps={dialogProps} /> : null}
{item.id === 'textToSpeech' ? <TextToSpeech dialogProps={dialogProps} /> : null}
{item.id === 'chatFeedback' ? <ChatFeedback dialogProps={dialogProps} /> : null}
{item.id === 'analyseChatflow' ? <AnalyseFlow dialogProps={dialogProps} /> : null}
{item.id === 'leads' ? <Leads dialogProps={dialogProps} /> : null}

View File

@ -0,0 +1,311 @@
import { useRef, useEffect, useState, useCallback } from 'react'
import PropTypes from 'prop-types'
import { Box, IconButton, CircularProgress } from '@mui/material'
import { IconPlayerPlay, IconPlayerPause } from '@tabler/icons-react'
import { useTheme } from '@mui/material/styles'
/**
 * Interactive audio waveform player.
 *
 * Renders a bar-style waveform on a canvas (decoded from a blob URL when
 * possible, otherwise a generated placeholder), shows playback progress,
 * and supports click-to-seek plus a play/pause button.
 *
 * Props:
 * - audioSrc: audio URL; waveform is only decoded for 'blob:' URLs
 * - onPlay / onPause / onEnded: playback callbacks — the PARENT owns play
 *   state and passes it back via `isPlaying` (controlled component)
 * - isPlaying: controlled playing flag
 * - duration: currently unused (renamed to `_duration`), kept for API compatibility
 * - isGenerating: shows a spinner on the play button while audio is produced
 * - disabled: disables the button and seeking
 * - externalAudioRef: the parent's <audio> DOM ELEMENT itself (not a React
 *   ref object); when provided, the internal hidden <audio> is not rendered
 * - resetProgress: pulse true to reset the progress bar to 0
 */
const AudioWaveform = ({
    audioSrc,
    onPlay,
    onPause,
    onEnded,
    isPlaying = false,
    duration: _duration = 0,
    isGenerating = false,
    disabled = false,
    externalAudioRef = null,
    resetProgress = false
}) => {
    const canvasRef = useRef(null)
    const audioRef = useRef(null)
    // Holds the requestAnimationFrame id for the progress-update loop
    const animationRef = useRef(null)
    const theme = useTheme()
    // Playback progress as a percentage (0-100)
    const [progress, setProgress] = useState(0)
    // Decoded AudioBuffer; kept only so decoding happens once (value unused elsewhere)
    const [_audioBuffer, setAudioBuffer] = useState(null)
    // Normalized bar heights (0-100), one entry per rendered bar
    const [waveformData, setWaveformData] = useState([])

    // Generate waveform visualization data: downsample channel 0 into
    // `samples` mean-absolute-amplitude buckets, normalized to 0-100.
    const generateWaveform = useCallback((buffer) => {
        if (!buffer) return []
        const rawData = buffer.getChannelData(0)
        const samples = 200 // More bars for smoother appearance like reference
        const blockSize = Math.floor(rawData.length / samples)
        const filteredData = []
        for (let i = 0; i < samples; i++) {
            let blockStart = blockSize * i
            let sum = 0
            for (let j = 0; j < blockSize; j++) {
                sum += Math.abs(rawData[blockStart + j])
            }
            filteredData.push(sum / blockSize)
        }
        // Normalize the data
        // NOTE(review): maxValue is 0 for fully-silent audio, which makes
        // every bar NaN (0/0) — confirm silent input cannot reach here, or guard.
        const maxValue = Math.max(...filteredData)
        return filteredData.map((value) => (value / maxValue) * 100)
    }, [])

    // Generate realistic placeholder waveform like in reference
    // (random heights under a sine envelope; used when decoding fails or
    // there is no audio source yet)
    const generatePlaceholderWaveform = useCallback(() => {
        const samples = 200
        const waveform = []
        for (let i = 0; i < samples; i++) {
            // Create a more realistic waveform pattern
            const position = i / samples
            const baseHeight = 20 + Math.sin(position * Math.PI * 4) * 15
            const variation = Math.random() * 40 + 10
            const envelope = Math.sin(position * Math.PI) * 0.8 + 0.2
            waveform.push((baseHeight + variation) * envelope)
        }
        return waveform
    }, [])

    // Draw waveform on canvas. Bars left of the current progress are drawn
    // in the theme's primary color; the rest in gray.
    const drawWaveform = useCallback(() => {
        const canvas = canvasRef.current
        if (!canvas || waveformData.length === 0) return
        const ctx = canvas.getContext('2d')
        // Handle high DPI displays for crisp rendering
        const dpr = window.devicePixelRatio || 1
        const rect = canvas.getBoundingClientRect()
        canvas.width = rect.width * dpr
        canvas.height = rect.height * dpr
        ctx.scale(dpr, dpr)
        canvas.style.width = rect.width + 'px'
        canvas.style.height = rect.height + 'px'
        ctx.clearRect(0, 0, rect.width, rect.height)
        // More bars for smoother appearance like the reference
        const totalBars = waveformData.length
        const barWidth = 2 // Fixed thin bar width like in reference
        const barSpacing = 1 // Small gap between bars
        const totalWidth = rect.width
        // Center the bar group horizontally
        const startX = (totalWidth - totalBars * (barWidth + barSpacing)) / 2
        const centerY = rect.height / 2
        waveformData.forEach((value, index) => {
            const barHeight = Math.max(2, (value / 100) * (rect.height * 0.8))
            const x = startX + index * (barWidth + barSpacing)
            // Determine color based on playback progress
            const progressIndex = Math.floor((progress / 100) * waveformData.length)
            const isPlayed = index <= progressIndex
            ctx.fillStyle = isPlayed ? theme.palette.primary.main : theme.palette.mode === 'dark' ? '#444' : '#ccc'
            // Draw thin vertical bars like in reference
            ctx.fillRect(x, centerY - barHeight / 2, barWidth, barHeight)
        })
    }, [waveformData, progress, theme])

    // Load and decode audio for waveform generation. Only blob: URLs are
    // fetched/decoded; anything else gets the placeholder waveform.
    useEffect(() => {
        if (audioSrc && audioSrc.startsWith('blob:')) {
            const loadAudioBuffer = async () => {
                try {
                    const response = await fetch(audioSrc)
                    const arrayBuffer = await response.arrayBuffer()
                    const audioContext = new (window.AudioContext || window.webkitAudioContext)()
                    const buffer = await audioContext.decodeAudioData(arrayBuffer)
                    setAudioBuffer(buffer)
                    const waveform = generateWaveform(buffer)
                    setWaveformData(waveform)
                } catch (error) {
                    console.error('Error loading audio buffer:', error)
                    // Generate placeholder waveform
                    const placeholder = generatePlaceholderWaveform()
                    setWaveformData(placeholder)
                }
            }
            loadAudioBuffer()
        } else {
            // Always show placeholder waveform when no audio source
            const placeholder = generatePlaceholderWaveform()
            setWaveformData(placeholder)
        }
    }, [audioSrc, generateWaveform, generatePlaceholderWaveform])

    // Reset progress when resetProgress prop is true
    useEffect(() => {
        if (resetProgress) {
            setProgress(0)
        }
    }, [resetProgress])

    // Draw waveform when data changes or progress updates
    useEffect(() => {
        drawWaveform()
    }, [drawWaveform, progress])

    // Update progress during playback via a requestAnimationFrame loop;
    // the loop self-terminates when the audio pauses, and is cancelled on
    // cleanup or when `isPlaying` flips false.
    useEffect(() => {
        const activeAudioRef = externalAudioRef || audioRef.current
        if (isPlaying && activeAudioRef && audioSrc) {
            const updateProgress = () => {
                const audio = externalAudioRef || audioRef.current
                if (audio && audio.duration && !isNaN(audio.duration)) {
                    const currentProgress = (audio.currentTime / audio.duration) * 100
                    setProgress(currentProgress)
                }
                if (isPlaying && audio && !audio.paused) {
                    animationRef.current = requestAnimationFrame(updateProgress)
                }
            }
            // Start the update loop
            animationRef.current = requestAnimationFrame(updateProgress)
        } else {
            if (animationRef.current) {
                cancelAnimationFrame(animationRef.current)
            }
        }
        return () => {
            if (animationRef.current) {
                cancelAnimationFrame(animationRef.current)
            }
        }
    }, [isPlaying, audioSrc, externalAudioRef])

    // Toggle: delegate to the parent's callbacks (controlled component)
    const handlePlayPause = () => {
        if (isPlaying) {
            onPause?.()
        } else {
            onPlay?.()
        }
    }

    // Handle canvas click for seeking
    const handleCanvasClick = (event) => {
        const activeAudio = externalAudioRef || audioRef.current
        if (!activeAudio || !activeAudio.duration || disabled || isGenerating) return
        const canvas = canvasRef.current
        const rect = canvas.getBoundingClientRect()
        const clickX = event.clientX - rect.left
        // Use the actual canvas display width for more accurate clicking
        const clickProgress = Math.max(0, Math.min(100, (clickX / rect.width) * 100))
        const seekTime = (clickProgress / 100) * activeAudio.duration
        activeAudio.currentTime = seekTime
        setProgress(clickProgress)
    }

    return (
        <Box sx={{ width: '100%' }}>
            {/* Hidden audio element for duration and seeking - only if no external ref */}
            {audioSrc && !externalAudioRef && (
                <audio
                    ref={audioRef}
                    src={audioSrc}
                    onLoadedMetadata={() => {
                        if (audioRef.current) {
                            setProgress(0)
                        }
                    }}
                    onTimeUpdate={() => {
                        // Additional progress update on timeupdate event
                        const audio = audioRef.current
                        if (audio && audio.duration && !isNaN(audio.duration)) {
                            const currentProgress = (audio.currentTime / audio.duration) * 100
                            setProgress(currentProgress)
                        }
                    }}
                    onEnded={() => {
                        setProgress(0)
                        onEnded?.()
                    }}
                    style={{ display: 'none' }}
                >
                    <track kind='captions' />
                </audio>
            )}
            {/* Play button and Waveform side by side */}
            <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
                {/* Play/Pause Button */}
                <IconButton
                    onClick={handlePlayPause}
                    disabled={disabled || isGenerating}
                    size='small'
                    sx={{
                        width: 32,
                        height: 32,
                        flexShrink: 0,
                        backgroundColor: isPlaying ? 'transparent' : theme.palette.primary.main,
                        color: isPlaying ? theme.palette.primary.main : 'white',
                        border: isPlaying ? `1px solid ${theme.palette.primary.main}` : 'none',
                        '&:hover': {
                            backgroundColor: isPlaying ? theme.palette.primary.main : theme.palette.primary.dark,
                            color: 'white'
                        },
                        '&:disabled': {
                            backgroundColor: theme.palette.action.disabled,
                            color: theme.palette.action.disabled,
                            border: 'none'
                        }
                    }}
                >
                    {isGenerating ? (
                        <CircularProgress size={16} />
                    ) : isPlaying ? (
                        <IconPlayerPause size={16} />
                    ) : (
                        <IconPlayerPlay size={16} />
                    )}
                </IconButton>
                {/* Waveform Canvas */}
                <Box
                    sx={{
                        flex: 1,
                        cursor: !disabled && !isGenerating && audioSrc ? 'pointer' : 'default',
                        display: 'flex',
                        alignItems: 'center'
                    }}
                >
                    <canvas
                        ref={canvasRef}
                        width={400}
                        height={32}
                        onClick={handleCanvasClick}
                        style={{
                            width: '100%',
                            height: '32px',
                            backgroundColor: 'transparent',
                            opacity: disabled ? 0.6 : 1,
                            display: 'block'
                        }}
                    />
                </Box>
            </Box>
        </Box>
    )
}
// Runtime prop validation; see the component JSDoc for semantics.
// Note: externalAudioRef is the <audio> DOM element, hence PropTypes.object.
AudioWaveform.propTypes = {
    audioSrc: PropTypes.string,
    onPlay: PropTypes.func,
    onPause: PropTypes.func,
    onEnded: PropTypes.func,
    isPlaying: PropTypes.bool,
    duration: PropTypes.number,
    isGenerating: PropTypes.bool,
    disabled: PropTypes.bool,
    externalAudioRef: PropTypes.object,
    resetProgress: PropTypes.bool
}

export default AudioWaveform

View File

@ -402,7 +402,15 @@ const SpeechToText = ({ dialogProps }) => {
sx={{ ml: 1 }}
primary={speechToTextProviders[selectedProvider].label}
secondary={
<a target='_blank' rel='noreferrer' href={speechToTextProviders[selectedProvider].url}>
<a
target='_blank'
rel='noreferrer'
href={speechToTextProviders[selectedProvider].url}
style={{
color: theme?.customization?.isDarkMode ? '#90caf9' : '#1976d2',
textDecoration: 'underline'
}}
>
{speechToTextProviders[selectedProvider].url}
</a>
}

View File

@ -0,0 +1,660 @@
import { useDispatch } from 'react-redux'
import { useState, useEffect } from 'react'
import PropTypes from 'prop-types'
import { enqueueSnackbar as enqueueSnackbarAction, closeSnackbar as closeSnackbarAction, SET_CHATFLOW } from '@/store/actions'
// material-ui
import {
Typography,
Box,
Button,
FormControl,
ListItem,
ListItemAvatar,
ListItemText,
MenuItem,
Select,
CircularProgress,
Autocomplete,
TextField
} from '@mui/material'
import { IconX, IconVolume } from '@tabler/icons-react'
import { useTheme } from '@mui/material/styles'
// Project import
import CredentialInputHandler from '@/views/canvas/CredentialInputHandler'
import { TooltipWithParser } from '@/ui-component/tooltip/TooltipWithParser'
import { SwitchInput } from '@/ui-component/switch/Switch'
import { Input } from '@/ui-component/input/Input'
import { StyledButton } from '@/ui-component/button/StyledButton'
import { Dropdown } from '@/ui-component/dropdown/Dropdown'
import AudioWaveform from '@/ui-component/extended/AudioWaveform'
import openAISVG from '@/assets/images/openai.svg'
import elevenLabsSVG from '@/assets/images/elevenlabs.svg'
// store
import useNotifier from '@/utils/useNotifier'
// API
import chatflowsApi from '@/api/chatflows'
import ttsApi from '@/api/tts'
// Identifiers for the supported TTS providers. These values are used as
// keys in the persisted textToSpeech config and sent as the `provider`
// field to the text-to-speech API.
const TextToSpeechType = {
    OPENAI_TTS: 'openai',
    ELEVEN_LABS_TTS: 'elevenlabs'
}
// Registry of provider UI definitions, keyed by provider id.
// Weird quirk - the key must match the name property value: setValue()
// iterates this map and uses provider.name to flip the status flag of
// every OTHER provider off, so a mismatch would leave stale entries.
const textToSpeechProviders = {
    [TextToSpeechType.OPENAI_TTS]: {
        label: 'OpenAI TTS',
        name: TextToSpeechType.OPENAI_TTS,
        icon: openAISVG,
        url: 'https://platform.openai.com/docs/guides/text-to-speech',
        inputs: [
            {
                label: 'Connect Credential',
                name: 'credential',
                type: 'credential',
                credentialNames: ['openAIApi']
            },
            {
                label: 'Voice',
                name: 'voice',
                type: 'voice_select',
                description: 'The voice to use when generating the audio',
                default: 'alloy',
                optional: true
            }
        ]
    },
    [TextToSpeechType.ELEVEN_LABS_TTS]: {
        label: 'Eleven Labs TTS',
        name: TextToSpeechType.ELEVEN_LABS_TTS,
        icon: elevenLabsSVG,
        url: 'https://elevenlabs.io/',
        inputs: [
            {
                label: 'Connect Credential',
                name: 'credential',
                type: 'credential',
                credentialNames: ['elevenLabsApi']
            },
            {
                label: 'Voice',
                name: 'voice',
                type: 'voice_select',
                description: 'The voice to use for text-to-speech',
                // Default ElevenLabs voice id — presumably the "Rachel" preset; confirm
                default: '21m00Tcm4TlvDq8ikWAM',
                optional: true
            }
        ]
    }
}
/**
 * Chatflow configuration panel for Text-to-Speech.
 *
 * Lets the user pick a TTS provider, attach a credential, choose a voice,
 * toggle auto-play, test the voice with a fixed sample sentence, and save
 * the configuration (as a JSON string) onto the chatflow.
 *
 * The persisted shape is { [providerName]: { status, credentialId, voice,
 * autoPlay, ... } }; exactly one provider has status === true at a time.
 */
const TextToSpeech = ({ dialogProps }) => {
    const dispatch = useDispatch()
    useNotifier()
    const theme = useTheme()

    const enqueueSnackbar = (...args) => dispatch(enqueueSnackbarAction(...args))
    const closeSnackbar = (...args) => dispatch(closeSnackbarAction(...args))

    // Parsed TTS config object (null until loaded from the chatflow)
    const [textToSpeech, setTextToSpeech] = useState(null)
    // Currently-selected provider id, or 'none'
    const [selectedProvider, setSelectedProvider] = useState('none')
    // Voices fetched for the selected provider/credential
    const [voices, setVoices] = useState([])
    const [loadingVoices, setLoadingVoices] = useState(false)
    // Object URL of the generated test audio blob
    const [testAudioSrc, setTestAudioSrc] = useState(null)
    const [isTestPlaying, setIsTestPlaying] = useState(false)
    // The hidden <audio> DOM element (stored in state so the waveform re-renders when it mounts)
    const [testAudioRef, setTestAudioRef] = useState(null)
    const [isGeneratingTest, setIsGeneratingTest] = useState(false)
    // Pulsed true briefly to tell AudioWaveform to reset its progress bar
    const [resetWaveform, setResetWaveform] = useState(false)

    // Discard any generated test audio and reset the waveform UI.
    const resetTestAudio = () => {
        if (testAudioSrc) {
            // Release the blob URL to avoid leaking the audio buffer
            URL.revokeObjectURL(testAudioSrc)
            setTestAudioSrc(null)
        }
        setIsTestPlaying(false)
        setResetWaveform(true)
        setTimeout(() => setResetWaveform(false), 100)
    }

    // Persist the config: mark the selected provider active, serialize, and PATCH the chatflow.
    const onSave = async () => {
        const textToSpeechConfig = setValue(true, selectedProvider, 'status')
        try {
            const saveResp = await chatflowsApi.updateChatflow(dialogProps.chatflow.id, {
                textToSpeech: JSON.stringify(textToSpeechConfig)
            })
            if (saveResp.data) {
                enqueueSnackbar({
                    message: 'Text To Speech Configuration Saved',
                    options: {
                        key: Date.now() + Math.random(),
                        variant: 'success',
                        action: (key) => (
                            <Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
                                <IconX />
                            </Button>
                        )
                    }
                })
                dispatch({ type: SET_CHATFLOW, chatflow: saveResp.data })
            }
        } catch (error) {
            enqueueSnackbar({
                // NOTE(review): error.response is accessed without a guard —
                // a network-level failure (no response) would throw here; confirm.
                message: `Failed to save Text To Speech Configuration: ${
                    typeof error.response.data === 'object' ? error.response.data.message : error.response.data
                }`,
                options: {
                    key: Date.now() + Math.random(),
                    variant: 'error',
                    persist: true,
                    action: (key) => (
                        <Button style={{ color: 'white' }} onClick={() => closeSnackbar(key)}>
                            <IconX />
                        </Button>
                    )
                }
            })
        }
    }

    // Write one field of one provider's config, enforcing the invariant that
    // enabling a provider (status=true) disables every other provider.
    // Returns the new config object (also stored in state).
    // NOTE(review): the spread is shallow — newVal[providerName] may alias the
    // object held in current state and is mutated in place; confirm acceptable.
    const setValue = (value, providerName, inputParamName) => {
        let newVal = {}
        if (!textToSpeech || !Object.hasOwn(textToSpeech, providerName)) {
            newVal = { ...(textToSpeech || {}), [providerName]: {} }
        } else {
            newVal = { ...textToSpeech }
        }
        newVal[providerName][inputParamName] = value
        if (inputParamName === 'status' && value === true) {
            // ensure that the others are turned off
            Object.keys(textToSpeechProviders).forEach((key) => {
                const provider = textToSpeechProviders[key]
                if (provider.name !== providerName) {
                    newVal[provider.name] = { ...(textToSpeech?.[provider.name] || {}), status: false }
                }
            })
            if (providerName !== 'none' && newVal['none']) {
                newVal['none'].status = false
            }
        }
        // Reset test audio when voice or credential is changed
        if ((inputParamName === 'voice' || inputParamName === 'credentialId') && providerName === selectedProvider) {
            resetTestAudio()
        }
        setTextToSpeech(newVal)
        return newVal
    }

    // Switch the selected provider; configOverride lets the mount effect pass
    // the freshly-parsed config before state has updated.
    const handleProviderChange = (provider, configOverride = null) => {
        setSelectedProvider(provider)
        setVoices([])
        resetTestAudio()
        if (provider !== 'none') {
            const config = configOverride || textToSpeech
            const credentialId = config?.[provider]?.credentialId
            if (credentialId) {
                loadVoicesForProvider(provider, credentialId)
            }
        }
    }

    // Fetch the voice list for a provider/credential pair.
    const loadVoicesForProvider = async (provider, credentialId) => {
        if (provider === 'none' || !credentialId) return
        setLoadingVoices(true)
        try {
            const params = new URLSearchParams({ provider })
            params.append('credentialId', credentialId)
            const response = await ttsApi.listVoices(params)
            if (response.data) {
                // NOTE(review): response.data is not a promise; the await is a no-op
                const voicesData = await response.data
                setVoices(voicesData)
            } else {
                setVoices([])
            }
        } catch (error) {
            console.error('Error loading voices:', error)
            setVoices([])
        } finally {
            setLoadingVoices(false)
        }
    }

    // Generate test audio by streaming SSE events from the TTS endpoint and
    // assembling the base64 `tts_data` chunks into a single audio blob.
    // NOTE(review): uses a hardcoded fetch to /api/v1/text-to-speech/generate
    // rather than ttsApi.generateVoice — presumably because streaming is needed;
    // confirm the path stays in sync with the API client.
    const testTTS = async () => {
        if (selectedProvider === 'none' || !textToSpeech?.[selectedProvider]?.credentialId) {
            enqueueSnackbar({
                message: 'Please select a provider and configure credentials first',
                options: { variant: 'warning' }
            })
            return
        }
        setIsGeneratingTest(true)
        try {
            const providerConfig = textToSpeech?.[selectedProvider] || {}
            const body = {
                text: 'Today is a wonderful day to build something with Flowise!',
                provider: selectedProvider,
                credentialId: providerConfig.credentialId,
                voice: providerConfig.voice,
                model: providerConfig.model
            }
            const response = await fetch('/api/v1/text-to-speech/generate', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                    'x-request-from': 'internal'
                },
                credentials: 'include',
                body: JSON.stringify(body)
            })
            if (!response.ok) {
                throw new Error(`HTTP error! status: ${response.status}`)
            }
            const audioChunks = []
            const reader = response.body.getReader()
            let buffer = ''
            let done = false
            while (!done) {
                const result = await reader.read()
                done = result.done
                if (done) break
                const chunk = new TextDecoder().decode(result.value, { stream: true })
                buffer += chunk
                // SSE events are separated by blank lines; keep the trailing
                // partial event in the buffer for the next read
                const lines = buffer.split('\n\n')
                buffer = lines.pop() || ''
                for (const eventBlock of lines) {
                    if (eventBlock.trim()) {
                        const event = parseSSEEvent(eventBlock)
                        if (event && event.event === 'tts_data' && event.data?.audioChunk) {
                            // Decode the base64 payload into raw audio bytes
                            const audioBuffer = Uint8Array.from(atob(event.data.audioChunk), (c) => c.charCodeAt(0))
                            audioChunks.push(audioBuffer)
                        }
                    }
                }
            }
            if (audioChunks.length > 0) {
                // Combine all chunks into a single blob
                const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0)
                const combinedBuffer = new Uint8Array(totalLength)
                let offset = 0
                for (const chunk of audioChunks) {
                    combinedBuffer.set(chunk, offset)
                    offset += chunk.length
                }
                const audioBlob = new Blob([combinedBuffer], { type: 'audio/mpeg' })
                const audioUrl = URL.createObjectURL(audioBlob)
                // Clean up previous audio
                if (testAudioSrc) {
                    URL.revokeObjectURL(testAudioSrc)
                }
                setTestAudioSrc(audioUrl)
            } else {
                throw new Error('No audio data received')
            }
        } catch (error) {
            console.error('Error testing TTS:', error)
            enqueueSnackbar({
                message: `TTS test failed: ${error.message}`,
                options: { variant: 'error' }
            })
        } finally {
            setIsGeneratingTest(false)
        }
    }

    // Parse one SSE event block ("event: ...\n data: ...") into
    // { event, data } or null when no event name was found.
    const parseSSEEvent = (eventBlock) => {
        const lines = eventBlock.trim().split('\n')
        const event = { event: null, data: null }
        for (const line of lines) {
            if (line.startsWith('event:')) {
                event.event = line.substring(6).trim()
            } else if (line.startsWith('data:')) {
                const dataStr = line.substring(5).trim()
                try {
                    const parsed = JSON.parse(dataStr)
                    if (parsed.data) {
                        event.data = parsed.data
                    }
                } catch (e) {
                    console.error('Error parsing SSE data:', e)
                }
            }
        }
        return event.event ? event : null
    }

    // Audio control functions for waveform component
    const handleTestPlay = async () => {
        // If audio already exists, just play it
        if (testAudioRef && testAudioSrc) {
            testAudioRef.play()
            setIsTestPlaying(true)
            return
        }
        // If no audio exists, generate it first
        if (!testAudioSrc) {
            await testTTS()
            // testTTS will set the audio source, and we'll play it in the next useEffect
        }
    }

    const handleTestPause = () => {
        if (testAudioRef) {
            testAudioRef.pause()
            setIsTestPlaying(false)
        }
    }

    const handleTestEnded = () => {
        setIsTestPlaying(false)
    }

    // Auto-play when audio is generated (if user clicked play)
    useEffect(() => {
        if (testAudioSrc && testAudioRef && !isTestPlaying) {
            // Small delay to ensure audio element is ready
            setTimeout(() => {
                testAudioRef.play()
                setIsTestPlaying(true)
            }, 100)
        }
        // eslint-disable-next-line react-hooks/exhaustive-deps
    }, [testAudioSrc, testAudioRef])

    // Load the stored config when the dialog opens; clean up on close.
    useEffect(() => {
        if (dialogProps.chatflow && dialogProps.chatflow.textToSpeech) {
            try {
                const textToSpeechConfig = JSON.parse(dialogProps.chatflow.textToSpeech)
                // Local variable intentionally shadows the state of the same name
                let selectedProvider = 'none'
                Object.keys(textToSpeechProviders).forEach((key) => {
                    const providerConfig = textToSpeechConfig[key]
                    if (providerConfig && providerConfig.status) {
                        selectedProvider = key
                    }
                })
                setSelectedProvider(selectedProvider)
                setTextToSpeech(textToSpeechConfig)
                handleProviderChange(selectedProvider, textToSpeechConfig)
            } catch {
                setTextToSpeech(null)
                setSelectedProvider('none')
            }
        }
        return () => {
            setTextToSpeech(null)
            setSelectedProvider('none')
            setVoices([])
            resetTestAudio()
        }
        // eslint-disable-next-line react-hooks/exhaustive-deps
    }, [dialogProps])

    return (
        <>
            {/* Provider selector */}
            <Box fullWidth sx={{ mb: 1, display: 'flex', flexDirection: 'column', gap: 1 }}>
                <Typography>Providers</Typography>
                <FormControl fullWidth>
                    <Select
                        size='small'
                        value={selectedProvider}
                        onChange={(event) => handleProviderChange(event.target.value)}
                        sx={{
                            '& .MuiSvgIcon-root': {
                                color: theme?.customization?.isDarkMode ? '#fff' : 'inherit'
                            }
                        }}
                    >
                        <MenuItem value='none'>None</MenuItem>
                        {Object.values(textToSpeechProviders).map((provider) => (
                            <MenuItem key={provider.name} value={provider.name}>
                                {provider.label}
                            </MenuItem>
                        ))}
                    </Select>
                </FormControl>
            </Box>
            {selectedProvider !== 'none' && (
                <>
                    {/* Provider header: logo + docs link */}
                    <ListItem sx={{ mt: 3 }} alignItems='center'>
                        <ListItemAvatar>
                            <div
                                style={{
                                    width: 50,
                                    height: 50,
                                    borderRadius: '50%',
                                    backgroundColor: 'white',
                                    flexShrink: 0,
                                    display: 'flex',
                                    alignItems: 'center',
                                    justifyContent: 'center'
                                }}
                            >
                                <img
                                    style={{
                                        width: '100%',
                                        height: '100%',
                                        padding: 10,
                                        objectFit: 'contain'
                                    }}
                                    alt='TTS Provider'
                                    src={textToSpeechProviders[selectedProvider].icon}
                                />
                            </div>
                        </ListItemAvatar>
                        <ListItemText
                            sx={{ ml: 1 }}
                            primary={textToSpeechProviders[selectedProvider].label}
                            secondary={
                                <a
                                    target='_blank'
                                    rel='noreferrer'
                                    href={textToSpeechProviders[selectedProvider].url}
                                    style={{
                                        color: theme?.customization?.isDarkMode ? '#90caf9' : '#1976d2',
                                        textDecoration: 'underline'
                                    }}
                                >
                                    {textToSpeechProviders[selectedProvider].url}
                                </a>
                            }
                        />
                    </ListItem>
                    {/* Provider-specific inputs, rendered by input type */}
                    {textToSpeechProviders[selectedProvider].inputs.map((inputParam) => (
                        <Box key={`${selectedProvider}-${inputParam.name}`} sx={{ p: 2 }}>
                            <div style={{ display: 'flex', flexDirection: 'row' }}>
                                <Typography>
                                    {inputParam.label}
                                    {!inputParam.optional && <span style={{ color: 'red' }}>&nbsp;*</span>}
                                    {inputParam.description && (
                                        <TooltipWithParser style={{ marginLeft: 10 }} title={inputParam.description} />
                                    )}
                                </Typography>
                            </div>
                            {inputParam.type === 'credential' && (
                                <CredentialInputHandler
                                    key={textToSpeech?.[selectedProvider]?.credentialId}
                                    data={
                                        textToSpeech?.[selectedProvider]?.credentialId
                                            ? { credential: textToSpeech?.[selectedProvider]?.credentialId }
                                            : {}
                                    }
                                    inputParam={inputParam}
                                    onSelect={(newValue) => {
                                        setValue(newValue, selectedProvider, 'credentialId')
                                        // Load voices when credential is updated
                                        if (newValue && selectedProvider !== 'none') {
                                            setTimeout(() => loadVoicesForProvider(selectedProvider, newValue), 100)
                                        }
                                    }}
                                />
                            )}
                            {inputParam.type === 'boolean' && (
                                <SwitchInput
                                    onChange={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
                                    value={
                                        textToSpeech?.[selectedProvider]
                                            ? textToSpeech[selectedProvider][inputParam.name]
                                            : inputParam.default ?? false
                                    }
                                />
                            )}
                            {(inputParam.type === 'string' || inputParam.type === 'password' || inputParam.type === 'number') && (
                                <Input
                                    inputParam={inputParam}
                                    onChange={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
                                    value={
                                        textToSpeech?.[selectedProvider]
                                            ? textToSpeech[selectedProvider][inputParam.name]
                                            : inputParam.default ?? ''
                                    }
                                />
                            )}
                            {inputParam.type === 'options' && (
                                <Dropdown
                                    name={inputParam.name}
                                    options={inputParam.options}
                                    onSelect={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
                                    value={
                                        textToSpeech?.[selectedProvider]
                                            ? textToSpeech[selectedProvider][inputParam.name]
                                            : inputParam.default ?? 'choose an option'
                                    }
                                />
                            )}
                            {inputParam.type === 'voice_select' && (
                                <Autocomplete
                                    size='small'
                                    sx={{ mt: 1 }}
                                    options={voices}
                                    loading={loadingVoices}
                                    getOptionLabel={(option) => option.name || ''}
                                    value={
                                        voices.find(
                                            (voice) =>
                                                voice.id === (textToSpeech?.[selectedProvider]?.[inputParam.name] || inputParam.default)
                                        ) || null
                                    }
                                    onChange={(event, newValue) => {
                                        setValue(newValue ? newValue.id : '', selectedProvider, inputParam.name)
                                    }}
                                    renderInput={(params) => (
                                        <TextField
                                            {...params}
                                            placeholder={loadingVoices ? 'Loading voices...' : 'Choose a voice'}
                                            InputProps={{
                                                ...params.InputProps,
                                                endAdornment: (
                                                    <>
                                                        {loadingVoices ? <CircularProgress color='inherit' size={20} /> : null}
                                                        {params.InputProps.endAdornment}
                                                    </>
                                                )
                                            }}
                                        />
                                    )}
                                    disabled={loadingVoices || !textToSpeech?.[selectedProvider]?.credentialId}
                                />
                            )}
                        </Box>
                    ))}
                    {/* Auto-play Toggle */}
                    <Box sx={{ p: 2 }}>
                        <div style={{ display: 'flex', flexDirection: 'row', alignItems: 'center' }}>
                            <Typography>
                                Automatically play audio
                                <TooltipWithParser
                                    style={{ marginLeft: 10 }}
                                    title='When enabled, bot responses will be automatically converted to speech and played'
                                />
                            </Typography>
                        </div>
                        <SwitchInput
                            onChange={(newValue) => setValue(newValue, selectedProvider, 'autoPlay')}
                            value={textToSpeech?.[selectedProvider] ? textToSpeech[selectedProvider].autoPlay ?? false : false}
                        />
                    </Box>
                    {/* Test Voice Section */}
                    <Box sx={{ p: 2 }}>
                        <Typography variant='h6' sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
                            <IconVolume size={20} />
                            Test Voice
                        </Typography>
                        <Typography variant='body2' color='textSecondary' sx={{ mb: 2 }}>
                            Test text: &quot;Today is a wonderful day to build something with Flowise!&quot;
                        </Typography>
                        <AudioWaveform
                            audioSrc={testAudioSrc}
                            onPlay={handleTestPlay}
                            onPause={handleTestPause}
                            onEnded={handleTestEnded}
                            isPlaying={isTestPlaying}
                            isGenerating={isGeneratingTest}
                            disabled={!textToSpeech?.[selectedProvider]?.credentialId}
                            externalAudioRef={testAudioRef}
                            resetProgress={resetWaveform}
                        />
                        {/* Hidden audio element for waveform control */}
                        {testAudioSrc && (
                            <audio
                                ref={(ref) => setTestAudioRef(ref)}
                                src={testAudioSrc}
                                onPlay={() => setIsTestPlaying(true)}
                                onPause={() => setIsTestPlaying(false)}
                                onEnded={handleTestEnded}
                                style={{ display: 'none' }}
                            >
                                <track kind='captions' />
                            </audio>
                        )}
                    </Box>
                </>
            )}
            <StyledButton
                style={{ marginBottom: 10, marginTop: 10 }}
                disabled={selectedProvider !== 'none' && !textToSpeech?.[selectedProvider]?.credentialId}
                variant='contained'
                onClick={onSave}
            >
                Save
            </StyledButton>
        </>
    )
}
// dialogProps carries the chatflow (with its serialized textToSpeech config)
TextToSpeech.propTypes = {
    dialogProps: PropTypes.object
}

export default TextToSpeech

View File

@ -38,7 +38,8 @@ import {
IconSquareFilled,
IconCheck,
IconPaperclip,
IconSparkles
IconSparkles,
IconVolume
} from '@tabler/icons-react'
import robotPNG from '@/assets/images/robot.png'
import userPNG from '@/assets/images/account.png'
@ -72,6 +73,7 @@ import attachmentsApi from '@/api/attachments'
import chatmessagefeedbackApi from '@/api/chatmessagefeedback'
import leadsApi from '@/api/lead'
import executionsApi from '@/api/executions'
import ttsApi from '@/api/tts'
// Hooks
import useApi from '@/hooks/useApi'
@ -251,6 +253,27 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
const [isConfigLoading, setIsConfigLoading] = useState(true)
// TTS state
const [isTTSLoading, setIsTTSLoading] = useState({})
const [isTTSPlaying, setIsTTSPlaying] = useState({})
const [ttsAudio, setTtsAudio] = useState({})
const [isTTSEnabled, setIsTTSEnabled] = useState(false)
// TTS streaming state
const [ttsStreamingState, setTtsStreamingState] = useState({
mediaSource: null,
sourceBuffer: null,
audio: null,
chunkQueue: [],
isBuffering: false,
audioFormat: null,
abortController: null
})
// Ref to prevent auto-scroll during TTS actions (using ref to avoid re-renders)
const isTTSActionRef = useRef(false)
const ttsTimeoutRef = useRef(null)
const isFileAllowedForUpload = (file) => {
const constraints = getAllowChatFlowUploads.data
/**
@ -463,7 +486,12 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
const handleAbort = async () => {
setIsMessageStopping(true)
try {
// Stop all TTS streams first
await handleTTSAbortAll()
stopAllTTS()
await chatmessageApi.abortMessage(chatflowid, chatId)
setIsMessageStopping(false)
} catch (error) {
setIsMessageStopping(false)
enqueueSnackbar({
@ -536,6 +564,22 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
}
}
// Helper that flags an in-flight TTS action and schedules the flag to
// auto-clear shortly afterwards (kept in a ref to avoid re-renders).
const setTTSAction = (isActive) => {
    isTTSActionRef.current = isActive
    // Cancel any pending auto-reset before (possibly) scheduling a new one
    const pendingReset = ttsTimeoutRef.current
    if (pendingReset) {
        clearTimeout(pendingReset)
        ttsTimeoutRef.current = null
    }
    if (!isActive) return
    // Reset the flag after a delay long enough for all related state
    // changes to have been committed
    ttsTimeoutRef.current = setTimeout(() => {
        isTTSActionRef.current = false
        ttsTimeoutRef.current = null
    }, 300)
}
const onChange = useCallback((e) => setUserInput(e.target.value), [setUserInput])
const updateLastMessage = (text) => {
@ -949,6 +993,7 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
setLoading(false)
setUserInput('')
setUploadedFiles([])
setTimeout(() => {
inputRef.current?.focus()
scrollToBottom()
@ -1027,6 +1072,18 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
abortMessage(payload.data)
closeResponse()
break
case 'tts_start':
handleTTSStart(payload.data)
break
case 'tts_data':
handleTTSDataChunk(payload.data.audioChunk)
break
case 'tts_end':
handleTTSEnd()
break
case 'tts_abort':
handleTTSAbort(payload.data)
break
case 'end':
setLocalStorageChatflow(chatflowid, chatId)
closeResponse()
@ -1293,6 +1350,30 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
}
}
}
// Check if TTS is configured
if (getChatflowConfig.data && getChatflowConfig.data.textToSpeech) {
try {
const ttsConfig =
typeof getChatflowConfig.data.textToSpeech === 'string'
? JSON.parse(getChatflowConfig.data.textToSpeech)
: getChatflowConfig.data.textToSpeech
let isEnabled = false
if (ttsConfig) {
Object.keys(ttsConfig).forEach((provider) => {
if (provider !== 'none' && ttsConfig?.[provider]?.status) {
isEnabled = true
}
})
}
setIsTTSEnabled(isEnabled)
} catch (error) {
setIsTTSEnabled(false)
}
} else {
setIsTTSEnabled(false)
}
// eslint-disable-next-line react-hooks/exhaustive-deps
}, [getChatflowConfig.data])
@ -1313,9 +1394,11 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
}
}, [isChatFlowAvailableForRAGFileUploads, fullFileUpload])
// Auto scroll chat to bottom
// Auto scroll chat to bottom (but not during TTS actions)
useEffect(() => {
scrollToBottom()
if (!isTTSActionRef.current) {
scrollToBottom()
}
}, [messages])
useEffect(() => {
@ -1497,9 +1580,451 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
return allMessages
})
}
setIsLeadSaving(false)
}
// Tear down all audio and UI flags associated with one message's TTS playback.
const cleanupTTSForMessage = (messageId) => {
    const audioEl = ttsAudio[messageId]
    if (audioEl) {
        audioEl.pause()
        audioEl.currentTime = 0
        // Remove only this message's entry from the audio map
        setTtsAudio((prev) => {
            const next = { ...prev }
            delete next[messageId]
            return next
        })
    }
    // If a MediaSource stream is active, silence it and release its resources
    if (ttsStreamingState.audio) {
        ttsStreamingState.audio.pause()
        cleanupTTSStreaming()
    }
    // Clear the playing/loading indicators for this message only
    setIsTTSPlaying((prev) => {
        const next = { ...prev }
        delete next[messageId]
        return next
    })
    setIsTTSLoading((prev) => {
        const next = { ...prev }
        delete next[messageId]
        return next
    })
}
/**
 * Stop TTS playback for a single message.
 * Asks the server to abort the stream, then releases local audio state.
 * Local cleanup runs in `finally` so the UI never gets stuck in a
 * "playing" state when the abort API call fails (previously a rejection
 * skipped cleanup and escaped the onClick handler unhandled).
 */
const handleTTSStop = async (messageId) => {
    setTTSAction(true)
    try {
        await ttsApi.abortTTS({ chatflowId: chatflowid, chatId, chatMessageId: messageId })
    } catch (error) {
        // Best-effort server-side abort; still clean up locally below
        console.error('Error aborting TTS:', error)
    } finally {
        cleanupTTSForMessage(messageId)
        setIsMessageStopping(false)
    }
}
// Immediately silence every per-message audio element and any active
// MediaSource stream, then reset all TTS bookkeeping state.
const stopAllTTS = () => {
    for (const audioEl of Object.values(ttsAudio)) {
        if (audioEl) {
            audioEl.pause()
            audioEl.currentTime = 0
        }
    }
    setTtsAudio({})
    const { abortController, audio } = ttsStreamingState
    // Cancel the in-flight fetch (if any) before tearing down playback
    if (abortController) {
        abortController.abort()
    }
    if (audio) {
        audio.pause()
        cleanupTTSStreaming()
    }
    setIsTTSPlaying({})
    setIsTTSLoading({})
}
// Speaker-button handler: toggles TTS for a message. A click on an already
// active message stops it; otherwise every other TTS stream is torn down and
// a new streamed generation is requested and consumed as SSE over fetch.
const handleTTSClick = async (messageId, messageText) => {
    // Ignore clicks while this message's audio is still being generated
    if (isTTSLoading[messageId]) return
    // Second click on an active message acts as a stop button
    if (isTTSPlaying[messageId] || ttsAudio[messageId]) {
        handleTTSStop(messageId)
        return
    }
    setTTSAction(true)
    // abort all ongoing streams and clear audio sources
    await handleTTSAbortAll()
    stopAllTTS()
    // Optimistically enter the loading/streaming state before the request
    handleTTSStart({ chatMessageId: messageId, format: 'mp3' })
    try {
        const abortController = new AbortController()
        setTtsStreamingState((prev) => ({ ...prev, abortController }))
        const response = await fetch('/api/v1/text-to-speech/generate', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
                'x-request-from': 'internal'
            },
            credentials: 'include',
            signal: abortController.signal,
            body: JSON.stringify({
                chatflowId: chatflowid,
                chatId: chatId,
                chatMessageId: messageId,
                text: messageText
            })
        })
        if (!response.ok) {
            throw new Error(`TTS request failed: ${response.status}`)
        }
        // Read the SSE body incrementally; events are separated by "\n\n"
        const reader = response.body.getReader()
        const decoder = new TextDecoder()
        let buffer = ''
        let done = false
        while (!done) {
            if (abortController.signal.aborted) {
                break
            }
            const result = await reader.read()
            done = result.done
            if (done) {
                break
            }
            const value = result.value
            const chunk = decoder.decode(value, { stream: true })
            buffer += chunk
            // Keep the trailing partial event in the buffer for the next read
            const lines = buffer.split('\n\n')
            buffer = lines.pop() || ''
            for (const eventBlock of lines) {
                if (eventBlock.trim()) {
                    const event = parseSSEEvent(eventBlock)
                    if (event) {
                        switch (event.event) {
                            case 'tts_start':
                                // Already handled optimistically by handleTTSStart above
                                break
                            case 'tts_data':
                                if (!abortController.signal.aborted) {
                                    handleTTSDataChunk(event.data.audioChunk)
                                }
                                break
                            case 'tts_end':
                                if (!abortController.signal.aborted) {
                                    handleTTSEnd()
                                }
                                break
                        }
                    }
                }
            }
        }
    } catch (error) {
        if (error.name === 'AbortError') {
            // User-initiated abort; logged but not surfaced as a failure
            console.error('TTS request was aborted')
        } else {
            console.error('Error with TTS:', error)
            enqueueSnackbar({
                message: `TTS failed: ${error.message}`,
                options: { variant: 'error' }
            })
        }
    } finally {
        // Always clear this message's loading flag, however the stream ended
        setIsTTSLoading((prev) => {
            const newState = { ...prev }
            delete newState[messageId]
            return newState
        })
    }
}
// Parse one SSE event block ("event: name\ndata: {...}") into an object with
// `event` and (optionally) `data`. Returns null when no event name is found.
// Malformed JSON in a data line is logged and skipped rather than thrown.
const parseSSEEvent = (eventBlock) => {
    const result = {}
    for (const line of eventBlock.split('\n')) {
        if (line.startsWith('event:')) {
            result.event = line.slice(6).trim()
        } else if (line.startsWith('data:')) {
            const dataStr = line.slice(5).trim()
            try {
                const parsed = JSON.parse(dataStr)
                // Only payloads shaped as { data: ... } carry usable content
                if (parsed.data) {
                    result.data = parsed.data
                }
            } catch (e) {
                console.error('Error parsing SSE data:', e, 'Raw data:', dataStr)
            }
        }
    }
    return result.event ? result : null
}
/**
 * Create a MediaSource-backed Audio element for streamed TTS playback.
 * Wires 'sourceopen' to attach the SourceBuffer and begin playback, plus
 * 'playing'/'ended' listeners to keep the per-message loading/playing
 * flags in sync with actual audio output.
 *
 * Fixes: `mimeType` was previously declared inside the inner try block but
 * referenced in its catch — the error log itself threw a ReferenceError,
 * masking the original failure. The `data.format === 'mp3' ? 'audio/mpeg'
 * : 'audio/mpeg'` ternary was also a no-op (both branches identical).
 */
const initializeTTSStreaming = (data) => {
    try {
        const mediaSource = new MediaSource()
        const audio = new Audio()
        audio.src = URL.createObjectURL(mediaSource)
        // NOTE(review): every format currently maps to MP3 ('audio/mpeg');
        // extend this when additional stream formats are produced.
        const mimeType = 'audio/mpeg'
        mediaSource.addEventListener('sourceopen', () => {
            try {
                const sourceBuffer = mediaSource.addSourceBuffer(mimeType)
                setTtsStreamingState((prevState) => ({
                    ...prevState,
                    mediaSource,
                    sourceBuffer,
                    audio
                }))
                audio.play().catch((playError) => {
                    console.error('Error starting audio playback:', playError)
                })
            } catch (error) {
                console.error('Error setting up source buffer:', error)
                console.error('MediaSource readyState:', mediaSource.readyState)
                // mimeType is declared outside this try, so the log below can
                // no longer throw a ReferenceError of its own
                console.error('Requested MIME type:', mimeType)
            }
        })
        audio.addEventListener('playing', () => {
            // First audible frame: swap the loading flag for the playing flag
            setIsTTSLoading((prevState) => {
                const newState = { ...prevState }
                delete newState[data.chatMessageId]
                return newState
            })
            setIsTTSPlaying((prevState) => ({
                ...prevState,
                [data.chatMessageId]: true
            }))
        })
        audio.addEventListener('ended', () => {
            setIsTTSPlaying((prevState) => {
                const newState = { ...prevState }
                delete newState[data.chatMessageId]
                return newState
            })
            cleanupTTSStreaming()
        })
    } catch (error) {
        console.error('Error initializing TTS streaming:', error)
    }
}
/**
 * Release every MediaSource/audio resource held by the streaming state and
 * reset it to its idle shape. Safe to call repeatedly.
 *
 * Fixes: the blob URL was read AFTER `removeAttribute('src')`, so
 * `URL.revokeObjectURL` never ran and every stream leaked an object URL.
 * Also removes the old `removeEventListener('sourceopen', () => {})` call,
 * which was a no-op (a fresh closure can never match the registered one).
 */
const cleanupTTSStreaming = () => {
    setTtsStreamingState((prevState) => {
        if (prevState.abortController) {
            prevState.abortController.abort()
        }
        if (prevState.audio) {
            prevState.audio.pause()
            // Capture the object URL BEFORE clearing the src attribute,
            // otherwise the blob URL can never be revoked
            const objectUrl = prevState.audio.src
            prevState.audio.removeAttribute('src')
            if (objectUrl) {
                URL.revokeObjectURL(objectUrl)
            }
        }
        if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') {
            try {
                prevState.mediaSource.endOfStream()
            } catch (e) {
                // Ignore errors during cleanup
            }
        }
        // Return the pristine idle streaming state
        return {
            mediaSource: null,
            sourceBuffer: null,
            audio: null,
            chunkQueue: [],
            isBuffering: false,
            audioFormat: null,
            abortController: null
        }
    })
}
/**
 * Append the next queued audio chunk to the SourceBuffer if it is idle.
 * The subsequent 'updateend' event (handled by the effect watching
 * `sourceBuffer`) drives the next drain.
 *
 * Fixes: the old code called `prevState.chunkQueue.shift()`, mutating React
 * state inside the updater — which corrupts the queue if the updater runs
 * twice (e.g. StrictMode). The queue is now split immutably.
 */
const processChunkQueue = () => {
    setTtsStreamingState((prevState) => {
        if (!prevState.sourceBuffer || prevState.sourceBuffer.updating || prevState.chunkQueue.length === 0) {
            return prevState
        }
        const [nextChunk, ...remainingChunks] = prevState.chunkQueue
        try {
            // Side effect: feeds the media pipeline; completion fires 'updateend'
            prevState.sourceBuffer.appendBuffer(nextChunk)
            return {
                ...prevState,
                chunkQueue: remainingChunks,
                isBuffering: true
            }
        } catch (error) {
            console.error('Error appending chunk to buffer:', error)
            return prevState
        }
    })
}
/**
 * Begin a new TTS stream for a message: stops all existing audio, marks the
 * message as loading, tags the last assistant message with its id if it has
 * none yet, and resets the streaming state before wiring up the MediaSource.
 *
 * Fixes: guarded against an empty messages array — `lastMessage.type`
 * previously threw a TypeError when no message existed yet.
 */
const handleTTSStart = (data) => {
    setTTSAction(true)
    // Stop all existing TTS audio before starting a new stream
    stopAllTTS()
    setIsTTSLoading((prevState) => ({
        ...prevState,
        [data.chatMessageId]: true
    }))
    setMessages((prevMessages) => {
        const allMessages = [...cloneDeep(prevMessages)]
        const lastMessage = allMessages[allMessages.length - 1]
        // Guard: nothing to tag when the conversation is empty
        if (!lastMessage) return allMessages
        if (lastMessage.type === 'userMessage') return allMessages
        // Only assign an id to a message that does not have one yet
        if (lastMessage.id) return allMessages
        lastMessage.id = data.chatMessageId
        return allMessages
    })
    // Reset streaming state to idle, keeping only the incoming format
    setTtsStreamingState({
        mediaSource: null,
        sourceBuffer: null,
        audio: null,
        chunkQueue: [],
        isBuffering: false,
        audioFormat: data.format,
        abortController: null
    })
    // Defer so the reset above is committed before the MediaSource wires up
    setTimeout(() => initializeTTSStreaming(data), 0)
}
// Decode one base64-encoded audio chunk into bytes and enqueue it; when the
// source buffer is idle, schedule a drain so playback keeps flowing.
const handleTTSDataChunk = (base64Data) => {
    try {
        const binary = atob(base64Data)
        const audioBuffer = new Uint8Array(binary.length)
        for (let i = 0; i < binary.length; i++) {
            audioBuffer[i] = binary.charCodeAt(i)
        }
        setTtsStreamingState((prevState) => {
            // Buffer idle: kick the drain loop on the next tick
            if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) {
                setTimeout(() => processChunkQueue(), 0)
            }
            return {
                ...prevState,
                chunkQueue: [...prevState.chunkQueue, audioBuffer]
            }
        })
    } catch (error) {
        console.error('Error handling TTS data chunk:', error)
    }
}
// Finalize the MediaSource once the server signals the stream is complete.
// Any chunks still queued are flushed into the SourceBuffer on staggered
// timers (appendBuffer rejects while the buffer is 'updating'), and
// endOfStream() is only called after the buffer has settled.
// NOTE(review): the timers close over `prevState`, so they assume the
// streaming state is not replaced mid-flush — confirm if this is revisited.
const handleTTSEnd = () => {
    setTtsStreamingState((prevState) => {
        if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') {
            try {
                if (prevState.sourceBuffer && prevState.chunkQueue.length > 0 && !prevState.sourceBuffer.updating) {
                    // Flush remaining chunks sequentially, 50ms apart, so each
                    // append lands after the previous one has finished
                    const remainingChunks = [...prevState.chunkQueue]
                    remainingChunks.forEach((chunk, index) => {
                        setTimeout(() => {
                            if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) {
                                try {
                                    prevState.sourceBuffer.appendBuffer(chunk)
                                    if (index === remainingChunks.length - 1) {
                                        // After the last chunk, give the buffer a
                                        // moment to settle before ending the stream
                                        setTimeout(() => {
                                            if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') {
                                                prevState.mediaSource.endOfStream()
                                            }
                                        }, 100)
                                    }
                                } catch (error) {
                                    console.error('Error appending remaining chunk:', error)
                                }
                            }
                        }, index * 50)
                    })
                    return {
                        ...prevState,
                        chunkQueue: []
                    }
                }
                if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) {
                    // Queue drained and buffer idle: end the stream immediately
                    prevState.mediaSource.endOfStream()
                } else if (prevState.sourceBuffer) {
                    // Buffer busy: end the stream once the in-flight append completes
                    prevState.sourceBuffer.addEventListener(
                        'updateend',
                        () => {
                            if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') {
                                prevState.mediaSource.endOfStream()
                            }
                        },
                        { once: true }
                    )
                }
            } catch (error) {
                console.error('Error ending TTS stream:', error)
            }
        }
        return prevState
    })
}
// Server-initiated abort event: release all local state for that message
const handleTTSAbort = (data) => {
    cleanupTTSForMessage(data.chatMessageId)
}
/**
 * Ask the server to abort TTS for every message currently loading or playing.
 *
 * Fixes: message ids are deduplicated (a message can appear in both the
 * loading and playing maps, which previously fired duplicate abort calls),
 * and the abort requests run in parallel instead of sequentially awaiting
 * each one.
 */
const handleTTSAbortAll = async () => {
    const activeTTSMessages = new Set([...Object.keys(isTTSLoading), ...Object.keys(isTTSPlaying)])
    await Promise.all(
        [...activeTTSMessages].map((messageId) =>
            ttsApi.abortTTS({ chatflowId: chatflowid, chatId, chatMessageId: messageId })
        )
    )
}
// Drain-loop driver: whenever a SourceBuffer exists, listen for 'updateend'
// (an appendBuffer finished) to clear the buffering flag and schedule the
// next queued chunk. The listener is detached when the buffer changes.
useEffect(() => {
    if (ttsStreamingState.sourceBuffer) {
        const sourceBuffer = ttsStreamingState.sourceBuffer
        const handleUpdateEnd = () => {
            setTtsStreamingState((prevState) => ({
                ...prevState,
                isBuffering: false
            }))
            // Defer so the state update above commits before the next append
            setTimeout(() => processChunkQueue(), 0)
        }
        sourceBuffer.addEventListener('updateend', handleUpdateEnd)
        return () => {
            sourceBuffer.removeEventListener('updateend', handleUpdateEnd)
        }
    }
}, [ttsStreamingState.sourceBuffer])
// On unmount: release MediaSource/audio resources and cancel the pending
// TTS-action flag reset so no timer fires after teardown.
useEffect(() => {
    return () => {
        cleanupTTSStreaming()
        // Cleanup TTS timeout on unmount
        if (ttsTimeoutRef.current) {
            clearTimeout(ttsTimeoutRef.current)
            ttsTimeoutRef.current = null
        }
    }
}, [])
const getInputDisabled = () => {
return (
loading ||
@ -2151,7 +2676,7 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
})}
</div>
)}
{message.type === 'apiMessage' && message.id && chatFeedbackStatus ? (
{message.type === 'apiMessage' && message.id ? (
<>
<Box
sx={{
@ -2161,25 +2686,62 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
gap: 1
}}
>
<CopyToClipboardButton onClick={() => copyMessageToClipboard(message.message)} />
{!message.feedback ||
message.feedback.rating === '' ||
message.feedback.rating === 'THUMBS_UP' ? (
<ThumbsUpButton
isDisabled={message.feedback && message.feedback.rating === 'THUMBS_UP'}
rating={message.feedback ? message.feedback.rating : ''}
onClick={() => onThumbsUpClick(message.id)}
/>
) : null}
{!message.feedback ||
message.feedback.rating === '' ||
message.feedback.rating === 'THUMBS_DOWN' ? (
<ThumbsDownButton
isDisabled={message.feedback && message.feedback.rating === 'THUMBS_DOWN'}
rating={message.feedback ? message.feedback.rating : ''}
onClick={() => onThumbsDownClick(message.id)}
/>
) : null}
{isTTSEnabled && (
<IconButton
size='small'
onClick={() =>
isTTSPlaying[message.id]
? handleTTSStop(message.id)
: handleTTSClick(message.id, message.message)
}
disabled={isTTSLoading[message.id]}
sx={{
backgroundColor: ttsAudio[message.id] ? 'primary.main' : 'transparent',
color: ttsAudio[message.id] ? 'white' : 'inherit',
'&:hover': {
backgroundColor: ttsAudio[message.id] ? 'primary.dark' : 'action.hover'
}
}}
>
{isTTSLoading[message.id] ? (
<CircularProgress size={16} />
) : isTTSPlaying[message.id] ? (
<IconCircleDot style={{ width: '20px', height: '20px' }} color={'red'} />
) : (
<IconVolume
style={{ width: '20px', height: '20px' }}
color={customization.isDarkMode ? 'white' : '#1e88e5'}
/>
)}
</IconButton>
)}
{chatFeedbackStatus && (
<>
<CopyToClipboardButton
onClick={() => copyMessageToClipboard(message.message)}
/>
{!message.feedback ||
message.feedback.rating === '' ||
message.feedback.rating === 'THUMBS_UP' ? (
<ThumbsUpButton
isDisabled={message.feedback && message.feedback.rating === 'THUMBS_UP'}
rating={message.feedback ? message.feedback.rating : ''}
onClick={() => onThumbsUpClick(message.id)}
/>
) : null}
{!message.feedback ||
message.feedback.rating === '' ||
message.feedback.rating === 'THUMBS_DOWN' ? (
<ThumbsDownButton
isDisabled={
message.feedback && message.feedback.rating === 'THUMBS_DOWN'
}
rating={message.feedback ? message.feedback.rating : ''}
onClick={() => onThumbsDownClick(message.id)}
/>
) : null}
</>
)}
</Box>
</>
) : null}

File diff suppressed because one or more lines are too long