From ad44c7b66108b489e3ea42c1fbbf3cb37eaed6a4 Mon Sep 17 00:00:00 2001 From: Ilango Rajagopal Date: Fri, 22 Aug 2025 12:35:33 +0530 Subject: [PATCH] Fix issues with TTS - openai voices, streaming audio, rate limiting, speed of speech --- packages/components/src/Interface.ts | 1 + packages/components/src/textToSpeech.ts | 241 ++++++------ .../src/controllers/text-to-speech/index.ts | 9 +- .../src/services/text-to-speech/index.ts | 46 +-- packages/server/src/utils/SSEStreamer.ts | 15 +- packages/server/src/utils/buildChatflow.ts | 5 +- .../ui-component/extended/TextToSpeech.jsx | 22 +- .../ui/src/views/chatmessage/ChatMessage.jsx | 347 +++++++++++++++++- 8 files changed, 465 insertions(+), 221 deletions(-) diff --git a/packages/components/src/Interface.ts b/packages/components/src/Interface.ts index 268627833..811090481 100644 --- a/packages/components/src/Interface.ts +++ b/packages/components/src/Interface.ts @@ -442,6 +442,7 @@ export interface IServerSideEventStreamer { streamEndEvent(chatId: string): void streamUsageMetadataEvent(chatId: string, data: any): void streamAudioEvent(chatId: string, audioData: string): void + streamTTSStartEvent(chatId: string, format: string): void streamTTSDataEvent(chatId: string, audioChunk: string): void streamTTSEndEvent(chatId: string): void } diff --git a/packages/components/src/textToSpeech.ts b/packages/components/src/textToSpeech.ts index fccc17dd0..d90bec17c 100644 --- a/packages/components/src/textToSpeech.ts +++ b/packages/components/src/textToSpeech.ts @@ -15,163 +15,136 @@ export const convertTextToSpeechStream = async ( textToSpeechConfig: ICommonObject, options: ICommonObject, onChunk: (chunk: Buffer) => void, - onEnd: () => void + onEnd: () => void, + onStart?: (format: string) => void ): Promise => { - return new Promise(async (resolve, reject) => { - try { - if (textToSpeechConfig) { - const credentialId = textToSpeechConfig.credentialId as string - const credentialData = await getCredentialData(credentialId ?? '', options) + return new Promise((resolve, reject) => { + const processStream = async () => { + try { + if (textToSpeechConfig) { + const credentialId = textToSpeechConfig.credentialId as string + const credentialData = await getCredentialData(credentialId ?? '', options) - switch (textToSpeechConfig.name) { - case TextToSpeechType.OPENAI_TTS: { - const openai = new OpenAI({ - apiKey: credentialData.openAIApiKey - }) + switch (textToSpeechConfig.name) { + case TextToSpeechType.OPENAI_TTS: { + if (onStart) onStart('mp3') - const response = await openai.audio.speech.create({ - model: 'gpt-4o-mini-tts', - voice: (textToSpeechConfig.voice || 'alloy') as - | 'alloy' - | 'ash' - | 'ballad' - | 'coral' - | 'echo' - | 'fable' - | 'nova' - | 'onyx' - | 'sage' - | 'shimmer', - input: text, - response_format: 'wav' - }) + const openai = new OpenAI({ + apiKey: credentialData.openAIApiKey + }) - const stream = Readable.fromWeb(response as unknown as ReadableStream) - if (!stream) { - throw new Error('Failed to get response stream') + const response = await openai.audio.speech.create({ + model: 'gpt-4o-mini-tts', + voice: (textToSpeechConfig.voice || 'alloy') as + | 'alloy' + | 'ash' + | 'ballad' + | 'coral' + | 'echo' + | 'fable' + | 'nova' + | 'onyx' + | 'sage' + | 'shimmer', + input: text, + response_format: 'mp3' + }) + + const stream = response.body as unknown as Readable + if (!stream) { + throw new Error('Failed to get response stream') + } + + await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 20) + break } - stream.on('data', (chunk) => { - onChunk(Buffer.from(chunk)) - }) + case TextToSpeechType.ELEVEN_LABS_TTS: { + if (onStart) onStart('mp3') - stream.on('end', () => { - onEnd() - resolve() - }) + const client = new ElevenLabsClient({ + apiKey: credentialData.elevenLabsApiKey + }) - stream.on('error', (error) => { - reject(error) - }) + const response = await client.textToSpeech.stream(textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM', { + text: text, + modelId: 'eleven_multilingual_v2' + }) - break - } + const stream = Readable.fromWeb(response as unknown as ReadableStream) + if (!stream) { + throw new Error('Failed to get response stream') + } - case TextToSpeechType.ELEVEN_LABS_TTS: { - const client = new ElevenLabsClient({ - apiKey: credentialData.elevenLabsApiKey - }) - - const response = await client.textToSpeech.stream(textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM', { - text: text, - modelId: 'eleven_multilingual_v2' - }) - - const stream = Readable.fromWeb(response as unknown as ReadableStream) - if (!stream) { - throw new Error('Failed to get response stream') + await processStreamWithRateLimit(stream, onChunk, onEnd, resolve, reject, 640, 40) + break } - - stream.on('data', (chunk) => { - onChunk(Buffer.from(chunk)) - }) - - stream.on('end', () => { - onEnd() - resolve() - }) - - stream.on('error', (error) => { - reject(error) - }) - - break } + } else { + reject(new Error('Text to speech is not selected. Please configure TTS in the chatflow.')) } - } else { - reject(new Error('Text to speech is not selected. Please configure TTS in the chatflow.')) + } catch (error) { + reject(error) } - } catch (error) { - reject(error) } + + processStream() }) } -export const convertTextToSpeech = async (text: string, textToSpeechConfig: ICommonObject, options: ICommonObject): Promise => { - if (textToSpeechConfig) { - const credentialId = textToSpeechConfig.credentialId as string - const credentialData = await getCredentialData(credentialId ?? '', options) +const processStreamWithRateLimit = async ( + stream: Readable, + onChunk: (chunk: Buffer) => void, + onEnd: () => void, + resolve: () => void, + reject: (error: any) => void, + targetChunkSize: number = 640, + rateLimitMs: number = 20 +) => { + const TARGET_CHUNK_SIZE = targetChunkSize + const RATE_LIMIT_MS = rateLimitMs - switch (textToSpeechConfig.name) { - case TextToSpeechType.OPENAI_TTS: { - const openai = new OpenAI({ - apiKey: credentialData.openAIApiKey - }) + let buffer: Buffer = Buffer.alloc(0) + let isEnded = false - const response = await openai.audio.speech.create({ - model: 'gpt-4o-mini-tts', - voice: (textToSpeechConfig.voice || 'alloy') as - | 'alloy' - | 'ash' - | 'ballad' - | 'coral' - | 'echo' - | 'fable' - | 'nova' - | 'onyx' - | 'sage' - | 'shimmer', - input: text, - response_format: 'wav' - }) - - const audioBuffer = Buffer.from(await response.arrayBuffer()) - return audioBuffer - } - - case TextToSpeechType.ELEVEN_LABS_TTS: { - const client = new ElevenLabsClient({ - apiKey: credentialData.elevenLabsApiKey - }) - - const audioStream = await client.textToSpeech.stream(textToSpeechConfig.voice || '21m00Tcm4TlvDq8ikWAM', { - text: text, - modelId: 'eleven_multilingual_v2' - }) - - const chunks: Buffer[] = [] - const reader = audioStream.getReader() - - try { - let result = await reader.read() - while (!result.done) { - if (result.value) { - chunks.push(Buffer.from(result.value)) - } - result = await reader.read() - } - } finally { - reader.releaseLock() - } - - const audioBuffer = Buffer.concat(chunks) - return audioBuffer + const processChunks = async () => { + while (!isEnded || buffer.length > 0) { + if (buffer.length >= TARGET_CHUNK_SIZE) { + const chunk = buffer.subarray(0, TARGET_CHUNK_SIZE) + buffer = buffer.subarray(TARGET_CHUNK_SIZE) + onChunk(chunk) + await sleep(RATE_LIMIT_MS) + } else if (isEnded && buffer.length > 0) { + onChunk(buffer) + buffer = Buffer.alloc(0) + } else if (!isEnded) { + await sleep(RATE_LIMIT_MS) + } else { + break } } - } else { - throw new Error('Text to speech is not selected. Please configure TTS in the chatflow.') + + onEnd() + resolve() } - return Buffer.alloc(0) + + stream.on('data', (chunk) => { + buffer = Buffer.concat([buffer, Buffer.from(chunk)]) + }) + + stream.on('end', () => { + isEnded = true + }) + + stream.on('error', (error) => { + reject(error) + }) + + processChunks().catch(reject) +} + +const sleep = (ms: number): Promise => { + return new Promise((resolve) => setTimeout(resolve, ms)) } export const getVoices = async (provider: string, credentialId: string, options: ICommonObject) => { diff --git a/packages/server/src/controllers/text-to-speech/index.ts b/packages/server/src/controllers/text-to-speech/index.ts index 689f24bf1..b9665fbe0 100644 --- a/packages/server/src/controllers/text-to-speech/index.ts +++ b/packages/server/src/controllers/text-to-speech/index.ts @@ -67,7 +67,6 @@ const generateTextToSpeech = async (req: Request, res: Response) => { res.write(`data: ${JSON.stringify(clientResponse)}\n\n`) }, async () => { - // Send end event const endResponse = { event: 'tts_end', data: {} @@ -75,6 +74,14 @@ const generateTextToSpeech = async (req: Request, res: Response) => { res.write('event: tts_end\n') res.write(`data: ${JSON.stringify(endResponse)}\n\n`) res.end() + }, + (format: string) => { + const startResponse = { + event: 'tts_start', + data: { format } + } + res.write('event: tts_start\n') + res.write(`data: ${JSON.stringify(startResponse)}\n\n`) } ) } catch (error) { diff --git a/packages/server/src/services/text-to-speech/index.ts b/packages/server/src/services/text-to-speech/index.ts index f330d06a5..22a11ede7 100644 --- a/packages/server/src/services/text-to-speech/index.ts +++ b/packages/server/src/services/text-to-speech/index.ts @@ -2,7 +2,7 @@ import { StatusCodes } from 'http-status-codes' import { getRunningExpressApp } from '../../utils/getRunningExpressApp' import { InternalFlowiseError } from '../../errors/internalFlowiseError' import { getErrorMessage } from '../../errors/utils' -import { convertTextToSpeech, getVoices } from 'flowise-components' +import { getVoices } from 'flowise-components' import { databaseEntities } from '../../utils' export enum TextToSpeechProvider { @@ -23,51 +23,8 @@ export interface TTSResponse { contentType: string } -const generateTextToSpeech = async (request: TTSRequest): Promise => { - try { - const appServer = getRunningExpressApp() - const options = { - orgId: '', - chatflowid: '', - chatId: '', - appDataSource: appServer.AppDataSource, - databaseEntities: databaseEntities - } - - const textToSpeechConfig = { - name: request.provider, - credentialId: request.credentialId, - voice: request.voice, - model: request.model - } - - const audioBuffer = await convertTextToSpeech(request.text, textToSpeechConfig, options) - - return { - audioBuffer, - contentType: 'audio/mpeg' - } - } catch (error) { - throw new InternalFlowiseError( - StatusCodes.INTERNAL_SERVER_ERROR, - `Error: textToSpeechService.generateTextToSpeech - ${getErrorMessage(error)}` - ) - } -} - const getVoicesForProvider = async (provider: string, credentialId?: string): Promise => { try { - if (provider === TextToSpeechProvider.OPENAI) { - return [ - { id: 'alloy', name: 'Alloy' }, - { id: 'echo', name: 'Echo' }, - { id: 'fable', name: 'Fable' }, - { id: 'onyx', name: 'Onyx' }, - { id: 'nova', name: 'Nova' }, - { id: 'shimmer', name: 'Shimmer' } - ] - } - if (!credentialId) { throw new InternalFlowiseError(StatusCodes.BAD_REQUEST, 'Credential ID required for this provider') } @@ -91,6 +48,5 @@ const getVoicesForProvider = async (provider: string, credentialId?: string): Pr } export default { - generateTextToSpeech, getVoices: getVoicesForProvider } diff --git a/packages/server/src/utils/SSEStreamer.ts b/packages/server/src/utils/SSEStreamer.ts index 77d976c09..050e7f4b0 100644 --- a/packages/server/src/utils/SSEStreamer.ts +++ b/packages/server/src/utils/SSEStreamer.ts @@ -269,10 +269,19 @@ export class SSEStreamer implements IServerSideEventStreamer { } } + streamTTSStartEvent(chatId: string, format: string): void { + const client = this.clients[chatId] + if (client) { + const clientResponse = { + event: 'tts_start', + data: { format } + } + client.response.write('message:\ndata:' + JSON.stringify(clientResponse) + '\n\n') + } + } + streamTTSDataEvent(chatId: string, audioChunk: string): void { const client = this.clients[chatId] - console.log('clients', this.clients) - console.log('client', client) if (client) { const clientResponse = { event: 'tts_data', @@ -284,8 +293,6 @@ export class SSEStreamer implements IServerSideEventStreamer { streamTTSEndEvent(chatId: string): void { const client = this.clients[chatId] - console.log('clients', this.clients) - console.log('client', client) if (client) { const clientResponse = { event: 'tts_end', diff --git a/packages/server/src/utils/buildChatflow.ts b/packages/server/src/utils/buildChatflow.ts index 27d0092f7..6f021d68a 100644 --- a/packages/server/src/utils/buildChatflow.ts +++ b/packages/server/src/utils/buildChatflow.ts @@ -6,7 +6,6 @@ import { omit } from 'lodash' import { IFileUpload, convertSpeechToText, - convertTextToSpeech, convertTextToSpeechStream, ICommonObject, addSingleFileToStorage, @@ -123,11 +122,13 @@ const generateTTSForResponseStream = async ( options, (chunk: Buffer) => { const audioBase64 = chunk.toString('base64') - logger.info(`Received TTS chunk: ${audioBase64}`) sseStreamer.streamTTSDataEvent(chatId, audioBase64) }, () => { sseStreamer.streamTTSEndEvent(chatId) + }, + (format: string) => { + sseStreamer.streamTTSStartEvent(chatId, format) } ) } catch (error) { diff --git a/packages/ui/src/ui-component/extended/TextToSpeech.jsx b/packages/ui/src/ui-component/extended/TextToSpeech.jsx index 3b0d306ea..8ce80278a 100644 --- a/packages/ui/src/ui-component/extended/TextToSpeech.jsx +++ b/packages/ui/src/ui-component/extended/TextToSpeech.jsx @@ -58,30 +58,10 @@ const textToSpeechProviders = { { label: 'Voice', name: 'voice', - type: 'options', + type: 'voice_select', description: 'The voice to use when generating the audio', - options: [ - { label: 'Alloy', name: 'alloy' }, - { label: 'Echo', name: 'echo' }, - { label: 'Fable', name: 'fable' }, - { label: 'Onyx', name: 'onyx' }, - { label: 'Nova', name: 'nova' }, - { label: 'Shimmer', name: 'shimmer' } - ], default: 'alloy', optional: true - }, - { - label: 'Model', - name: 'model', - type: 'options', - description: 'The TTS model to use', - options: [ - { label: 'TTS-1', name: 'tts-1' }, - { label: 'TTS-1 HD', name: 'tts-1-hd' } - ], - default: 'tts-1', - optional: true } ] }, diff --git a/packages/ui/src/views/chatmessage/ChatMessage.jsx b/packages/ui/src/views/chatmessage/ChatMessage.jsx index b23d3508b..8b006f449 100644 --- a/packages/ui/src/views/chatmessage/ChatMessage.jsx +++ b/packages/ui/src/views/chatmessage/ChatMessage.jsx @@ -257,6 +257,16 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP const [ttsAudio, setTtsAudio] = useState({}) const [isTTSEnabled, setIsTTSEnabled] = useState(false) + // TTS streaming state + const [ttsStreamingState, setTtsStreamingState] = useState({ + mediaSource: null, + sourceBuffer: null, + audio: null, + chunkQueue: [], + isBuffering: false, + audioFormat: null + }) + const isFileAllowedForUpload = (file) => { const constraints = getAllowChatFlowUploads.data /** @@ -1042,6 +1052,15 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP case 'audio': handleAutoPlayAudio(payload.data) break + case 'tts_start': + handleTTSStart(payload.data.format) + break + case 'tts_data': + handleTTSDataChunk(payload.data) + break + case 'tts_end': + handleTTSEnd() + break case 'end': setLocalStorageChatflow(chatflowid, chatId) closeResponse() @@ -1588,6 +1607,19 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP return } + // Use existing streaming infrastructure for manual TTS + handleTTSStart('mp3', (audio) => { + setTtsAudio((prev) => ({ ...prev, [messageId]: audio })) + + audio.addEventListener('ended', () => { + setTtsAudio((prev) => { + const newState = { ...prev } + delete newState[messageId] + return newState + }) + }) + }) + const response = await fetch('/api/v1/text-to-speech/generate', { method: 'POST', headers: { @@ -1608,23 +1640,48 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP throw new Error(`TTS request failed: ${response.status}`) } - const audioBuffer = await response.arrayBuffer() - const audioBlob = new Blob([audioBuffer], { type: 'audio/mpeg' }) - const audioUrl = URL.createObjectURL(audioBlob) - const audio = new Audio(audioUrl) + const reader = response.body.getReader() + const decoder = new TextDecoder() + let buffer = '' - setTtsAudio((prev) => ({ ...prev, [messageId]: audio })) + let done = false + while (!done) { + const result = await reader.read() + done = result.done + if (done) { + break + } + const value = result.value - audio.addEventListener('ended', () => { - setTtsAudio((prev) => { - const newState = { ...prev } - delete newState[messageId] - return newState - }) - URL.revokeObjectURL(audioUrl) - }) + // Decode the chunk as text and add to buffer + const chunk = decoder.decode(value, { stream: true }) + buffer += chunk - await audio.play() + // Process complete SSE events + const lines = buffer.split('\n\n') + buffer = lines.pop() || '' // Keep incomplete event in buffer + + for (const eventBlock of lines) { + if (eventBlock.trim()) { + const event = parseSSEEvent(eventBlock) + if (event) { + // Handle the event just like the SSE handler does + switch (event.event) { + case 'tts_start': + break + case 'tts_data': + handleTTSDataChunk(event.data) + break + case 'tts_end': + handleTTSEnd() + break + default: + break + } + } + } + } + } } catch (error) { console.error('Error with TTS:', error) enqueueSnackbar({ @@ -1671,6 +1728,268 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP } } + const parseSSEEvent = (eventBlock) => { + const lines = eventBlock.split('\n') + const event = {} + + for (const line of lines) { + if (line.startsWith('event:')) { + event.event = line.substring(6).trim() + } else if (line.startsWith('data:')) { + const dataStr = line.substring(5).trim() + try { + const parsed = JSON.parse(dataStr) + if (parsed.data) { + event.data = parsed.data + } + } catch (e) { + console.error('Error parsing SSE data:', e, 'Raw data:', dataStr) + } + } + } + + return event.event ? event : null + } + + const initializeTTSStreaming = (format, onAudioReady = null) => { + try { + const mediaSource = new MediaSource() + const audio = new Audio() + audio.src = URL.createObjectURL(mediaSource) + + mediaSource.addEventListener('sourceopen', () => { + try { + // Use the provided format, default to MP3 if not set + const mimeType = format === 'mp3' ? 'audio/mpeg' : 'audio/mpeg' + + const sourceBuffer = mediaSource.addSourceBuffer(mimeType) + + setTtsStreamingState((prevState) => ({ + ...prevState, + mediaSource, + sourceBuffer, + audio + })) + + // Start playback + + audio.play().catch((playError) => { + console.error('Error starting audio playback:', playError) + }) + + // Notify callback if provided + if (onAudioReady) { + onAudioReady(audio) + } + } catch (error) { + console.error('Error setting up source buffer:', error) + console.error('MediaSource readyState:', mediaSource.readyState) + console.error('Requested MIME type:', mimeType) + } + }) + + audio.addEventListener('ended', () => { + cleanupTTSStreaming() + }) + } catch (error) { + console.error('Error initializing TTS streaming:', error) + } + } + + const cleanupTTSStreaming = () => { + setTtsStreamingState((prevState) => { + if (prevState.audio) { + prevState.audio.pause() + prevState.audio.removeAttribute('src') + if (prevState.audio.src) { + URL.revokeObjectURL(prevState.audio.src) + } + } + + if (prevState.mediaSource) { + if (prevState.mediaSource.readyState === 'open') { + try { + prevState.mediaSource.endOfStream() + } catch (e) { + // Ignore errors during cleanup + } + } + prevState.mediaSource.removeEventListener('sourceopen', () => {}) + } + + return { + mediaSource: null, + sourceBuffer: null, + audio: null, + chunkQueue: [], + isBuffering: false, + audioFormat: null + } + }) + } + + const processChunkQueue = () => { + setTtsStreamingState((prevState) => { + if (!prevState.sourceBuffer || prevState.sourceBuffer.updating || prevState.chunkQueue.length === 0) { + return prevState + } + + const chunk = prevState.chunkQueue.shift() + + try { + prevState.sourceBuffer.appendBuffer(chunk) + return { + ...prevState, + chunkQueue: [...prevState.chunkQueue], + isBuffering: true + } + } catch (error) { + console.error('Error appending chunk to buffer:', error) + return prevState + } + }) + } + + const handleTTSStart = (format, onAudioReady = null) => { + // Store the audio format for this TTS session and initialize + setTtsStreamingState((prevState) => { + // Cleanup any existing streaming first + if (prevState.audio) { + prevState.audio.pause() + if (prevState.audio.src) { + URL.revokeObjectURL(prevState.audio.src) + } + } + + if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') { + try { + prevState.mediaSource.endOfStream() + } catch (e) { + // Ignore errors during cleanup + } + } + + return { + mediaSource: null, + sourceBuffer: null, + audio: null, + chunkQueue: [], + isBuffering: false, + audioFormat: format + } + }) + + // Initialize TTS streaming with the correct format + setTimeout(() => initializeTTSStreaming(format, onAudioReady), 0) + } + + const handleTTSDataChunk = (base64Data) => { + try { + const audioBuffer = Uint8Array.from(atob(base64Data), (c) => c.charCodeAt(0)) + + setTtsStreamingState((prevState) => { + // Add chunk to queue + const newState = { + ...prevState, + chunkQueue: [...prevState.chunkQueue, audioBuffer] + } + + // Process queue if sourceBuffer is ready + if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) { + setTimeout(() => processChunkQueue(), 0) + } + + return newState + }) + } catch (error) { + console.error('Error handling TTS data chunk:', error) + } + } + + const handleTTSEnd = () => { + setTtsStreamingState((prevState) => { + if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') { + try { + // Process any remaining chunks first + if (prevState.sourceBuffer && prevState.chunkQueue.length > 0 && !prevState.sourceBuffer.updating) { + const remainingChunks = [...prevState.chunkQueue] + remainingChunks.forEach((chunk, index) => { + setTimeout(() => { + if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) { + try { + prevState.sourceBuffer.appendBuffer(chunk) + if (index === remainingChunks.length - 1) { + // End stream after last chunk + setTimeout(() => { + if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') { + prevState.mediaSource.endOfStream() + } + }, 100) + } + } catch (error) { + console.error('Error appending remaining chunk:', error) + } + } + }, index * 50) + }) + return { + ...prevState, + chunkQueue: [] + } + } + + // Wait for any pending buffer operations to complete + if (prevState.sourceBuffer && !prevState.sourceBuffer.updating) { + prevState.mediaSource.endOfStream() + } else if (prevState.sourceBuffer) { + // Wait for buffer to finish updating + prevState.sourceBuffer.addEventListener( + 'updateend', + () => { + if (prevState.mediaSource && prevState.mediaSource.readyState === 'open') { + prevState.mediaSource.endOfStream() + } + }, + { once: true } + ) + } + } catch (error) { + console.error('Error ending TTS stream:', error) + } + } + return prevState + }) + } + + // Set up sourceBuffer event listeners when it changes + useEffect(() => { + if (ttsStreamingState.sourceBuffer) { + const sourceBuffer = ttsStreamingState.sourceBuffer + + const handleUpdateEnd = () => { + setTtsStreamingState((prevState) => ({ + ...prevState, + isBuffering: false + })) + // Process next chunk in queue + setTimeout(() => processChunkQueue(), 0) + } + + sourceBuffer.addEventListener('updateend', handleUpdateEnd) + + return () => { + sourceBuffer.removeEventListener('updateend', handleUpdateEnd) + } + } + }, [ttsStreamingState.sourceBuffer]) + + // Cleanup TTS streaming on component unmount + useEffect(() => { + return () => { + cleanupTTSStreaming() + } + }, []) + const getInputDisabled = () => { return ( loading ||