Refactor Text-to-Speech Provider Selection and Enhance UI Components
- Updated the text-to-speech controller to select the active provider based on status instead of the first available provider
- Added audio waveform controls and test audio functionality in the TextToSpeech component, allowing users to play and pause test audio
- Integrated Autocomplete for voice selection in the TextToSpeech component
- Implemented TTS action management in ChatMessage to prevent auto-scrolling during TTS actions
parent 5ea714098d
commit 123ab3c85e
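The controller change in the first hunk below is the core behavioral fix: instead of blindly taking the first provider key in the chatflow's textToSpeech JSON, the controller now picks the entry whose status flag is true. A minimal standalone sketch of that selection logic (the config values here are illustrative, not taken from a real chatflow):

    // Hypothetical textToSpeech config: several providers can be stored,
    // but only the one flagged status: true should be used for generation.
    const ttsConfig = {
        openai: { status: false, credentialId: 'cred-openai', voice: 'alloy', model: 'tts-1' },
        elevenlabs: { status: true, credentialId: 'cred-eleven', voice: 'Rachel', model: 'eleven_turbo_v2' }
    }

    // Before: Object.keys(ttsConfig)[0] always returned 'openai', even when disabled.
    // After: select the provider explicitly marked active, and fail loudly if none is.
    const activeProviderKey = Object.keys(ttsConfig).find((key) => ttsConfig[key].status === true)
    if (!activeProviderKey) {
        throw new Error('no active TTS provider configured in chatflow!')
    }
    const { credentialId, voice, model } = ttsConfig[activeProviderKey]
    console.log(activeProviderKey, voice) // 'elevenlabs' 'Rachel'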
@@ -34,17 +34,17 @@ const generateTextToSpeech = async (req: Request, res: Response) => {
         const chatflow = await chatflowsService.getChatflowById(chatflowId)
         const ttsConfig = JSON.parse(chatflow.textToSpeech)

-        // Extract the first provider config (assuming single provider per chatflow)
-        const providerKey = Object.keys(ttsConfig)[0]
-        if (!providerKey) {
+        // Find the provider with status: true
+        const activeProviderKey = Object.keys(ttsConfig).find((key) => ttsConfig[key].status === true)
+        if (!activeProviderKey) {
             throw new InternalFlowiseError(
                 StatusCodes.BAD_REQUEST,
-                `Error: textToSpeechController.generateTextToSpeech - no TTS provider configured in chatflow!`
+                `Error: textToSpeechController.generateTextToSpeech - no active TTS provider configured in chatflow!`
             )
         }

-        const providerConfig = ttsConfig[providerKey]
-        provider = providerKey
+        const providerConfig = ttsConfig[activeProviderKey]
+        provider = activeProviderKey
         credentialId = providerConfig.credentialId
         voice = providerConfig.voice
         model = providerConfig.model
@@ -0,0 +1,311 @@
import { useRef, useEffect, useState, useCallback } from 'react'
import PropTypes from 'prop-types'
import { Box, IconButton, CircularProgress } from '@mui/material'
import { IconPlayerPlay, IconPlayerPause } from '@tabler/icons-react'
import { useTheme } from '@mui/material/styles'

const AudioWaveform = ({
    audioSrc,
    onPlay,
    onPause,
    onEnded,
    isPlaying = false,
    duration: _duration = 0,
    isGenerating = false,
    disabled = false,
    externalAudioRef = null,
    resetProgress = false
}) => {
    const canvasRef = useRef(null)
    const audioRef = useRef(null)
    const animationRef = useRef(null)
    const theme = useTheme()

    const [progress, setProgress] = useState(0)
    const [_audioBuffer, setAudioBuffer] = useState(null)
    const [waveformData, setWaveformData] = useState([])

    // Generate waveform visualization data
    const generateWaveform = useCallback((buffer) => {
        if (!buffer) return []

        const rawData = buffer.getChannelData(0)
        const samples = 200 // More bars for smoother appearance like reference
        const blockSize = Math.floor(rawData.length / samples)
        const filteredData = []

        for (let i = 0; i < samples; i++) {
            let blockStart = blockSize * i
            let sum = 0
            for (let j = 0; j < blockSize; j++) {
                sum += Math.abs(rawData[blockStart + j])
            }
            filteredData.push(sum / blockSize)
        }

        // Normalize the data
        const maxValue = Math.max(...filteredData)
        return filteredData.map((value) => (value / maxValue) * 100)
    }, [])

    // Generate realistic placeholder waveform like in reference
    const generatePlaceholderWaveform = useCallback(() => {
        const samples = 200
        const waveform = []

        for (let i = 0; i < samples; i++) {
            // Create a more realistic waveform pattern
            const position = i / samples
            const baseHeight = 20 + Math.sin(position * Math.PI * 4) * 15
            const variation = Math.random() * 40 + 10
            const envelope = Math.sin(position * Math.PI) * 0.8 + 0.2

            waveform.push((baseHeight + variation) * envelope)
        }

        return waveform
    }, [])

    // Draw waveform on canvas
    const drawWaveform = useCallback(() => {
        const canvas = canvasRef.current
        if (!canvas || waveformData.length === 0) return

        const ctx = canvas.getContext('2d')

        // Handle high DPI displays for crisp rendering
        const dpr = window.devicePixelRatio || 1
        const rect = canvas.getBoundingClientRect()

        canvas.width = rect.width * dpr
        canvas.height = rect.height * dpr
        ctx.scale(dpr, dpr)

        canvas.style.width = rect.width + 'px'
        canvas.style.height = rect.height + 'px'

        ctx.clearRect(0, 0, rect.width, rect.height)

        // More bars for smoother appearance like the reference
        const totalBars = waveformData.length
        const barWidth = 2 // Fixed thin bar width like in reference
        const barSpacing = 1 // Small gap between bars
        const totalWidth = rect.width
        const startX = (totalWidth - totalBars * (barWidth + barSpacing)) / 2
        const centerY = rect.height / 2

        waveformData.forEach((value, index) => {
            const barHeight = Math.max(2, (value / 100) * (rect.height * 0.8))
            const x = startX + index * (barWidth + barSpacing)

            // Determine color based on playback progress
            const progressIndex = Math.floor((progress / 100) * waveformData.length)
            const isPlayed = index <= progressIndex

            ctx.fillStyle = isPlayed ? theme.palette.primary.main : theme.palette.mode === 'dark' ? '#444' : '#ccc'

            // Draw thin vertical bars like in reference
            ctx.fillRect(x, centerY - barHeight / 2, barWidth, barHeight)
        })
    }, [waveformData, progress, theme])

    // Load and decode audio for waveform generation
    useEffect(() => {
        if (audioSrc && audioSrc.startsWith('blob:')) {
            const loadAudioBuffer = async () => {
                try {
                    const response = await fetch(audioSrc)
                    const arrayBuffer = await response.arrayBuffer()
                    const audioContext = new (window.AudioContext || window.webkitAudioContext)()
                    const buffer = await audioContext.decodeAudioData(arrayBuffer)
                    setAudioBuffer(buffer)
                    const waveform = generateWaveform(buffer)
                    setWaveformData(waveform)
                } catch (error) {
                    console.error('Error loading audio buffer:', error)
                    // Generate placeholder waveform
                    const placeholder = generatePlaceholderWaveform()
                    setWaveformData(placeholder)
                }
            }
            loadAudioBuffer()
        } else {
            // Always show placeholder waveform when no audio source
            const placeholder = generatePlaceholderWaveform()
            setWaveformData(placeholder)
        }
    }, [audioSrc, generateWaveform, generatePlaceholderWaveform])

    // Reset progress when resetProgress prop is true
    useEffect(() => {
        if (resetProgress) {
            setProgress(0)
        }
    }, [resetProgress])

    // Draw waveform when data changes or progress updates
    useEffect(() => {
        drawWaveform()
    }, [drawWaveform, progress])

    // Update progress during playback
    useEffect(() => {
        const activeAudioRef = externalAudioRef || audioRef.current
        if (isPlaying && activeAudioRef && audioSrc) {
            const updateProgress = () => {
                const audio = externalAudioRef || audioRef.current
                if (audio && audio.duration && !isNaN(audio.duration)) {
                    const currentProgress = (audio.currentTime / audio.duration) * 100
                    setProgress(currentProgress)
                }
                if (isPlaying && audio && !audio.paused) {
                    animationRef.current = requestAnimationFrame(updateProgress)
                }
            }

            // Start the update loop
            animationRef.current = requestAnimationFrame(updateProgress)
        } else {
            if (animationRef.current) {
                cancelAnimationFrame(animationRef.current)
            }
        }

        return () => {
            if (animationRef.current) {
                cancelAnimationFrame(animationRef.current)
            }
        }
    }, [isPlaying, audioSrc, externalAudioRef])

    const handlePlayPause = () => {
        if (isPlaying) {
            onPause?.()
        } else {
            onPlay?.()
        }
    }

    // Handle canvas click for seeking
    const handleCanvasClick = (event) => {
        const activeAudio = externalAudioRef || audioRef.current
        if (!activeAudio || !activeAudio.duration || disabled || isGenerating) return

        const canvas = canvasRef.current
        const rect = canvas.getBoundingClientRect()
        const clickX = event.clientX - rect.left

        // Use the actual canvas display width for more accurate clicking
        const clickProgress = Math.max(0, Math.min(100, (clickX / rect.width) * 100))
        const seekTime = (clickProgress / 100) * activeAudio.duration

        activeAudio.currentTime = seekTime
        setProgress(clickProgress)
    }

    return (
        <Box sx={{ width: '100%' }}>
            {/* Hidden audio element for duration and seeking - only if no external ref */}
            {audioSrc && !externalAudioRef && (
                <audio
                    ref={audioRef}
                    src={audioSrc}
                    onLoadedMetadata={() => {
                        if (audioRef.current) {
                            setProgress(0)
                        }
                    }}
                    onTimeUpdate={() => {
                        // Additional progress update on timeupdate event
                        const audio = audioRef.current
                        if (audio && audio.duration && !isNaN(audio.duration)) {
                            const currentProgress = (audio.currentTime / audio.duration) * 100
                            setProgress(currentProgress)
                        }
                    }}
                    onEnded={() => {
                        setProgress(0)
                        onEnded?.()
                    }}
                    style={{ display: 'none' }}
                >
                    <track kind='captions' />
                </audio>
            )}

            {/* Play button and Waveform side by side */}
            <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
                {/* Play/Pause Button */}
                <IconButton
                    onClick={handlePlayPause}
                    disabled={disabled || isGenerating}
                    size='small'
                    sx={{
                        width: 32,
                        height: 32,
                        flexShrink: 0,
                        backgroundColor: isPlaying ? 'transparent' : theme.palette.primary.main,
                        color: isPlaying ? theme.palette.primary.main : 'white',
                        border: isPlaying ? `1px solid ${theme.palette.primary.main}` : 'none',
                        '&:hover': {
                            backgroundColor: isPlaying ? theme.palette.primary.main : theme.palette.primary.dark,
                            color: 'white'
                        },
                        '&:disabled': {
                            backgroundColor: theme.palette.action.disabled,
                            color: theme.palette.action.disabled,
                            border: 'none'
                        }
                    }}
                >
                    {isGenerating ? (
                        <CircularProgress size={16} />
                    ) : isPlaying ? (
                        <IconPlayerPause size={16} />
                    ) : (
                        <IconPlayerPlay size={16} />
                    )}
                </IconButton>

                {/* Waveform Canvas */}
                <Box
                    sx={{
                        flex: 1,
                        cursor: !disabled && !isGenerating && audioSrc ? 'pointer' : 'default',
                        display: 'flex',
                        alignItems: 'center'
                    }}
                >
                    <canvas
                        ref={canvasRef}
                        width={400}
                        height={32}
                        onClick={handleCanvasClick}
                        style={{
                            width: '100%',
                            height: '32px',
                            backgroundColor: 'transparent',
                            opacity: disabled ? 0.6 : 1,
                            display: 'block'
                        }}
                    />
                </Box>
            </Box>
        </Box>
    )
}

AudioWaveform.propTypes = {
    audioSrc: PropTypes.string,
    onPlay: PropTypes.func,
    onPause: PropTypes.func,
    onEnded: PropTypes.func,
    isPlaying: PropTypes.bool,
    duration: PropTypes.number,
    isGenerating: PropTypes.bool,
    disabled: PropTypes.bool,
    externalAudioRef: PropTypes.object,
    resetProgress: PropTypes.bool
}

export default AudioWaveform
@@ -402,7 +402,15 @@ const SpeechToText = ({ dialogProps }) => {
                                 sx={{ ml: 1 }}
                                 primary={speechToTextProviders[selectedProvider].label}
                                 secondary={
-                                    <a target='_blank' rel='noreferrer' href={speechToTextProviders[selectedProvider].url}>
+                                    <a
+                                        target='_blank'
+                                        rel='noreferrer'
+                                        href={speechToTextProviders[selectedProvider].url}
+                                        style={{
+                                            color: theme?.customization?.isDarkMode ? '#90caf9' : '#1976d2',
+                                            textDecoration: 'underline'
+                                        }}
+                                    >
                                         {speechToTextProviders[selectedProvider].url}
                                     </a>
                                 }
@@ -14,7 +14,9 @@ import {
     ListItemText,
     MenuItem,
     Select,
-    CircularProgress
+    CircularProgress,
+    Autocomplete,
+    TextField
 } from '@mui/material'
 import { IconX, IconVolume } from '@tabler/icons-react'
 import { useTheme } from '@mui/material/styles'
@@ -26,6 +28,7 @@ import { SwitchInput } from '@/ui-component/switch/Switch'
 import { Input } from '@/ui-component/input/Input'
 import { StyledButton } from '@/ui-component/button/StyledButton'
 import { Dropdown } from '@/ui-component/dropdown/Dropdown'
+import AudioWaveform from '@/ui-component/extended/AudioWaveform'
 import openAISVG from '@/assets/images/openai.svg'
 import elevenLabsSVG from '@/assets/images/elevenlabs.svg'

@@ -102,6 +105,21 @@ const TextToSpeech = ({ dialogProps }) => {
     const [selectedProvider, setSelectedProvider] = useState('none')
     const [voices, setVoices] = useState([])
     const [loadingVoices, setLoadingVoices] = useState(false)
+    const [testAudioSrc, setTestAudioSrc] = useState(null)
+    const [isTestPlaying, setIsTestPlaying] = useState(false)
+    const [testAudioRef, setTestAudioRef] = useState(null)
+    const [isGeneratingTest, setIsGeneratingTest] = useState(false)
+    const [resetWaveform, setResetWaveform] = useState(false)
+
+    const resetTestAudio = () => {
+        if (testAudioSrc) {
+            URL.revokeObjectURL(testAudioSrc)
+            setTestAudioSrc(null)
+        }
+        setIsTestPlaying(false)
+        setResetWaveform(true)
+        setTimeout(() => setResetWaveform(false), 100)
+    }

     const onSave = async () => {
         const textToSpeechConfig = setValue(true, selectedProvider, 'status')
@@ -164,16 +182,24 @@ const TextToSpeech = ({ dialogProps }) => {
                 newVal['none'].status = false
             }
         }

+        // Reset test audio when voice or credential is changed
+        if ((inputParamName === 'voice' || inputParamName === 'credentialId') && providerName === selectedProvider) {
+            resetTestAudio()
+        }
+
         setTextToSpeech(newVal)
         return newVal
     }

     const handleProviderChange = (provider, configOverride = null) => {
-        setSelectedProvider(() => provider)
+        setSelectedProvider(provider)
+        setVoices([])
+        resetTestAudio()
+
         if (provider !== 'none') {
             const config = configOverride || textToSpeech
-            const credentialId = config[provider]?.credentialId
+            const credentialId = config?.[provider]?.credentialId
             if (credentialId) {
                 loadVoicesForProvider(provider, credentialId)
             }
@@ -213,6 +239,8 @@ const TextToSpeech = ({ dialogProps }) => {
             return
         }

+        setIsGeneratingTest(true)
+
         try {
             const providerConfig = textToSpeech?.[selectedProvider] || {}
             const body = {
@@ -223,74 +251,6 @@ const TextToSpeech = ({ dialogProps }) => {
                 model: providerConfig.model
             }

-            // Use streaming approach like in ChatMessage.jsx
-            const mediaSource = new MediaSource()
-            const audio = new Audio()
-            audio.src = URL.createObjectURL(mediaSource)
-
-            const streamingState = {
-                mediaSource,
-                sourceBuffer: null,
-                audio,
-                chunkQueue: [],
-                isBuffering: false,
-                abortController: new AbortController(),
-                streamEnded: false
-            }
-
-            mediaSource.addEventListener('sourceopen', () => {
-                try {
-                    const mimeType = 'audio/mpeg'
-                    streamingState.sourceBuffer = mediaSource.addSourceBuffer(mimeType)
-
-                    streamingState.sourceBuffer.addEventListener('updateend', () => {
-                        streamingState.isBuffering = false
-                        if (streamingState.chunkQueue.length > 0 && !streamingState.sourceBuffer.updating) {
-                            const chunk = streamingState.chunkQueue.shift()
-                            try {
-                                streamingState.sourceBuffer.appendBuffer(chunk)
-                                streamingState.isBuffering = true
-                            } catch (error) {
-                                console.error('Error appending chunk:', error)
-                            }
-                        } else if (streamingState.streamEnded && streamingState.chunkQueue.length === 0) {
-                            // All chunks processed and stream ended, now we can safely end the stream
-                            try {
-                                if (streamingState.mediaSource.readyState === 'open') {
-                                    streamingState.mediaSource.endOfStream()
-                                }
-                            } catch (error) {
-                                console.error('Error ending MediaSource stream:', error)
-                            }
-                        }
-                    })
-
-                    audio.play().catch((playError) => {
-                        console.error('Error starting audio playback:', playError)
-                    })
-                } catch (error) {
-                    console.error('Error setting up source buffer:', error)
-                }
-            })
-
-            audio.addEventListener('playing', () => {
-                enqueueSnackbar({
-                    message: 'Test audio playing...',
-                    options: { variant: 'info' }
-                })
-            })
-
-            audio.addEventListener('ended', () => {
-                enqueueSnackbar({
-                    message: 'Test audio completed successfully',
-                    options: { variant: 'success' }
-                })
-                // Cleanup
-                if (streamingState.audio.src) {
-                    URL.revokeObjectURL(streamingState.audio.src)
-                }
-            })
-
             const response = await fetch('/api/v1/text-to-speech/generate', {
                 method: 'POST',
                 headers: {
@@ -298,24 +258,19 @@ const TextToSpeech = ({ dialogProps }) => {
                     'x-request-from': 'internal'
                 },
                 credentials: 'include',
-                body: JSON.stringify(body),
-                signal: streamingState.abortController.signal
+                body: JSON.stringify(body)
             })

             if (!response.ok) {
                 throw new Error(`HTTP error! status: ${response.status}`)
             }

+            const audioChunks = []
             const reader = response.body.getReader()
             let buffer = ''

             let done = false
             while (!done) {
-                if (streamingState.abortController.signal.aborted) {
-                    reader.cancel()
-                    break
-                }
-
                 const result = await reader.read()
                 done = result.done
                 if (done) break
@@ -328,51 +283,45 @@ const TextToSpeech = ({ dialogProps }) => {
                 for (const eventBlock of lines) {
                     if (eventBlock.trim()) {
                         const event = parseSSEEvent(eventBlock)
-                        if (event) {
-                            switch (event.event) {
-                                case 'tts_data':
-                                    if (event.data?.audioChunk) {
-                                        const audioBuffer = Uint8Array.from(atob(event.data.audioChunk), (c) => c.charCodeAt(0))
-                                        streamingState.chunkQueue.push(audioBuffer)
-
-                                        if (streamingState.sourceBuffer && !streamingState.sourceBuffer.updating) {
-                                            const chunk = streamingState.chunkQueue.shift()
-                                            try {
-                                                streamingState.sourceBuffer.appendBuffer(chunk)
-                                                streamingState.isBuffering = true
-                                            } catch (error) {
-                                                console.error('Error appending initial chunk:', error)
-                                            }
-                                        }
-                                    }
-                                    break
-                                case 'tts_end':
-                                    streamingState.streamEnded = true
-                                    // Check if we can end the stream immediately (no chunks queued and not updating)
-                                    if (
-                                        streamingState.sourceBuffer &&
-                                        streamingState.chunkQueue.length === 0 &&
-                                        !streamingState.sourceBuffer.updating &&
-                                        streamingState.mediaSource.readyState === 'open'
-                                    ) {
-                                        try {
-                                            streamingState.mediaSource.endOfStream()
-                                        } catch (error) {
-                                            console.error('Error ending MediaSource stream:', error)
-                                        }
-                                    }
-                                    break
-                            }
+                        if (event && event.event === 'tts_data' && event.data?.audioChunk) {
+                            const audioBuffer = Uint8Array.from(atob(event.data.audioChunk), (c) => c.charCodeAt(0))
+                            audioChunks.push(audioBuffer)
                         }
                     }
                 }
             }

+            if (audioChunks.length > 0) {
+                // Combine all chunks into a single blob
+                const totalLength = audioChunks.reduce((sum, chunk) => sum + chunk.length, 0)
+                const combinedBuffer = new Uint8Array(totalLength)
+                let offset = 0
+
+                for (const chunk of audioChunks) {
+                    combinedBuffer.set(chunk, offset)
+                    offset += chunk.length
+                }
+
+                const audioBlob = new Blob([combinedBuffer], { type: 'audio/mpeg' })
+                const audioUrl = URL.createObjectURL(audioBlob)
+
+                // Clean up previous audio
+                if (testAudioSrc) {
+                    URL.revokeObjectURL(testAudioSrc)
+                }
+
+                setTestAudioSrc(audioUrl)
+            } else {
+                throw new Error('No audio data received')
+            }
         } catch (error) {
             console.error('Error testing TTS:', error)
             enqueueSnackbar({
                 message: `TTS test failed: ${error.message}`,
                 options: { variant: 'error' }
             })
-        }
+        } finally {
+            setIsGeneratingTest(false)
+        }
     }

@@ -398,6 +347,46 @@ const TextToSpeech = ({ dialogProps }) => {
         return event.event ? event : null
     }

+    // Audio control functions for waveform component
+    const handleTestPlay = async () => {
+        // If audio already exists, just play it
+        if (testAudioRef && testAudioSrc) {
+            testAudioRef.play()
+            setIsTestPlaying(true)
+            return
+        }
+
+        // If no audio exists, generate it first
+        if (!testAudioSrc) {
+            await testTTS()
+            // testTTS will set the audio source, and we'll play it in the next useEffect
+        }
+    }
+
+    const handleTestPause = () => {
+        if (testAudioRef) {
+            testAudioRef.pause()
+            setIsTestPlaying(false)
+        }
+    }
+
+    const handleTestEnded = () => {
+        setIsTestPlaying(false)
+    }
+
+    // Auto-play when audio is generated (if user clicked play)
+    useEffect(() => {
+        if (testAudioSrc && testAudioRef && !isTestPlaying) {
+            // Small delay to ensure audio element is ready
+            setTimeout(() => {
+                testAudioRef.play()
+                setIsTestPlaying(true)
+            }, 100)
+        }
+
+        // eslint-disable-next-line react-hooks/exhaustive-deps
+    }, [testAudioSrc, testAudioRef])
+
     useEffect(() => {
         if (dialogProps.chatflow && dialogProps.chatflow.textToSpeech) {
             try {
@@ -422,6 +411,7 @@ const TextToSpeech = ({ dialogProps }) => {
             setTextToSpeech(null)
             setSelectedProvider('none')
             setVoices([])
+            resetTestAudio()
         }
         // eslint-disable-next-line react-hooks/exhaustive-deps
     }, [dialogProps])
@@ -482,7 +472,15 @@ const TextToSpeech = ({ dialogProps }) => {
                                 sx={{ ml: 1 }}
                                 primary={textToSpeechProviders[selectedProvider].label}
                                 secondary={
-                                    <a target='_blank' rel='noreferrer' href={textToSpeechProviders[selectedProvider].url}>
+                                    <a
+                                        target='_blank'
+                                        rel='noreferrer'
+                                        href={textToSpeechProviders[selectedProvider].url}
+                                        style={{
+                                            color: theme?.customization?.isDarkMode ? '#90caf9' : '#1976d2',
+                                            textDecoration: 'underline'
+                                        }}
+                                    >
                                         {textToSpeechProviders[selectedProvider].url}
                                     </a>
                                 }
@@ -551,25 +549,38 @@ const TextToSpeech = ({ dialogProps }) => {
                                 />
                             )}
                             {inputParam.type === 'voice_select' && (
-                                <Box>
-                                    {loadingVoices ? (
-                                        <Box sx={{ display: 'flex', alignItems: 'center', gap: 1 }}>
-                                            <CircularProgress size={16} />
-                                            <Typography variant='body2'>Loading voices...</Typography>
-                                        </Box>
-                                    ) : (
-                                        <Dropdown
-                                            name={inputParam.name}
-                                            options={voices.map((voice) => ({ label: voice.name, name: voice.id }))}
-                                            onSelect={(newValue) => setValue(newValue, selectedProvider, inputParam.name)}
-                                            value={
-                                                textToSpeech?.[selectedProvider]
-                                                    ? textToSpeech[selectedProvider][inputParam.name]
-                                                    : inputParam.default ?? 'choose a voice'
-                                            }
+                                <Autocomplete
+                                    size='small'
+                                    sx={{ mt: 1 }}
+                                    options={voices}
+                                    loading={loadingVoices}
+                                    getOptionLabel={(option) => option.name || ''}
+                                    value={
+                                        voices.find(
+                                            (voice) =>
+                                                voice.id === (textToSpeech?.[selectedProvider]?.[inputParam.name] || inputParam.default)
+                                        ) || null
+                                    }
+                                    onChange={(event, newValue) => {
+                                        setValue(newValue ? newValue.id : '', selectedProvider, inputParam.name)
+                                    }}
+                                    renderInput={(params) => (
+                                        <TextField
+                                            {...params}
+                                            placeholder={loadingVoices ? 'Loading voices...' : 'Choose a voice'}
+                                            InputProps={{
+                                                ...params.InputProps,
+                                                endAdornment: (
+                                                    <>
+                                                        {loadingVoices ? <CircularProgress color='inherit' size={20} /> : null}
+                                                        {params.InputProps.endAdornment}
+                                                    </>
+                                                )
+                                            }}
                                         />
                                     )}
-                                </Box>
+                                    disabled={loadingVoices || !textToSpeech?.[selectedProvider]?.credentialId}
+                                />
                             )}
                         </Box>
                     ))}
@@ -591,17 +602,42 @@ const TextToSpeech = ({ dialogProps }) => {
                         />
                     </Box>

-                    {/* Test TTS Button */}
+                    {/* Test Voice Section */}
                     <Box sx={{ p: 2 }}>
-                        <StyledButton
-                            variant='outlined'
-                            size='small'
-                            startIcon={<IconVolume />}
-                            onClick={testTTS}
-                            disabled={!textToSpeech?.[selectedProvider]?.credentialId}
-                        >
+                        <Typography variant='h6' sx={{ mb: 2, display: 'flex', alignItems: 'center', gap: 1 }}>
+                            <IconVolume size={20} />
                             Test Voice
-                        </StyledButton>
+                        </Typography>
+
+                        <Typography variant='body2' color='textSecondary' sx={{ mb: 2 }}>
+                            Test text: "Today is a wonderful day to build something with Flowise!"
+                        </Typography>
+
+                        <AudioWaveform
+                            audioSrc={testAudioSrc}
+                            onPlay={handleTestPlay}
+                            onPause={handleTestPause}
+                            onEnded={handleTestEnded}
+                            isPlaying={isTestPlaying}
+                            isGenerating={isGeneratingTest}
+                            disabled={!textToSpeech?.[selectedProvider]?.credentialId}
+                            externalAudioRef={testAudioRef}
+                            resetProgress={resetWaveform}
+                        />
+
+                        {/* Hidden audio element for waveform control */}
+                        {testAudioSrc && (
+                            <audio
+                                ref={(ref) => setTestAudioRef(ref)}
+                                src={testAudioSrc}
+                                onPlay={() => setIsTestPlaying(true)}
+                                onPause={() => setIsTestPlaying(false)}
+                                onEnded={handleTestEnded}
+                                style={{ display: 'none' }}
+                            >
+                                <track kind='captions' />
+                            </audio>
+                        )}
                     </Box>
                 </>
             )}
@@ -39,8 +39,7 @@ import {
     IconCheck,
     IconPaperclip,
     IconSparkles,
-    IconVolume,
-    IconSquare
+    IconVolume
 } from '@tabler/icons-react'
 import robotPNG from '@/assets/images/robot.png'
 import userPNG from '@/assets/images/account.png'
@@ -270,6 +269,10 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
         abortController: null
     })

+    // Ref to prevent auto-scroll during TTS actions (using ref to avoid re-renders)
+    const isTTSActionRef = useRef(false)
+    const ttsTimeoutRef = useRef(null)
+
     const isFileAllowedForUpload = (file) => {
         const constraints = getAllowChatFlowUploads.data
         /**
@@ -555,6 +558,22 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
         }
     }

+    // Helper function to manage TTS action flag
+    const setTTSAction = (isActive) => {
+        isTTSActionRef.current = isActive
+        if (ttsTimeoutRef.current) {
+            clearTimeout(ttsTimeoutRef.current)
+            ttsTimeoutRef.current = null
+        }
+        if (isActive) {
+            // Reset the flag after a longer delay to ensure all state changes are complete
+            ttsTimeoutRef.current = setTimeout(() => {
+                isTTSActionRef.current = false
+                ttsTimeoutRef.current = null
+            }, 300)
+        }
+    }
+
     const onChange = useCallback((e) => setUserInput(e.target.value), [setUserInput])

     const updateLastMessage = (text) => {
@@ -1374,9 +1393,11 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
         }
     }, [isChatFlowAvailableForRAGFileUploads, fullFileUpload])

-    // Auto scroll chat to bottom
+    // Auto scroll chat to bottom (but not during TTS actions)
     useEffect(() => {
-        scrollToBottom()
+        if (!isTTSActionRef.current) {
+            scrollToBottom()
+        }
     }, [messages])

     useEffect(() => {
@@ -1563,6 +1584,8 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
     }

     const handleTTSStop = (messageId) => {
+        setTTSAction(true)
+
         if (ttsAudio[messageId]) {
             ttsAudio[messageId].pause()
             ttsAudio[messageId].currentTime = 0
@@ -1621,6 +1644,7 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
             return
         }

+        setTTSAction(true)
         stopAllTTS()

         handleTTSStart({ chatMessageId: messageId, format: 'mp3' })
@@ -1868,6 +1892,7 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
     }

     const handleTTSStart = (data) => {
+        setTTSAction(true)
         setIsTTSLoading((prevState) => ({
             ...prevState,
             [data.chatMessageId]: true
@@ -1988,6 +2013,11 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
     useEffect(() => {
         return () => {
             cleanupTTSStreaming()
+            // Cleanup TTS timeout on unmount
+            if (ttsTimeoutRef.current) {
+                clearTimeout(ttsTimeoutRef.current)
+                ttsTimeoutRef.current = null
+            }
         }
     }, [])

@@ -2672,9 +2702,12 @@ const ChatMessage = ({ open, chatflowid, isAgentCanvas, isDialog, previews, setP
                             {isTTSLoading[message.id] ? (
                                 <CircularProgress size={16} />
                             ) : isTTSPlaying[message.id] ? (
-                                <IconSquare size={16} />
+                                <IconCircleDot style={{ width: '20px', height: '20px' }} color={'red'} />
                             ) : (
-                                <IconVolume size={16} />
+                                <IconVolume
+                                    style={{ width: '20px', height: '20px' }}
+                                    color={customization.isDarkMode ? 'white' : '#1e88e5'}
+                                />
                             )}
                         </IconButton>
                     )}