Feature/Add Groq Whisper support (#3706)
* feat: Add Groq Whisper support to SpeechToText component
  - Introduced a new speech-to-text provider, Groq Whisper, in both the backend and UI components.
  - Updated SpeechToTextType to include GROQ_WHISPER.
  - Implemented Groq client integration for audio transcription with customizable model, language, and temperature options.
  - Added UI elements for Groq Whisper configuration, including input fields for model, language, and temperature settings.
* Set the speech-to-text "none" status to false when another provider is selected
---------
Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
parent
d5498858ec
commit
4c29b2390c
|
|
@ -3,11 +3,13 @@ import { getCredentialData } from './utils'
|
|||
import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai'
|
||||
import { AssemblyAI } from 'assemblyai'
|
||||
import { getFileFromStorage } from './storageUtils'
|
||||
import Groq from 'groq-sdk'
|
||||
|
||||
const SpeechToTextType = {
|
||||
OPENAI_WHISPER: 'openAIWhisper',
|
||||
ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
|
||||
LOCALAI_STT: 'localAISTT'
|
||||
LOCALAI_STT: 'localAISTT',
|
||||
GROQ_WHISPER: 'groqWhisper'
|
||||
}
|
||||
|
||||
export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
|
||||
|
|
@ -70,6 +72,23 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
|
|||
}
|
||||
break
|
||||
}
|
||||
case SpeechToTextType.GROQ_WHISPER: {
|
||||
const groqClient = new Groq({
|
||||
apiKey: credentialData.groqApiKey
|
||||
})
|
||||
const file = await toFile(audio_file, upload.name)
|
||||
const groqTranscription = await groqClient.audio.transcriptions.create({
|
||||
file,
|
||||
model: speechToTextConfig?.model || 'whisper-large-v3',
|
||||
language: speechToTextConfig?.language,
|
||||
temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
|
||||
response_format: 'verbose_json'
|
||||
})
|
||||
if (groqTranscription?.text) {
|
||||
return groqTranscription.text
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')
|
||||
|
|
|
|||
Binary file not shown.
|
After Width: | Height: | Size: 1.7 KiB |
|
|
@ -17,6 +17,7 @@ import { Dropdown } from '@/ui-component/dropdown/Dropdown'
|
|||
import openAISVG from '@/assets/images/openai.svg'
|
||||
import assemblyAIPng from '@/assets/images/assemblyai.png'
|
||||
import localAiPng from '@/assets/images/localai.png'
|
||||
import groqPng from '@/assets/images/groq.png'
|
||||
|
||||
// store
|
||||
import useNotifier from '@/utils/useNotifier'
|
||||
|
|
@ -29,7 +30,8 @@ import chatflowsApi from '@/api/chatflows'
|
|||
const SpeechToTextType = {
|
||||
OPENAI_WHISPER: 'openAIWhisper',
|
||||
ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
|
||||
LOCALAI_STT: 'localAISTT'
|
||||
LOCALAI_STT: 'localAISTT',
|
||||
GROQ_WHISPER: 'groqWhisper'
|
||||
}
|
||||
|
||||
// Weird quirk - the key must match the name property value.
|
||||
|
|
@ -139,6 +141,46 @@ const speechToTextProviders = {
|
|||
optional: true
|
||||
}
|
||||
]
|
||||
},
|
||||
[SpeechToTextType.GROQ_WHISPER]: {
|
||||
label: 'Groq Whisper',
|
||||
name: SpeechToTextType.GROQ_WHISPER,
|
||||
icon: groqPng,
|
||||
url: 'https://console.groq.com/',
|
||||
inputs: [
|
||||
{
|
||||
label: 'Model',
|
||||
name: 'model',
|
||||
type: 'string',
|
||||
description: `The STT model to load. Defaults to whisper-large-v3 if left blank.`,
|
||||
placeholder: 'whisper-large-v3',
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Connect Credential',
|
||||
name: 'credential',
|
||||
type: 'credential',
|
||||
credentialNames: ['groqApi']
|
||||
},
|
||||
{
|
||||
label: 'Language',
|
||||
name: 'language',
|
||||
type: 'string',
|
||||
description:
|
||||
'The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.',
|
||||
placeholder: 'en',
|
||||
optional: true
|
||||
},
|
||||
{
|
||||
label: 'Temperature',
|
||||
name: 'temperature',
|
||||
type: 'number',
|
||||
step: 0.1,
|
||||
description:
|
||||
'The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.',
|
||||
optional: true
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -210,6 +252,9 @@ const SpeechToText = ({ dialogProps }) => {
|
|||
newVal[provider.name] = { ...speechToText[provider.name], status: false }
|
||||
}
|
||||
})
|
||||
if (providerName !== 'none') {
|
||||
newVal['none'].status = false
|
||||
}
|
||||
}
|
||||
setSpeechToText(newVal)
|
||||
return newVal
|
||||
|
|
|
|||
Loading…
Reference in New Issue