Feature/Add Groq Whisper support (#3706)

* feat: Add Groq Whisper support to SpeechToText component

- Introduced a new speech-to-text provider, Groq Whisper, in both the backend and UI components.
- Updated SpeechToTextType to include GROQ_WHISPER.
- Implemented Groq client integration for audio transcription with customizable model, language, and temperature options.
- Added UI elements for Groq Whisper configuration, including input fields for model, language, and temperature settings.

* Set the speech-to-text 'none' status to false when another provider is selected

---------

Co-authored-by: Henry <hzj94@hotmail.com>
This commit is contained in:
Anthony Bryan Gavilan Vinces 2024-12-17 18:11:07 -05:00 committed by GitHub
parent d5498858ec
commit 4c29b2390c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 66 additions and 2 deletions

View File

@ -3,11 +3,13 @@ import { getCredentialData } from './utils'
import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai' import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai'
import { AssemblyAI } from 'assemblyai' import { AssemblyAI } from 'assemblyai'
import { getFileFromStorage } from './storageUtils' import { getFileFromStorage } from './storageUtils'
import Groq from 'groq-sdk'
const SpeechToTextType = { const SpeechToTextType = {
OPENAI_WHISPER: 'openAIWhisper', OPENAI_WHISPER: 'openAIWhisper',
ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe', ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
LOCALAI_STT: 'localAISTT' LOCALAI_STT: 'localAISTT',
GROQ_WHISPER: 'groqWhisper'
} }
export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => { export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
@ -70,6 +72,23 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
} }
break break
} }
// Groq Whisper provider: transcribe the uploaded audio through the Groq SDK.
case SpeechToTextType.GROQ_WHISPER: {
// The API key is read from the resolved credential's `groqApiKey` field.
const groqClient = new Groq({
apiKey: credentialData.groqApiKey
})
// Wrap the audio data in a file object named after the upload.
const file = await toFile(audio_file, upload.name)
const groqTranscription = await groqClient.audio.transcriptions.create({
file,
// Falls back to 'whisper-large-v3' when no model is configured.
model: speechToTextConfig?.model || 'whisper-large-v3',
// Passed through as configured; undefined when not set.
language: speechToTextConfig?.language,
// Parsed to a float only when the configured value is truthy; any falsy value
// (unset, empty string, numeric 0) is sent as undefined.
temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
response_format: 'verbose_json'
})
// Return the transcript only when the response carries non-empty text;
// otherwise fall through to the `break` without returning from this case.
if (groqTranscription?.text) {
return groqTranscription.text
}
break
}
} }
} else { } else {
throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.') throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.7 KiB

View File

@ -17,6 +17,7 @@ import { Dropdown } from '@/ui-component/dropdown/Dropdown'
import openAISVG from '@/assets/images/openai.svg' import openAISVG from '@/assets/images/openai.svg'
import assemblyAIPng from '@/assets/images/assemblyai.png' import assemblyAIPng from '@/assets/images/assemblyai.png'
import localAiPng from '@/assets/images/localai.png' import localAiPng from '@/assets/images/localai.png'
import groqPng from '@/assets/images/groq.png'
// store // store
import useNotifier from '@/utils/useNotifier' import useNotifier from '@/utils/useNotifier'
@ -29,7 +30,8 @@ import chatflowsApi from '@/api/chatflows'
const SpeechToTextType = { const SpeechToTextType = {
OPENAI_WHISPER: 'openAIWhisper', OPENAI_WHISPER: 'openAIWhisper',
ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe', ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
LOCALAI_STT: 'localAISTT' LOCALAI_STT: 'localAISTT',
GROQ_WHISPER: 'groqWhisper'
} }
// Weird quirk - the key must match the name property value. // Weird quirk - the key must match the name property value.
@ -139,6 +141,46 @@ const speechToTextProviders = {
optional: true optional: true
} }
] ]
},
[SpeechToTextType.GROQ_WHISPER]: {
label: 'Groq Whisper',
name: SpeechToTextType.GROQ_WHISPER,
icon: groqPng,
url: 'https://console.groq.com/',
inputs: [
{
label: 'Model',
name: 'model',
type: 'string',
description: `The STT model to load. Defaults to whisper-large-v3 if left blank.`,
placeholder: 'whisper-large-v3',
optional: true
},
{
label: 'Connect Credential',
name: 'credential',
type: 'credential',
credentialNames: ['groqApi']
},
{
label: 'Language',
name: 'language',
type: 'string',
description:
'The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.',
placeholder: 'en',
optional: true
},
{
label: 'Temperature',
name: 'temperature',
type: 'number',
step: 0.1,
description:
'The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.',
optional: true
}
]
} }
} }
@ -210,6 +252,9 @@ const SpeechToText = ({ dialogProps }) => {
newVal[provider.name] = { ...speechToText[provider.name], status: false } newVal[provider.name] = { ...speechToText[provider.name], status: false }
} }
}) })
if (providerName !== 'none') {
newVal['none'].status = false
}
} }
setSpeechToText(newVal) setSpeechToText(newVal)
return newVal return newVal