Feature/Add Groq Whisper support (#3706)

* feat: Add Groq Whisper support to SpeechToText component
  - Introduced a new speech-to-text provider, Groq Whisper, in both the backend and UI components.
  - Updated SpeechToTextType to include GROQ_WHISPER.
  - Implemented Groq client integration for audio transcription with customizable model, language, and temperature options.
  - Added UI elements for Groq Whisper configuration, including input fields for model, language, and temperature settings.
* Set the speech-to-text 'none' status to false when another provider is selected

---------

Co-authored-by: Henry <hzj94@hotmail.com>
2024-12-17 18:11:07 -05:00 · 2024-12-17 18:11:07 -05:00 · 4c29b2390c
parent d5498858ec
commit 4c29b2390c
3 changed files with 66 additions and 2 deletions
--- a/packages/components/src/speechToText.ts
+++ b/packages/components/src/speechToText.ts
@ -3,11 +3,13 @@ import { getCredentialData } from './utils'
 import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai'
 import { AssemblyAI } from 'assemblyai'
 import { getFileFromStorage } from './storageUtils'
 import Groq from 'groq-sdk'
 const SpeechToTextType = {
    OPENAI_WHISPER: 'openAIWhisper',
    ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
-    LOCALAI_STT: 'localAISTT'
+    LOCALAI_STT: 'localAISTT',
    GROQ_WHISPER: 'groqWhisper'
 }
 export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => {
@ -70,6 +72,23 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
                }
                break
            }
            // Groq Whisper: transcribe the audio through the Groq SDK client and return the transcript text.
            case SpeechToTextType.GROQ_WHISPER: {
                // The API key is read from credentialData.groqApiKey
                const groqClient = new Groq({
                    apiKey: credentialData.groqApiKey
                })
                // Convert audio_file with toFile, passing upload.name as the second argument
                // (presumably the file name sent with the request — confirm against toFile's signature)
                const file = await toFile(audio_file, upload.name)
                const groqTranscription = await groqClient.audio.transcriptions.create({
                    file,
                    // Falls back to 'whisper-large-v3' when no model is configured (or it is an empty string)
                    model: speechToTextConfig?.model || 'whisper-large-v3',
                    language: speechToTextConfig?.language,
                    // Parsed with parseFloat only when the configured value is truthy; otherwise undefined is passed.
                    // NOTE(review): a numeric 0 is falsy here and would be sent as undefined — confirm that is intended.
                    temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined,
                    response_format: 'verbose_json'
                })
                // Return the transcript only when a non-empty text field is present;
                // otherwise fall out of this case without returning a value here.
                if (groqTranscription?.text) {
                    return groqTranscription.text
                }
                break
            }
        }
    } else {
        throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.')
--- a/packages/ui/src/assets/images/groq.png
+++ b/packages/ui/src/assets/images/groq.png
--- a/packages/ui/src/ui-component/extended/SpeechToText.jsx
+++ b/packages/ui/src/ui-component/extended/SpeechToText.jsx
@ -17,6 +17,7 @@ import { Dropdown } from '@/ui-component/dropdown/Dropdown'
 import openAISVG from '@/assets/images/openai.svg'
 import assemblyAIPng from '@/assets/images/assemblyai.png'
 import localAiPng from '@/assets/images/localai.png'
 import groqPng from '@/assets/images/groq.png'
 // store
 import useNotifier from '@/utils/useNotifier'
@ -29,7 +30,8 @@ import chatflowsApi from '@/api/chatflows'
 const SpeechToTextType = {
    OPENAI_WHISPER: 'openAIWhisper',
    ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe',
-    LOCALAI_STT: 'localAISTT'
+    LOCALAI_STT: 'localAISTT',
    GROQ_WHISPER: 'groqWhisper'
 }
 // Weird quirk - the key must match the name property value.
@ -139,6 +141,46 @@ const speechToTextProviders = {
                optional: true
            }
        ]
    },
    // Groq Whisper provider definition. The entry's key and its `name` property both use
    // SpeechToTextType.GROQ_WHISPER ('groqWhisper').
    [SpeechToTextType.GROQ_WHISPER]: {
        label: 'Groq Whisper',
        name: SpeechToTextType.GROQ_WHISPER,
        icon: groqPng,
        url: 'https://console.groq.com/',
        // Input descriptors for this provider: model, credential, language and temperature.
        inputs: [
            {
                // Optional model name; the description states the default is whisper-large-v3 when blank
                label: 'Model',
                name: 'model',
                type: 'string',
                description: `The STT model to load. Defaults to whisper-large-v3 if left blank.`,
                placeholder: 'whisper-large-v3',
                optional: true
            },
            {
                // Credential input limited to the 'groqApi' credential name; not marked optional
                label: 'Connect Credential',
                name: 'credential',
                type: 'credential',
                credentialNames: ['groqApi']
            },
            {
                // Optional language of the input audio (ISO-639-1 per the description, e.g. 'en')
                label: 'Language',
                name: 'language',
                type: 'string',
                description:
                    'The language of the input audio. Supplying the input language in ISO-639-1 format will improve accuracy and latency.',
                placeholder: 'en',
                optional: true
            },
            {
                // Optional numeric sampling temperature, declared with a step of 0.1
                label: 'Temperature',
                name: 'temperature',
                type: 'number',
                step: 0.1,
                description:
                    'The sampling temperature, between 0 and 1. Higher values like 0.8 will make the output more random, while lower values like 0.2 will make it more focused and deterministic.',
                optional: true
            }
        ]
    }
 }
@ -210,6 +252,9 @@ const SpeechToText = ({ dialogProps }) => {
                    newVal[provider.name] = { ...speechToText[provider.name], status: false }
                }
            })
            if (providerName !== 'none') {
                newVal['none'].status = false
            }
        }
        setSpeechToText(newVal)
        return newVal