From 4c29b2390c1de9d5ff24111a54d5979c8ad8ecb8 Mon Sep 17 00:00:00 2001 From: Anthony Bryan Gavilan Vinces Date: Tue, 17 Dec 2024 18:11:07 -0500 Subject: [PATCH] Feature/Add Groq Whisper support (#3706) * feat: Add Groq Whisper support to SpeechToText component - Introduced a new speech-to-text provider, Groq Whisper, in both the backend and UI components. - Updated SpeechToTextType to include GROQ_WHISPER. - Implemented Groq client integration for audio transcription with customizable model, language, and temperature options. - Added UI elements for Groq Whisper configuration, including input fields for model, language, and temperature settings. * turn speech to text none status to false when other was selected --------- Co-authored-by: Henry --- packages/components/src/speechToText.ts | 21 +++++++- packages/ui/src/assets/images/groq.png | Bin 0 -> 1773 bytes .../ui-component/extended/SpeechToText.jsx | 47 +++++++++++++++++- 3 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 packages/ui/src/assets/images/groq.png diff --git a/packages/components/src/speechToText.ts b/packages/components/src/speechToText.ts index 821f02216..547804c5c 100644 --- a/packages/components/src/speechToText.ts +++ b/packages/components/src/speechToText.ts @@ -3,11 +3,13 @@ import { getCredentialData } from './utils' import { type ClientOptions, OpenAIClient, toFile } from '@langchain/openai' import { AssemblyAI } from 'assemblyai' import { getFileFromStorage } from './storageUtils' +import Groq from 'groq-sdk' const SpeechToTextType = { OPENAI_WHISPER: 'openAIWhisper', ASSEMBLYAI_TRANSCRIBE: 'assemblyAiTranscribe', - LOCALAI_STT: 'localAISTT' + LOCALAI_STT: 'localAISTT', + GROQ_WHISPER: 'groqWhisper' } export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfig: ICommonObject, options: ICommonObject) => { @@ -70,6 +72,23 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi } break } + case SpeechToTextType.GROQ_WHISPER: { + const groqClient = new Groq({ + apiKey: credentialData.groqApiKey + }) + const file = await toFile(audio_file, upload.name) + const groqTranscription = await groqClient.audio.transcriptions.create({ + file, + model: speechToTextConfig?.model || 'whisper-large-v3', + language: speechToTextConfig?.language, + temperature: speechToTextConfig?.temperature ? parseFloat(speechToTextConfig.temperature) : undefined, + response_format: 'verbose_json' + }) + if (groqTranscription?.text) { + return groqTranscription.text + } + break + } } } else { throw new Error('Speech to text is not selected, but found a recorded audio file. Please fix the chain.') diff --git a/packages/ui/src/assets/images/groq.png b/packages/ui/src/assets/images/groq.png new file mode 100644 index 0000000000000000000000000000000000000000..ea2b8821cf8a317fa3e2cf1a7b36e1d0b7b89798 GIT binary patch literal 1773 zcmVJ78yIdGfP8*~D}jcBgoz@DjEIVOjggLzX_A(clvXvniDM+O3jVZoWes8fy)YW|W!Yyi=xSRpD1Fm=i>13I&c1=s+jt~|pKj9vTi zf!SP1hQVY@_uqq`XH)%}Teq*&T6%Ty?OWGvULwB^5MB(yFkHmC3NU8ufGgl0z66BD z`d|S90&f8jD1d+gTLy=7{AwWf!N)2Y$0A6LE0|XfQcD0qYF7(keuEsxs?Cua*)kojbh=%dHOO8p^J^fZ5Ze zG6@R`vAJ|9(~zbB3y`v$Fq;eim9$9v54D?Et7Na)QtHd2a>eLJhLU@mY|Jqr=mG??#tx7CMgSgvoG~LJtE@-6mYcnHopLx-Eq zjQ}0A8v{SZ0Ki`uee@XtMC_CU2NX<)(@&c*K+8rokU&9uV4Zat#!iOo5@L`2Mbc)Q zDn#09dl6*Y)1sYn+-J;9cdl^j>W$uJ^L@9a;Q-E<;1%a3PT7kguIISqdP4x$ggHJa z~9W^myPA;>~&_|PseJj3Lif}s^+uo_2nh;vr9zrkIwI?{+@TM%Hw zCKzC6JWGxVvv|D!2XL_qyaOXjDnmpAHA9OhS%3ko1_RzH;CRS$;uw9?2GX%&7 z0v3QnqfE?Fv*05h>Bf-`?hrW0=p!R#(ts}d4>G@y^`X+};a;~F}~&nqtJU0i)4d%4B3eT7H|L{!m<+K7!3mE z$qxo3V4ejaKsP=4O%Qg=9{#)pJOUbtb`bOs>NqGM&Y{pcWTK&O00%_P(29wE;T9GJ z1}`$I3}bL=bQ;Tu+(%&$(g6aom9liAE_EXaW6HyT(sYjuyeWMQs8iLj zucv { newVal[provider.name] = { ...speechToText[provider.name], status: false } } }) + if (providerName !== 'none') { + newVal['none'].status = false + } } setSpeechToText(newVal) return newVal