From 6bd8aaefc897fed1f39c9c6bf1bbb61d12c27d8c Mon Sep 17 00:00:00 2001 From: Vinod Kiran Date: Fri, 19 Apr 2024 05:58:09 +0530 Subject: [PATCH] Bugfix/speech input on Safari/iOS (#1971) * debug to identify * Safari sends audio file as mp4 and not webm * Safari on iOS needs special handling * lint fixes * updated condition * Remove unused import --------- Co-authored-by: Ilango --- packages/components/src/speechToText.ts | 2 -- packages/server/src/utils/buildChatflow.ts | 3 ++- packages/ui/src/views/chatmessage/ChatMessage.jsx | 8 +++++++- packages/ui/src/views/chatmessage/audio-recording.js | 9 ++++++++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/packages/components/src/speechToText.ts b/packages/components/src/speechToText.ts index 8524b5252..20d72e40a 100644 --- a/packages/components/src/speechToText.ts +++ b/packages/components/src/speechToText.ts @@ -10,7 +10,6 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi const credentialId = speechToTextConfig.credentialId as string const credentialData = await getCredentialData(credentialId ?? 
'', options) const filePath = path.join(getStoragePath(), options.chatflowid, options.chatId, upload.name) - const audio_file = fs.createReadStream(filePath) if (speechToTextConfig.name === 'openAIWhisper') { @@ -18,7 +17,6 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi apiKey: credentialData.openAIApiKey } const openAIClient = new OpenAIClient(openAIClientOptions) - const transcription = await openAIClient.audio.transcriptions.create({ file: audio_file, model: 'whisper-1', diff --git a/packages/server/src/utils/buildChatflow.ts b/packages/server/src/utils/buildChatflow.ts index 0dbc71976..42118b4d3 100644 --- a/packages/server/src/utils/buildChatflow.ts +++ b/packages/server/src/utils/buildChatflow.ts @@ -66,6 +66,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter fileUploads = incomingInput.uploads for (let i = 0; i < fileUploads.length; i += 1) { const upload = fileUploads[i] + if ((upload.type === 'file' || upload.type === 'audio') && upload.data) { const filename = upload.name const dir = path.join(getStoragePath(), chatflowid, chatId) @@ -83,7 +84,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter } // Run Speech to Text conversion - if (upload.mime === 'audio/webm') { + if (upload.mime === 'audio/webm' || upload.mime === 'audio/mp4') { let speechToTextConfig: ICommonObject = {} if (chatflow.speechToText) { const speechToTextProviders = JSON.parse(chatflow.speechToText) diff --git a/packages/ui/src/views/chatmessage/ChatMessage.jsx b/packages/ui/src/views/chatmessage/ChatMessage.jsx index d7e6de72c..988114848 100644 --- a/packages/ui/src/views/chatmessage/ChatMessage.jsx +++ b/packages/ui/src/views/chatmessage/ChatMessage.jsx @@ -241,7 +241,13 @@ export const ChatMessage = ({ open, chatflowid, isDialog, previews, setPreviews } const addRecordingToPreviews = (blob) => { - const mimeType = blob.type.substring(0, blob.type.indexOf(';')) + let mimeType = '' 
+ const pos = blob.type.indexOf(';') + if (pos === -1) { + mimeType = blob.type + } else { + mimeType = blob.type.substring(0, pos) + } // read blob and add to previews const reader = new FileReader() reader.readAsDataURL(blob) diff --git a/packages/ui/src/views/chatmessage/audio-recording.js b/packages/ui/src/views/chatmessage/audio-recording.js index 1fbaddc10..37c5df08c 100644 --- a/packages/ui/src/views/chatmessage/audio-recording.js +++ b/packages/ui/src/views/chatmessage/audio-recording.js @@ -2,6 +2,7 @@ * @fileoverview This file contains the API to handle audio recording. * Originally from 'https://ralzohairi.medium.com/audio-recording-in-javascript-96eed45b75ee' */ +import { isSafari } from 'react-device-detect' // audio-recording.js --------------- let microphoneButton, elapsedTimeTag @@ -277,7 +278,13 @@ export const audioRecorder = { }) //start the recording by calling the start method on the media recorder - audioRecorder.mediaRecorder.start() + if (isSafari) { + // https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/322252 + // https://community.openai.com/t/whisper-api-cannot-read-files-correctly/93420/46 + audioRecorder.mediaRecorder.start(1000) + } else { + audioRecorder.mediaRecorder.start() + } }) )