Bugfix/speech input on Safari/iOS (#1971)
* debug to identify
* Safari sends audio file as mp4 and not webm
* Safari on iOS needs special handling
* lint fixes
* updated condition
* Remove unused import

---------

Co-authored-by: Ilango <rajagopalilango@gmail.com>
This commit is contained in:
parent
d1c8f7eb96
commit
6bd8aaefc8
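For context on the first two bullets above: Chromium-based browsers and Firefox typically record microphone input as audio/webm, while Safari on macOS and iOS records audio/mp4, which is why both the server-side MIME check and the recorder startup below change. A minimal sketch (TypeScript, not part of this commit; the pickRecordingMimeType helper and candidate list are illustrative assumptions) of probing which container a browser will produce:

    // Sketch: ask MediaRecorder which audio container this browser supports.
    const pickRecordingMimeType = (): string => {
        const candidates = ['audio/webm', 'audio/mp4'] // Chromium/Firefox first, then Safari
        for (const type of candidates) {
            if (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported(type)) {
                return type
            }
        }
        return '' // fall back to the browser's default container
    }

On Safari the first supported candidate is audio/mp4, which is what ends up in upload.mime on the server.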
@@ -10,7 +10,6 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
     const credentialId = speechToTextConfig.credentialId as string
     const credentialData = await getCredentialData(credentialId ?? '', options)
     const filePath = path.join(getStoragePath(), options.chatflowid, options.chatId, upload.name)
 
     const audio_file = fs.createReadStream(filePath)
 
     if (speechToTextConfig.name === 'openAIWhisper') {
@@ -18,7 +17,6 @@ export const convertSpeechToText = async (upload: IFileUpload, speechToTextConfi
             apiKey: credentialData.openAIApiKey
         }
         const openAIClient = new OpenAIClient(openAIClientOptions)
 
         const transcription = await openAIClient.audio.transcriptions.create({
             file: audio_file,
             model: 'whisper-1',
@@ -66,6 +66,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
             fileUploads = incomingInput.uploads
             for (let i = 0; i < fileUploads.length; i += 1) {
                 const upload = fileUploads[i]
 
                 if ((upload.type === 'file' || upload.type === 'audio') && upload.data) {
                     const filename = upload.name
                     const dir = path.join(getStoragePath(), chatflowid, chatId)
@@ -83,7 +84,7 @@ export const utilBuildChatflow = async (req: Request, socketIO?: Server, isInter
                 }
 
                 // Run Speech to Text conversion
-                if (upload.mime === 'audio/webm') {
+                if (upload.mime === 'audio/webm' || upload.mime === 'audio/mp4') {
                     let speechToTextConfig: ICommonObject = {}
                     if (chatflow.speechToText) {
                         const speechToTextProviders = JSON.parse(chatflow.speechToText)
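With the widened condition above, Safari/iOS recordings (audio/mp4) now take the same speech-to-text path as webm recordings from other browsers. A small sketch of the same decision as a standalone predicate; the isSpeechToTextUpload name is an illustrative assumption, not code from this commit:

    // Sketch: MIME types routed to speech-to-text after this change.
    const SPEECH_TO_TEXT_MIME_TYPES = ['audio/webm', 'audio/mp4']
    const isSpeechToTextUpload = (mime: string): boolean => SPEECH_TO_TEXT_MIME_TYPES.includes(mime)

    // isSpeechToTextUpload('audio/webm') -> true (Chromium/Firefox recordings)
    // isSpeechToTextUpload('audio/mp4')  -> true (Safari/iOS recordings)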
@@ -241,7 +241,13 @@ export const ChatMessage = ({ open, chatflowid, isDialog, previews, setPreviews
     }
 
     const addRecordingToPreviews = (blob) => {
-        const mimeType = blob.type.substring(0, blob.type.indexOf(';'))
+        let mimeType = ''
+        const pos = blob.type.indexOf(';')
+        if (pos === -1) {
+            mimeType = blob.type
+        } else {
+            mimeType = blob.type.substring(0, pos)
+        }
         // read blob and add to previews
         const reader = new FileReader()
         reader.readAsDataURL(blob)
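The reason for the change above: Chromium reports a recording blob type such as audio/webm;codecs=opus, but Safari may report a bare audio/mp4 with no ';', so indexOf(';') returns -1 and the old substring(0, -1) call produced an empty MIME string. A standalone sketch of the fixed parsing (the extractMimeType name is an illustrative assumption):

    // Sketch: strip an optional ';codecs=...' suffix from a Blob type string.
    const extractMimeType = (blobType: string): string => {
        const pos = blobType.indexOf(';')
        return pos === -1 ? blobType : blobType.substring(0, pos)
    }

    // extractMimeType('audio/webm;codecs=opus') -> 'audio/webm' (Chromium/Firefox)
    // extractMimeType('audio/mp4')              -> 'audio/mp4'  (Safari)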
@@ -2,6 +2,7 @@
  * @fileoverview This file contains the API to handle audio recording.
  * Originally from 'https://ralzohairi.medium.com/audio-recording-in-javascript-96eed45b75ee'
  */
+import { isSafari } from 'react-device-detect'
 
 // audio-recording.js ---------------
 let microphoneButton, elapsedTimeTag
@@ -277,7 +278,13 @@ export const audioRecorder = {
             })
 
             //start the recording by calling the start method on the media recorder
-            audioRecorder.mediaRecorder.start()
+            if (isSafari) {
+                // https://community.openai.com/t/whisper-problem-with-audio-mp4-blobs-from-safari/322252
+                // https://community.openai.com/t/whisper-api-cannot-read-files-correctly/93420/46
+                audioRecorder.mediaRecorder.start(1000)
+            } else {
+                audioRecorder.mediaRecorder.start()
+            }
         })
     )
 
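The linked community threads describe Whisper rejecting Safari's audio/mp4 blobs when MediaRecorder.start() is called without a timeslice; passing a 1000 ms timeslice makes Safari emit periodic dataavailable chunks that reassemble into a readable file. A minimal sketch of the chunk-collection pattern this relies on (TypeScript, browser only; names and the five-second demo stop are illustrative assumptions, not Flowise code):

    // Sketch: record with a 1 s timeslice and reassemble the chunks into one blob.
    async function recordWithTimeslice(): Promise<void> {
        const stream = await navigator.mediaDevices.getUserMedia({ audio: true })
        const recorder = new MediaRecorder(stream)
        const chunks: Blob[] = []

        recorder.ondataavailable = (event: BlobEvent) => {
            if (event.data.size > 0) chunks.push(event.data)
        }
        recorder.onstop = () => {
            // audio/mp4 on Safari, audio/webm on Chromium/Firefox
            const blob = new Blob(chunks, { type: recorder.mimeType })
            console.log('recorded', blob.type, blob.size)
        }

        recorder.start(1000) // request a chunk roughly every second
        setTimeout(() => recorder.stop(), 5000) // stop after five seconds for the demo
    }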