From 6631b9fead2c60ac2a52fe9407657ef4cec6fabb Mon Sep 17 00:00:00 2001
From: Jarno Hakulinen
Date: Tue, 6 Dec 2022 11:21:16 -0800
Subject: [PATCH] Add AudioStreamWaveFormat with ALAW, MULAW support (#82)

* add AudioStreamWaveFormat with alaw, mulaw support

* fix lint
---
 audio/audio_stream_container_format.go | 14 ++++++
 audio/audio_stream_format.go           | 19 ++++++++
 samples/main.go                        |  1 +
 samples/recognizer/from_file.go        | 61 ++++++++++++++++++++++++++
 4 files changed, 95 insertions(+)

diff --git a/audio/audio_stream_container_format.go b/audio/audio_stream_container_format.go
index fea3fa6..97823bf 100644
--- a/audio/audio_stream_container_format.go
+++ b/audio/audio_stream_container_format.go
@@ -31,3 +31,17 @@ const (
 	// ANY Stream ContainerFormat definition when the actual stream format is not known.
 	ANY AudioStreamContainerFormat = 0x108
 )
+
+// AudioStreamWaveFormat represents the format specified inside a WAV container, which is sent directly as encoded to the speech service.
+type AudioStreamWaveFormat int //nolint:revive
+
+const (
+	// WavePCM is the AudioStreamWaveFormat definition for PCM (pulse-code modulated) data in integer format.
+	WavePCM AudioStreamWaveFormat = 0x0001
+
+	// WaveALAW is the AudioStreamWaveFormat definition for A-law-encoded format.
+	WaveALAW AudioStreamWaveFormat = 0x0006
+
+	// WaveMULAW is the AudioStreamWaveFormat definition for Mu-law-encoded format.
+	WaveMULAW AudioStreamWaveFormat = 0x0007
+)
diff --git a/audio/audio_stream_format.go b/audio/audio_stream_format.go
index 3c5c81e..d8767a0 100644
--- a/audio/audio_stream_format.go
+++ b/audio/audio_stream_format.go
@@ -32,6 +32,25 @@ func GetDefaultInputFormat() (*AudioStreamFormat, error) {
 	return format, nil
 }
 
+// GetWaveFormat creates an audio stream format object with the specified wave format characteristics.
+// waveFormat is one of WavePCM, WaveALAW or WaveMULAW. An error is returned when the native
+// call reports a status other than SPX_NOERROR.
+func GetWaveFormat(samplesPerSecond uint32, bitsPerSample uint8, channels uint8, waveFormat AudioStreamWaveFormat) (*AudioStreamFormat, error) {
+	var handle C.SPXHANDLE
+	ret := uintptr(C.audio_stream_format_create_from_waveformat(
+		&handle,
+		(C.uint32_t)(samplesPerSecond),
+		(C.uint8_t)(bitsPerSample),
+		(C.uint8_t)(channels),
+		(C.Audio_Stream_Wave_Format)(waveFormat)))
+	if ret != C.SPX_NOERROR {
+		return nil, common.NewCarbonError(ret)
+	}
+	format := new(AudioStreamFormat)
+	format.handle = handle
+	return format, nil
+}
+
 // GetWaveFormatPCM creates an audio stream format object with the specified PCM waveformat characteristics.
 // Note: Currently, only WAV / PCM with 16-bit samples, 16 kHz sample rate, and a single channel (Mono) is supported. When
 // used with Conversation Transcription, eight channels are supported.
diff --git a/samples/main.go b/samples/main.go
index bf61a44..f093513 100644
--- a/samples/main.go
+++ b/samples/main.go
@@ -31,6 +31,7 @@ func main() {
 	samples := functionMap{
 		"speech_recognizer:RecognizeOnceFromWavFile":        recognizer.RecognizeOnceFromWavFile,
 		"speech_recognizer:RecognizeOnceFromCompressedFile": recognizer.RecognizeOnceFromCompressedFile,
+		"speech_recognizer:RecognizeOnceFromALAWFile":       recognizer.RecognizeOnceFromALAWFile,
 		"speech_recognizer:ContinuousFromMicrophone":        recognizer.ContinuousFromMicrophone,
 		"speech_recognizer:RecognizeContinuousUsingWrapper": recognizer.RecognizeContinuousUsingWrapper,
 		"dialog_service_connector:ListenOnce":               dialog_service_connector.ListenOnce,
diff --git a/samples/recognizer/from_file.go b/samples/recognizer/from_file.go
index cf25d3d..c9c9bde 100644
--- a/samples/recognizer/from_file.go
+++ b/samples/recognizer/from_file.go
@@ -125,3 +125,64 @@ func RecognizeOnceFromCompressedFile(subscription string, region string, file st
 	fmt.Println("Got a recognition!")
 	fmt.Println(outcome.Result.Text)
 }
+
+// RecognizeOnceFromALAWFile pumps file into a push stream declared as 8 kHz, single-channel A-law audio,
+// then prints the text of one recognition, giving up after 40 seconds.
+func RecognizeOnceFromALAWFile(subscription string, region string, file string) {
+	// NOTE(review): G.711 A-law carries 8 bits per sample; confirm that 16 is the value the service expects here.
+	format, err := audio.GetWaveFormat(8000, 16, 1, audio.WaveALAW)
+	if err != nil {
+		fmt.Println("Got an error: ", err)
+		return
+	}
+	defer format.Close()
+	stream, err := audio.CreatePushAudioInputStreamFromFormat(format)
+	if err != nil {
+		fmt.Println("Got an error: ", err)
+		return
+	}
+	defer stream.Close()
+	audioConfig, err := audio.NewAudioConfigFromStreamInput(stream)
+	if err != nil {
+		fmt.Println("Got an error: ", err)
+		return
+	}
+	defer audioConfig.Close()
+	config, err := speech.NewSpeechConfigFromSubscription(subscription, region)
+	if err != nil {
+		fmt.Println("Got an error: ", err)
+		return
+	}
+	defer config.Close()
+	speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig)
+	if err != nil {
+		fmt.Println("Got an error: ", err)
+		return
+	}
+	defer speechRecognizer.Close()
+	speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) {
+		defer event.Close()
+		fmt.Println("Session Started (ID=", event.SessionID, ")")
+	})
+	speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) {
+		defer event.Close()
+		fmt.Println("Session Stopped (ID=", event.SessionID, ")")
+	})
+	helpers.PumpFileIntoStream(file, stream)
+	task := speechRecognizer.RecognizeOnceAsync()
+	var outcome speech.SpeechRecognitionOutcome
+	select {
+	case outcome = <-task:
+	case <-time.After(40 * time.Second):
+		fmt.Println("Timed out")
+		return
+	}
+	defer outcome.Close()
+	if outcome.Error != nil {
+		fmt.Println("Got an error: ", outcome.Error)
+		// Do not report a recognition or read Result when the outcome carries an error.
+		return
+	}
+	fmt.Println("Got a recognition!")
+	fmt.Println(outcome.Result.Text)
+}