Skip to content

Commit

Permalink
Add AudioStreamWaveFormat with ALAW, MULAW support (#82)
Browse files Browse the repository at this point in the history
* add AudioStreamWaveFormat with alaw, mulaw support

* fix lint
  • Loading branch information
jhakulin committed Dec 6, 2022
1 parent bc53b09 commit 6631b9f
Show file tree
Hide file tree
Showing 4 changed files with 91 additions and 0 deletions.
15 changes: 15 additions & 0 deletions audio/audio_stream_container_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,18 @@ const (
// ANY Stream ContainerFormat definition when the actual stream format is not known.
ANY AudioStreamContainerFormat = 0x108
)

// AudioStreamWaveFormat represents the audio format specified inside a WAV container, whose data is sent
// directly, as encoded, to the speech service.
type AudioStreamWaveFormat int //nolint:revive

// Supported AudioStreamWaveFormat values.
const (
// WavePCM is the AudioStreamWaveFormat for PCM (pulse-code modulated) data in integer format.
WavePCM AudioStreamWaveFormat = 0x0001

// WaveALAW is the AudioStreamWaveFormat for the A-law-encoded format.
WaveALAW AudioStreamWaveFormat = 0x0006

// WaveMULAW is the AudioStreamWaveFormat for the Mu-law-encoded format.
WaveMULAW AudioStreamWaveFormat = 0x0007

)
17 changes: 17 additions & 0 deletions audio/audio_stream_format.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,23 @@ func GetDefaultInputFormat() (*AudioStreamFormat, error) {
return format, nil
}

// GetWaveFormat builds an audio stream format object from the given sample
// rate, bits per sample, channel count and wave format.
// It returns a Carbon error when the native call does not report SPX_NOERROR.
func GetWaveFormat(samplesPerSecond uint32, bitsPerSample uint8, channels uint8, waveFormat AudioStreamWaveFormat) (*AudioStreamFormat, error) {
	var h C.SPXHANDLE
	rc := uintptr(C.audio_stream_format_create_from_waveformat(
		&h,
		C.uint32_t(samplesPerSecond),
		C.uint8_t(bitsPerSample),
		C.uint8_t(channels),
		C.Audio_Stream_Wave_Format(waveFormat),
	))
	if rc != C.SPX_NOERROR {
		return nil, common.NewCarbonError(rc)
	}
	// Wrap the handle filled in by the native call.
	return &AudioStreamFormat{handle: h}, nil
}

// GetWaveFormatPCM creates an audio stream format object with the specified PCM waveformat characteristics.
// Note: Currently, only WAV / PCM with 16-bit samples, 16 kHz sample rate, and a single channel (Mono) is supported. When
// used with Conversation Transcription, eight channels are supported.
Expand Down
1 change: 1 addition & 0 deletions samples/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ func main() {
samples := functionMap{
"speech_recognizer:RecognizeOnceFromWavFile": recognizer.RecognizeOnceFromWavFile,
"speech_recognizer:RecognizeOnceFromCompressedFile": recognizer.RecognizeOnceFromCompressedFile,
"speech_recognizer:RecognizeOnceFromALAWFile": recognizer.RecognizeOnceFromALAWFile,
"speech_recognizer:ContinuousFromMicrophone": recognizer.ContinuousFromMicrophone,
"speech_recognizer:RecognizeContinuousUsingWrapper": recognizer.RecognizeContinuousUsingWrapper,
"dialog_service_connector:ListenOnce": dialog_service_connector.ListenOnce,
Expand Down
58 changes: 58 additions & 0 deletions samples/recognizer/from_file.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,61 @@ func RecognizeOnceFromCompressedFile(subscription string, region string, file st
fmt.Println("Got a recognition!")
fmt.Println(outcome.Result.Text)
}

// RecognizeOnceFromALAWFile runs a single-shot speech recognition over the
// A-law encoded audio in file and prints the recognized text to stdout.
//
// subscription and region are passed to speech.NewSpeechConfigFromSubscription.
// The file contents are pushed into a push audio input stream created with an
// 8000 samples/s, 16 bits-per-sample, single-channel WaveALAW format.
// Any setup error, recognition error or 40 second timeout is printed and the
// function returns without a result.
func RecognizeOnceFromALAWFile(subscription string, region string, file string) {
	format, err := audio.GetWaveFormat(8000, 16, 1, audio.WaveALAW)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer format.Close()
	stream, err := audio.CreatePushAudioInputStreamFromFormat(format)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer stream.Close()
	audioConfig, err := audio.NewAudioConfigFromStreamInput(stream)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer audioConfig.Close()
	config, err := speech.NewSpeechConfigFromSubscription(subscription, region)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer config.Close()
	speechRecognizer, err := speech.NewSpeechRecognizerFromConfig(config, audioConfig)
	if err != nil {
		fmt.Println("Got an error: ", err)
		return
	}
	defer speechRecognizer.Close()
	speechRecognizer.SessionStarted(func(event speech.SessionEventArgs) {
		defer event.Close()
		fmt.Println("Session Started (ID=", event.SessionID, ")")
	})
	speechRecognizer.SessionStopped(func(event speech.SessionEventArgs) {
		defer event.Close()
		fmt.Println("Session Stopped (ID=", event.SessionID, ")")
	})
	helpers.PumpFileIntoStream(file, stream)
	task := speechRecognizer.RecognizeOnceAsync()
	var outcome speech.SpeechRecognitionOutcome
	select {
	case outcome = <-task:
	case <-time.After(40 * time.Second):
		fmt.Println("Timed out")
		return
	}
	defer outcome.Close()
	if outcome.Error != nil {
		fmt.Println("Got an error: ", outcome.Error)
		// Stop here: a failed outcome must not be reported as a recognition,
		// and its Result may be nil, so reading Result.Text could panic.
		return
	}
	fmt.Println("Got a recognition!")
	fmt.Println(outcome.Result.Text)
}

0 comments on commit 6631b9f

Please sign in to comment.