From 4b7c26f50100d2564afc5b02d39d3785f5d73f99 Mon Sep 17 00:00:00 2001 From: Thomas Schick Date: Tue, 16 May 2023 15:42:47 +0200 Subject: [PATCH 1/4] feat(stt): add character insertion bias parameter --- v3/speechtotextv1/speech_to_text_v1.go | 83 ++++++++++++++++++++------ 1 file changed, 64 insertions(+), 19 deletions(-) diff --git a/v3/speechtotextv1/speech_to_text_v1.go b/v3/speechtotextv1/speech_to_text_v1.go index 7940f31..cbf242d 100644 --- a/v3/speechtotextv1/speech_to_text_v1.go +++ b/v3/speechtotextv1/speech_to_text_v1.go @@ -45,7 +45,6 @@ import ( // names. Broadband and multimedia models have minimum sampling rates of 16 kHz. Narrowband and telephony models have // minimum sampling rates of 8 kHz. The next-generation models offer high throughput and greater transcription accuracy. // -// // Effective 15 March 2022, previous-generation models for all languages other than Arabic and Japanese are deprecated. // The deprecated models remain available until 15 September 2022, when they will be removed from the service and the // documentation. You must migrate to the equivalent next-generation model by the end of service date. For more @@ -314,7 +313,8 @@ func (speechToText *SpeechToTextV1) GetModelWithContext(ctx context.Context, get // // ### Streaming mode // -// For requests to transcribe live audio as it becomes available, you must set the `Transfer-Encoding` header to +// For requests to transcribe live audio as it becomes available, you must set the `Transfer-Encoding` header to +// // `chunked` to use streaming mode. In streaming mode, the service closes the connection (status code 408) if it does // not receive at least 15 seconds of audio (including silence) in any 30-second period. 
The service also closes the // connection (status code 400) if it detects no speech for `inactivity_timeout` seconds of streaming audio; use the @@ -326,7 +326,8 @@ func (speechToText *SpeechToTextV1) GetModelWithContext(ctx context.Context, get // // ### Audio formats (content types) // -// The service accepts audio in the following formats (MIME types). +// The service accepts audio in the following formats (MIME types). +// // * For formats that are labeled **Required**, you must use the `Content-Type` header with the request to specify the // format of the audio. // * For all other formats, you can omit the `Content-Type` header or specify `application/octet-stream` with the header @@ -357,12 +358,14 @@ func (speechToText *SpeechToTextV1) GetModelWithContext(ctx context.Context, get // minimum required rate, the service down-samples the audio to the appropriate rate. If the sampling rate of the audio // is lower than the minimum required rate, the request fails. // -// **See also:** [Supported audio +// **See also:** [Supported audio +// // formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats). // // ### Next-generation models // -// The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz) models for many languages. +// The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz) models for many languages. +// // Next-generation models have higher throughput than the service's previous generation of `Broadband` and `Narrowband` // models. When you use next-generation models, the service can return transcriptions more quickly and also provide // noticeably better transcription accuracy. 
@@ -385,7 +388,8 @@ func (speechToText *SpeechToTextV1) GetModelWithContext(ctx context.Context, get // // ### Multipart speech recognition // -// **Note:** The asynchronous HTTP interface, WebSocket interface, and Watson SDKs do not support multipart speech +// **Note:** The asynchronous HTTP interface, WebSocket interface, and Watson SDKs do not support multipart speech +// // recognition. // // The HTTP `POST` method of the service also supports multipart speech recognition. With multipart requests, you pass @@ -504,6 +508,9 @@ func (speechToText *SpeechToTextV1) RecognizeWithContext(ctx context.Context, re if recognizeOptions.BackgroundAudioSuppression != nil { builder.AddQuery("background_audio_suppression", fmt.Sprint(*recognizeOptions.BackgroundAudioSuppression)) } + if recognizeOptions.CharacterInsertionBias != nil { + builder.AddQuery("character_insertion_bias", fmt.Sprint(*recognizeOptions.CharacterInsertionBias)) + } if recognizeOptions.LowLatency != nil { builder.AddQuery("low_latency", fmt.Sprint(*recognizeOptions.LowLatency)) } @@ -706,7 +713,8 @@ func (speechToText *SpeechToTextV1) UnregisterCallbackWithContext(ctx context.Co // // ### Streaming mode // -// For requests to transcribe live audio as it becomes available, you must set the `Transfer-Encoding` header to +// For requests to transcribe live audio as it becomes available, you must set the `Transfer-Encoding` header to +// // `chunked` to use streaming mode. In streaming mode, the service closes the connection (status code 408) if it does // not receive at least 15 seconds of audio (including silence) in any 30-second period. The service also closes the // connection (status code 400) if it detects no speech for `inactivity_timeout` seconds of streaming audio; use the @@ -718,7 +726,8 @@ func (speechToText *SpeechToTextV1) UnregisterCallbackWithContext(ctx context.Co // // ### Audio formats (content types) // -// The service accepts audio in the following formats (MIME types). 
+// The service accepts audio in the following formats (MIME types). +// // * For formats that are labeled **Required**, you must use the `Content-Type` header with the request to specify the // format of the audio. // * For all other formats, you can omit the `Content-Type` header or specify `application/octet-stream` with the header @@ -749,12 +758,14 @@ func (speechToText *SpeechToTextV1) UnregisterCallbackWithContext(ctx context.Co // minimum required rate, the service down-samples the audio to the appropriate rate. If the sampling rate of the audio // is lower than the minimum required rate, the request fails. // -// **See also:** [Supported audio +// **See also:** [Supported audio +// // formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats). // // ### Next-generation models // -// The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz) models for many languages. +// The service supports next-generation `Multimedia` (16 kHz) and `Telephony` (8 kHz) models for many languages. +// // Next-generation models have higher throughput than the service's previous generation of `Broadband` and `Narrowband` // models. When you use next-generation models, the service can return transcriptions more quickly and also provide // noticeably better transcription accuracy. @@ -1413,10 +1424,10 @@ func (speechToText *SpeechToTextV1) DeleteLanguageModelWithContext(ctx context.C // model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-languageCreate#trainModel-language) // * [Language support for customization](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-custom-support) // -// // ### Training failures // -// Training can fail to start for the following reasons: +// Training can fail to start for the following reasons: +// // * The service is currently handling another request for the custom model, such as another training request or a // request to add a corpus or grammar to the model. 
// * No training data have been added to the custom model. @@ -1859,8 +1870,6 @@ func (speechToText *SpeechToTextV1) GetCorpusWithContext(ctx context.Context, ge // another corpus or grammar, or they were modified in some way with the [Add custom words](#addwords) or [Add a custom // word](#addword) method. // -// -// // **See also:** [Deleting a corpus from a custom language // model](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-manageCorpora#deleteCorpus). func (speechToText *SpeechToTextV1) DeleteCorpus(deleteCorpusOptions *DeleteCorpusOptions) (response *core.DetailedResponse, err error) { @@ -2938,7 +2947,8 @@ func (speechToText *SpeechToTextV1) DeleteAcousticModelWithContext(ctx context.C // // ### Training failures // -// Training can fail to start for the following reasons: +// Training can fail to start for the following reasons: +// // * The service is currently handling another request for the custom model, such as another training request or a // request to add audio resources to the model. // * The custom model contains less than 10 minutes or more than 200 hours of audio data. @@ -3259,7 +3269,8 @@ func (speechToText *SpeechToTextV1) ListAudioWithContext(ctx context.Context, li // // ### Content types for audio-type resources // -// You can add an individual audio file in any format that the service supports for speech recognition. For an +// You can add an individual audio file in any format that the service supports for speech recognition. For an +// // audio-type resource, use the `Content-Type` parameter to specify the audio format (MIME type) of the audio file, // including specifying the sampling rate, channels, and endianness where indicated. // * `audio/alaw` (Specify the sampling rate (`rate`) of the audio.) @@ -3284,12 +3295,14 @@ func (speechToText *SpeechToTextV1) ListAudioWithContext(ctx context.Context, li // minimum required rate, the service down-samples the audio to the appropriate rate. 
If the sampling rate of the audio // is lower than the minimum required rate, the service labels the audio file as `invalid`. // -// **See also:** [Supported audio +// **See also:** [Supported audio +// // formats](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-audio-formats). // // ### Content types for archive-type resources // -// You can add an archive file (**.zip** or **.tar.gz** file) that contains audio files in any format that the service +// You can add an archive file (**.zip** or **.tar.gz** file) that contains audio files in any format that the service +// // supports for speech recognition. For an archive-type resource, use the `Content-Type` parameter to specify the media // type of the archive file: // * `application/zip` for a **.zip** file @@ -3309,7 +3322,8 @@ func (speechToText *SpeechToTextV1) ListAudioWithContext(ctx context.Context, li // // ### Naming restrictions for embedded audio files // -// The name of an audio file that is contained in an archive-type resource can include a maximum of 128 characters. +// The name of an audio file that is contained in an archive-type resource can include a maximum of 128 characters. +// // This includes the file extension and all elements of the name (for example, slashes). func (speechToText *SpeechToTextV1) AddAudio(addAudioOptions *AddAudioOptions) (response *core.DetailedResponse, err error) { return speechToText.AddAudioWithContext(context.Background(), addAudioOptions) @@ -7212,6 +7226,31 @@ type RecognizeOptions struct { // support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support). BackgroundAudioSuppression *float32 `json:"background_audio_suppression,omitempty"` + // The character_insertion_bias parameter controls the service's bias for competing strings of different lengths + // during speech recognition. With next-generation models, the service parses audio character by character. 
+ // As it does, it establishes hypotheses of previous character strings to help determine viable next characters. + // During this process, it collects candidate strings of different lengths. + // + // By default, each model uses a character_insertion_bias of 0.0. + // This value is optimized to produce the best balance between hypotheses with different numbers of characters. + // The default is typically adequate for most speech recognition. + // However, certain use cases might benefit from favoring hypotheses with shorter or longer strings of characters. + // In such cases, specifying a change from the default can improve speech recognition. + // + // You can use the character_insertion_bias parameter to indicate that the service is to favor shorter or longer + // strings as it considers subsequent characters for its hypotheses. + // The value you provide depends on the characteristics of your audio. + // The range of acceptable values is from -1.0 to 1.0: + // + // Negative values cause the service to prefer hypotheses with shorter strings of characters. + // Positive values cause the service to prefer hypotheses with longer strings of characters. + // As your value approaches -1.0 or 1.0, the impact of the parameter becomes more pronounced. + // To determine the most effective value for your scenario, start by setting the value of the parameter + // to a small increment, such as -0.1, -0.05, 0.05, or 0.1, and assess how the value impacts the transcription results. + // + // The parameter is not available for previous-generation models. + CharacterInsertionBias *float32 `json:"character_insertion_bias,omitempty"` + // If `true` for next-generation `Multimedia` and `Telephony` models that support low latency, directs the service to // produce results even more quickly than it usually does. Next-generation models produce transcription results faster // than previous-generation models. 
The `low_latency` parameter causes the models to produce results even more quickly, @@ -7459,6 +7498,12 @@ func (_options *RecognizeOptions) SetBackgroundAudioSuppression(backgroundAudioS return _options } +// SetCharacterInsertionBias : Allow user to set CharacterInsertionBias +func (_options *RecognizeOptions) SetCharacterInsertionBias(characterInsertionBias float32) *RecognizeOptions { + _options.CharacterInsertionBias = core.Float32Ptr(characterInsertionBias) + return _options +} + // SetLowLatency : Allow user to set LowLatency func (_options *RecognizeOptions) SetLowLatency(lowLatency bool) *RecognizeOptions { _options.LowLatency = core.BoolPtr(lowLatency) From 5f701db8ff342cfc4b73b84fab37382660e326b8 Mon Sep 17 00:00:00 2001 From: Thomas Schick Date: Tue, 16 May 2023 15:54:57 +0200 Subject: [PATCH 2/4] fix(stt): add character insertion bias parameter to createjoboptions --- v3/speechtotextv1/speech_to_text_v1.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/v3/speechtotextv1/speech_to_text_v1.go b/v3/speechtotextv1/speech_to_text_v1.go index cbf242d..6dd859e 100644 --- a/v3/speechtotextv1/speech_to_text_v1.go +++ b/v3/speechtotextv1/speech_to_text_v1.go @@ -5064,6 +5064,31 @@ type CreateJobOptions struct { // support](https://cloud.ibm.com/docs/speech-to-text?topic=speech-to-text-detection#detection-support). BackgroundAudioSuppression *float32 `json:"background_audio_suppression,omitempty"` + // The character_insertion_bias parameter controls the service's bias for competing strings of different lengths + // during speech recognition. With next-generation models, the service parses audio character by character. + // As it does, it establishes hypotheses of previous character strings to help determine viable next characters. + // During this process, it collects candidate strings of different lengths. + // + // By default, each model uses a character_insertion_bias of 0.0. 
+ // This value is optimized to produce the best balance between hypotheses with different numbers of characters. + // The default is typically adequate for most speech recognition. + // However, certain use cases might benefit from favoring hypotheses with shorter or longer strings of characters. + // In such cases, specifying a change from the default can improve speech recognition. + // + // You can use the character_insertion_bias parameter to indicate that the service is to favor shorter or longer + // strings as it considers subsequent characters for its hypotheses. + // The value you provide depends on the characteristics of your audio. + // The range of acceptable values is from -1.0 to 1.0: + // + // Negative values cause the service to prefer hypotheses with shorter strings of characters. + // Positive values cause the service to prefer hypotheses with longer strings of characters. + // As your value approaches -1.0 or 1.0, the impact of the parameter becomes more pronounced. + // To determine the most effective value for your scenario, start by setting the value of the parameter + // to a small increment, such as -0.1, -0.05, 0.05, or 0.1, and assess how the value impacts the transcription results. + // + // The parameter is not available for previous-generation models. + CharacterInsertionBias *float32 `json:"character_insertion_bias,omitempty"` + // If `true` for next-generation `Multimedia` and `Telephony` models that support low latency, directs the service to // produce results even more quickly than it usually does. Next-generation models produce transcription results faster // than previous-generation models. 
The `low_latency` parameter causes the models to produce results even more quickly, From 493103983f2de36a21a0ddee6a64fb3412cf4526 Mon Sep 17 00:00:00 2001 From: Thomas Schick Date: Tue, 16 May 2023 15:56:44 +0200 Subject: [PATCH 3/4] fix(stt-unit-test): add character insertion bias param to test cases --- v3/speechtotextv1/speech_to_text_v1_test.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/v3/speechtotextv1/speech_to_text_v1_test.go b/v3/speechtotextv1/speech_to_text_v1_test.go index 1c3e821..00eaeaa 100644 --- a/v3/speechtotextv1/speech_to_text_v1_test.go +++ b/v3/speechtotextv1/speech_to_text_v1_test.go @@ -610,6 +610,7 @@ var _ = Describe(`SpeechToTextV1`, func() { // TODO: Add check for split_transcript_at_phrase_end query parameter // TODO: Add check for speech_detector_sensitivity query parameter // TODO: Add check for background_audio_suppression query parameter + // TODO: Add check for character_insertion_bias query parameter // TODO: Add check for low_latency query parameter res.Header().Set("Content-type", "application/json") res.WriteHeader(200) @@ -651,6 +652,7 @@ var _ = Describe(`SpeechToTextV1`, func() { recognizeOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) recognizeOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.LowLatency = core.BoolPtr(false) recognizeOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} // Expect response parsing to fail since we are receiving a text/plain response @@ -722,6 +724,7 @@ var _ = Describe(`SpeechToTextV1`, func() { // TODO: Add check for split_transcript_at_phrase_end query parameter // TODO: Add check for speech_detector_sensitivity query parameter // TODO: Add check for background_audio_suppression query parameter + // TODO: Add check for 
character_insertion_bias query parameter // TODO: Add check for low_latency query parameter // Sleep a short time to support a timeout test time.Sleep(100 * time.Millisecond) @@ -768,6 +771,7 @@ var _ = Describe(`SpeechToTextV1`, func() { recognizeOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) recognizeOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.LowLatency = core.BoolPtr(false) recognizeOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} @@ -845,6 +849,7 @@ var _ = Describe(`SpeechToTextV1`, func() { // TODO: Add check for split_transcript_at_phrase_end query parameter // TODO: Add check for speech_detector_sensitivity query parameter // TODO: Add check for background_audio_suppression query parameter + // TODO: Add check for character_insertion_bias query parameter // TODO: Add check for low_latency query parameter // Set mock response res.Header().Set("Content-type", "application/json") @@ -893,6 +898,7 @@ var _ = Describe(`SpeechToTextV1`, func() { recognizeOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) recognizeOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.LowLatency = core.BoolPtr(false) recognizeOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} @@ -938,6 +944,7 @@ var _ = Describe(`SpeechToTextV1`, func() { recognizeOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) recognizeOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + 
recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.LowLatency = core.BoolPtr(false) recognizeOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} // Invoke operation with empty URL (negative test) @@ -1004,6 +1011,7 @@ var _ = Describe(`SpeechToTextV1`, func() { recognizeOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) recognizeOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) recognizeOptionsModel.LowLatency = core.BoolPtr(false) recognizeOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} @@ -1352,6 +1360,7 @@ var _ = Describe(`SpeechToTextV1`, func() { // TODO: Add check for split_transcript_at_phrase_end query parameter // TODO: Add check for speech_detector_sensitivity query parameter // TODO: Add check for background_audio_suppression query parameter + // TODO: Add check for character_insertion_bias query parameter // TODO: Add check for low_latency query parameter res.Header().Set("Content-type", "application/json") res.WriteHeader(201) @@ -1399,6 +1408,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) createJobOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) createJobOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + createJobOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) createJobOptionsModel.LowLatency = core.BoolPtr(false) createJobOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} // Expect response parsing to fail since we are receiving a text/plain response @@ -1476,6 +1486,7 @@ var _ = Describe(`SpeechToTextV1`, func() { // TODO: Add check for split_transcript_at_phrase_end query parameter // 
TODO: Add check for speech_detector_sensitivity query parameter // TODO: Add check for background_audio_suppression query parameter + // TODO: Add check for character_insertion_bias query parameter // TODO: Add check for low_latency query parameter // Sleep a short time to support a timeout test time.Sleep(100 * time.Millisecond) @@ -1528,6 +1539,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) createJobOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) createJobOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + createJobOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) createJobOptionsModel.LowLatency = core.BoolPtr(false) createJobOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} @@ -1611,6 +1623,7 @@ var _ = Describe(`SpeechToTextV1`, func() { // TODO: Add check for split_transcript_at_phrase_end query parameter // TODO: Add check for speech_detector_sensitivity query parameter // TODO: Add check for background_audio_suppression query parameter + // TODO: Add check for character_insertion_bias query parameter // TODO: Add check for low_latency query parameter // Set mock response res.Header().Set("Content-type", "application/json") @@ -1665,6 +1678,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) createJobOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) createJobOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) createJobOptionsModel.LowLatency = core.BoolPtr(false) createJobOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} @@ -1716,6 +1730,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) 
createJobOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) createJobOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) createJobOptionsModel.LowLatency = core.BoolPtr(false) createJobOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} // Invoke operation with empty URL (negative test) @@ -1788,6 +1803,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) createJobOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) createJobOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) + recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) createJobOptionsModel.LowLatency = core.BoolPtr(false) createJobOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} @@ -7215,6 +7231,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SetSplitTranscriptAtPhraseEnd(false) createJobOptionsModel.SetSpeechDetectorSensitivity(float32(36.0)) createJobOptionsModel.SetBackgroundAudioSuppression(float32(36.0)) + createJobOptionsModel.SetCharacterInsertionBias(float32(36.0)) createJobOptionsModel.SetLowLatency(false) createJobOptionsModel.SetHeaders(map[string]string{"foo": "bar"}) Expect(createJobOptionsModel).ToNot(BeNil()) @@ -7249,6 +7266,7 @@ var _ = Describe(`SpeechToTextV1`, func() { Expect(createJobOptionsModel.SplitTranscriptAtPhraseEnd).To(Equal(core.BoolPtr(false))) Expect(createJobOptionsModel.SpeechDetectorSensitivity).To(Equal(core.Float32Ptr(float32(36.0)))) Expect(createJobOptionsModel.BackgroundAudioSuppression).To(Equal(core.Float32Ptr(float32(36.0)))) + Expect(createJobOptionsModel.CharacterInsertionBias).To(Equal(core.Float32Ptr(float32(36.0)))) Expect(createJobOptionsModel.LowLatency).To(Equal(core.BoolPtr(false))) 
Expect(createJobOptionsModel.Headers).To(Equal(map[string]string{"foo": "bar"})) }) @@ -7541,6 +7559,7 @@ var _ = Describe(`SpeechToTextV1`, func() { recognizeOptionsModel.SetSplitTranscriptAtPhraseEnd(false) recognizeOptionsModel.SetSpeechDetectorSensitivity(float32(36.0)) recognizeOptionsModel.SetBackgroundAudioSuppression(float32(36.0)) + recognizeOptionsModel.SetCharacterInsertionBias(float32(36.0)) recognizeOptionsModel.SetLowLatency(false) recognizeOptionsModel.SetHeaders(map[string]string{"foo": "bar"}) Expect(recognizeOptionsModel).ToNot(BeNil()) @@ -7569,6 +7588,7 @@ var _ = Describe(`SpeechToTextV1`, func() { Expect(recognizeOptionsModel.SplitTranscriptAtPhraseEnd).To(Equal(core.BoolPtr(false))) Expect(recognizeOptionsModel.SpeechDetectorSensitivity).To(Equal(core.Float32Ptr(float32(36.0)))) Expect(recognizeOptionsModel.BackgroundAudioSuppression).To(Equal(core.Float32Ptr(float32(36.0)))) + Expect(recognizeOptionsModel.CharacterInsertionBias).To(Equal(core.Float32Ptr(float32(36.0)))) Expect(recognizeOptionsModel.LowLatency).To(Equal(core.BoolPtr(false))) Expect(recognizeOptionsModel.Headers).To(Equal(map[string]string{"foo": "bar"})) }) From 4415b09ee1b1d35e249fa0c082a34e89168cf3a7 Mon Sep 17 00:00:00 2001 From: Thomas Schick Date: Tue, 16 May 2023 19:53:33 +0200 Subject: [PATCH 4/4] fix(stt): add missing set func for character insertion bias on CreateJobOptions --- v3/speechtotextv1/speech_to_text_v1.go | 6 ++++++ v3/speechtotextv1/speech_to_text_v1_test.go | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/v3/speechtotextv1/speech_to_text_v1.go b/v3/speechtotextv1/speech_to_text_v1.go index 6dd859e..427d93b 100644 --- a/v3/speechtotextv1/speech_to_text_v1.go +++ b/v3/speechtotextv1/speech_to_text_v1.go @@ -5395,6 +5395,12 @@ func (_options *CreateJobOptions) SetBackgroundAudioSuppression(backgroundAudioS return _options } +// SetCharacterInsertionBias : Allow user to set CharacterInsertionBias +func (_options *CreateJobOptions) 
SetCharacterInsertionBias(characterInsertionBias float32) *CreateJobOptions { + _options.CharacterInsertionBias = core.Float32Ptr(characterInsertionBias) + return _options +} + // SetLowLatency : Allow user to set LowLatency func (_options *CreateJobOptions) SetLowLatency(lowLatency bool) *CreateJobOptions { _options.LowLatency = core.BoolPtr(lowLatency) diff --git a/v3/speechtotextv1/speech_to_text_v1_test.go b/v3/speechtotextv1/speech_to_text_v1_test.go index 00eaeaa..1a77298 100644 --- a/v3/speechtotextv1/speech_to_text_v1_test.go +++ b/v3/speechtotextv1/speech_to_text_v1_test.go @@ -1678,7 +1678,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) createJobOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) createJobOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) - recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) + createJobOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) createJobOptionsModel.LowLatency = core.BoolPtr(false) createJobOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} @@ -1730,7 +1730,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) createJobOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) createJobOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) - recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) + createJobOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) createJobOptionsModel.LowLatency = core.BoolPtr(false) createJobOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"} // Invoke operation with empty URL (negative test) @@ -1803,7 +1803,7 @@ var _ = Describe(`SpeechToTextV1`, func() { createJobOptionsModel.SplitTranscriptAtPhraseEnd = core.BoolPtr(false) 
createJobOptionsModel.SpeechDetectorSensitivity = core.Float32Ptr(float32(36.0)) createJobOptionsModel.BackgroundAudioSuppression = core.Float32Ptr(float32(36.0)) - recognizeOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) + createJobOptionsModel.CharacterInsertionBias = core.Float32Ptr(float32(36.0)) createJobOptionsModel.LowLatency = core.BoolPtr(false) createJobOptionsModel.Headers = map[string]string{"x-custom-header": "x-custom-value"}