From 4028dbcb8fe04af412ff8a9f53bd23500012ca95 Mon Sep 17 00:00:00 2001 From: Ke Wang Date: Sat, 12 Oct 2024 16:00:11 +0800 Subject: [PATCH] [PronScore][csharp] enable MAS with PA (#2626) * feat(pronscore): enable MAS with PA * fix(log): remove logging --------- Co-authored-by: Ke WANG --- .../csharp/sharedcontent/console/Program.cs | 5 + .../console/speech_recognition_samples.cs | 210 ++++++++++++------ 2 files changed, 147 insertions(+), 68 deletions(-) diff --git a/samples/csharp/sharedcontent/console/Program.cs b/samples/csharp/sharedcontent/console/Program.cs index f9d7beb83..da1acba6f 100644 --- a/samples/csharp/sharedcontent/console/Program.cs +++ b/samples/csharp/sharedcontent/console/Program.cs @@ -802,6 +802,7 @@ private static void PronunciationAssessment() Console.WriteLine(" 3. Pronunciation assessment configured with json."); Console.WriteLine(" 4. Pronunciation assessment continuous with file."); Console.WriteLine(" 5. Pronunciation assessment with content assessment."); + Console.WriteLine(" 6. Pronunciation assessment with Microsoft Audio Stack."); Console.WriteLine(""); Console.Write(prompt); @@ -831,6 +832,10 @@ private static void PronunciationAssessment() case ConsoleKey.NumPad5: SpeechRecognitionSamples.PronunciationAssessmentWithContentAssessment().Wait(); break; + case ConsoleKey.D6: + case ConsoleKey.NumPad6: + SpeechRecognitionSamples.PronunciationAssessmentWithMas().Wait(); + break; case ConsoleKey.D0: case ConsoleKey.NumPad0: Console.WriteLine(back); diff --git a/samples/csharp/sharedcontent/console/speech_recognition_samples.cs b/samples/csharp/sharedcontent/console/speech_recognition_samples.cs index e13f14a9a..d1bcbef71 100644 --- a/samples/csharp/sharedcontent/console/speech_recognition_samples.cs +++ b/samples/csharp/sharedcontent/console/speech_recognition_samples.cs @@ -1117,6 +1117,74 @@ private static async Task PronunciationAssessmentWithStreamInternalAsync(SpeechC } } + // Pronunciation assessment configured with json + // See more information at https://aka.ms/csspeech/pa + public static async Task PronunciationAssessmentConfiguredWithJson() + { + // Creates an instance of a speech config with specified subscription key and service region. + // Replace with your own subscription key and service region (e.g., "westus"). + var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion"); + + // Replace the language with your language in BCP-47 format, e.g., en-US. + var language = "en-US"; + + // Creates an instance of audio config from an audio file + var audioConfig = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav"); + + var referenceText = "what's the weather like"; + // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. + string json_config = "{\"GradingSystem\":\"HundredMark\",\"Granularity\":\"Phoneme\",\"EnableMiscue\":true, \"ScenarioId\":\"\"}"; + var pronunciationConfig = PronunciationAssessmentConfig.FromJson(json_config); + pronunciationConfig.ReferenceText = referenceText; + + pronunciationConfig.EnableProsodyAssessment(); + + // Creates a speech recognizer for the specified language + using (var recognizer = new SpeechRecognizer(config, language, audioConfig)) + { + // Starts recognizing. + pronunciationConfig.ApplyTo(recognizer); + + // Starts speech recognition, and returns after a single utterance is recognized. + // For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. + var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false); + + // Checks result. + if (result.Reason == ResultReason.RecognizedSpeech) + { + Console.WriteLine($"RECOGNIZED: Text={result.Text}"); + Console.WriteLine(" PRONUNCIATION ASSESSMENT RESULTS:"); + + var pronunciationResult = PronunciationAssessmentResult.FromResult(result); + Console.WriteLine( + $" Accuracy score: {pronunciationResult.AccuracyScore}, Prosody Score: {pronunciationResult.ProsodyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}"); + + Console.WriteLine(" Word-level details:"); + + foreach (var word in pronunciationResult.Words) + { + Console.WriteLine($" Word: {word.Word}, Accuracy score: {word.AccuracyScore}, Error type: {word.ErrorType}."); + } + } + else if (result.Reason == ResultReason.NoMatch) + { + Console.WriteLine($"NOMATCH: Speech could not be recognized."); + } + else if (result.Reason == ResultReason.Canceled) + { + var cancellation = CancellationDetails.FromResult(result); + Console.WriteLine($"CANCELED: Reason={cancellation.Reason}"); + + if (cancellation.Reason == CancellationReason.Error) + { + Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}"); + Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}"); + Console.WriteLine($"CANCELED: Did you update the subscription info?"); + } + } + } + } + // Pronunciation assessment continous from file // See more information at https://aka.ms/csspeech/pa public static async Task PronunciationAssessmentContinuousWithFile() @@ -1351,6 +1419,80 @@ public static async Task PronunciationAssessmentWithContentAssessment() } } + // Pronunciation assessment with Microsoft Audio Stack (MAS) enabled + // See more information at https://aka.ms/csspeech/pa + public static async Task PronunciationAssessmentWithMas() + { + // Creates an instance of a speech config with specified subscription key and service region. + // Replace with your own subscription key and service region (e.g., "westus"). + var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion"); + + // Replace the language with your language in BCP-47 format, e.g., en-US. + var language = "en-US"; + + // Creates an instance of audio processing options with the default settings + var audioProcessingOptions = AudioProcessingOptions.Create( + AudioProcessingConstants.AUDIO_INPUT_PROCESSING_DISABLE_ECHO_CANCELLATION | + AudioProcessingConstants.AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT, + PresetMicrophoneArrayGeometry.Mono); + + // Creates an instance of audio config from an audio file + var audioConfig = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav", audioProcessingOptions); + + var referenceText = "what's the weather like"; + + // Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. + var pronunciationConfig = new PronunciationAssessmentConfig(referenceText, GradingSystem.HundredMark, Granularity.Phoneme, enableMiscue: true); + + // Enable prosody assessment + pronunciationConfig.EnableProsodyAssessment(); + + // Creates a speech recognizer for the specified language + using (var recognizer = new SpeechRecognizer(config, language, audioConfig)) + { + // Starts recognizing. + pronunciationConfig.ApplyTo(recognizer); + + // Starts speech recognition, and returns after a single utterance is recognized. + // For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. + var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false); + + // Checks result. + if (result.Reason == ResultReason.RecognizedSpeech) + { + Console.WriteLine($"RECOGNIZED: Text={result.Text}"); + Console.WriteLine(" PRONUNCIATION ASSESSMENT RESULTS:"); + + var pronunciationResult = PronunciationAssessmentResult.FromResult(result); + Console.WriteLine( + $" Accuracy score: {pronunciationResult.AccuracyScore}, Prosody Score: {pronunciationResult.ProsodyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}"); + + Console.WriteLine(" Word-level details:"); + + foreach (var word in pronunciationResult.Words) + { + Console.WriteLine($" Word: {word.Word}, Accuracy score: {word.AccuracyScore}, Error type: {word.ErrorType}."); + } + } + else if (result.Reason == ResultReason.NoMatch) + { + Console.WriteLine($"NOMATCH: Speech could not be recognized."); + } + else if (result.Reason == ResultReason.Canceled) + { + var cancellation = CancellationDetails.FromResult(result); + Console.WriteLine($"CANCELED: Reason={cancellation.Reason}"); + + if (cancellation.Reason == CancellationReason.Error) + { + Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}"); + Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}"); + Console.WriteLine($"CANCELED: Did you update the subscription info?"); + } + } + } + } + private static async Task RecognizeOnceAsyncInternal(string key, string region) { RecognitionResult recognitionResult = null; @@ -1463,74 +1605,6 @@ public static async Task ContinuousRecognitionFromDefaultMicrophoneWithMASEnable } } - // Pronunciation assessment configured with json - // See more information at https://aka.ms/csspeech/pa - public static async Task PronunciationAssessmentConfiguredWithJson() - { - // Creates an instance of a speech config with specified subscription key and service region. - // Replace with your own subscription key and service region (e.g., "westus"). - var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion"); - - // Replace the language with your language in BCP-47 format, e.g., en-US. - var language = "en-US"; - - // Creates an instance of audio config from an audio file - var audioConfig = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav"); - - var referenceText = "what's the weather like"; - // create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement. - string json_config = "{\"GradingSystem\":\"HundredMark\",\"Granularity\":\"Phoneme\",\"EnableMiscue\":true, \"ScenarioId\":\"[scenario ID will be assigned by product team]\"}"; - var pronunciationConfig = PronunciationAssessmentConfig.FromJson(json_config); - pronunciationConfig.ReferenceText = referenceText; - - pronunciationConfig.EnableProsodyAssessment(); - - // Creates a speech recognizer for the specified language - using (var recognizer = new SpeechRecognizer(config, language, audioConfig)) - { - // Starts recognizing. - pronunciationConfig.ApplyTo(recognizer); - - // Starts speech recognition, and returns after a single utterance is recognized. - // For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead. - var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false); - - // Checks result. - if (result.Reason == ResultReason.RecognizedSpeech) - { - Console.WriteLine($"RECOGNIZED: Text={result.Text}"); - Console.WriteLine(" PRONUNCIATION ASSESSMENT RESULTS:"); - - var pronunciationResult = PronunciationAssessmentResult.FromResult(result); - Console.WriteLine( - $" Accuracy score: {pronunciationResult.AccuracyScore}, Prosody Score: {pronunciationResult.ProsodyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}"); - - Console.WriteLine(" Word-level details:"); - - foreach (var word in pronunciationResult.Words) - { - Console.WriteLine($" Word: {word.Word}, Accuracy score: {word.AccuracyScore}, Error type: {word.ErrorType}."); - } - } - else if (result.Reason == ResultReason.NoMatch) - { - Console.WriteLine($"NOMATCH: Speech could not be recognized."); - } - else if (result.Reason == ResultReason.Canceled) - { - var cancellation = CancellationDetails.FromResult(result); - Console.WriteLine($"CANCELED: Reason={cancellation.Reason}"); - - if (cancellation.Reason == CancellationReason.Error) - { - Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}"); - Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}"); - Console.WriteLine($"CANCELED: Did you update the subscription info?"); - } - } - } - } - // Speech recognition from a microphone with Microsoft Audio Stack enabled and pre-defined microphone array geometry specified. public static async Task RecognitionFromMicrophoneWithMASEnabledAndPresetGeometrySpecified() {