Skip to content

Commit

Permalink
[PronScore][csharp] enable MAS with PA (#2626)
Browse files Browse the repository at this point in the history
* feat(pronscore): enable MAS with PA

* fix(log): remove logging

---------

Co-authored-by: Ke WANG <[email protected]>
  • Loading branch information
wangkenpu and Ke WANG authored Oct 12, 2024
1 parent 4feb22e commit 4028dbc
Show file tree
Hide file tree
Showing 2 changed files with 147 additions and 68 deletions.
5 changes: 5 additions & 0 deletions samples/csharp/sharedcontent/console/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -802,6 +802,7 @@ private static void PronunciationAssessment()
Console.WriteLine(" 3. Pronunciation assessment configured with json.");
Console.WriteLine(" 4. Pronunciation assessment continuous with file.");
Console.WriteLine(" 5. Pronunciation assessment with content assessment.");
Console.WriteLine(" 6. Pronunciation assessment with Microsoft Audio Stack.");
Console.WriteLine("");
Console.Write(prompt);

Expand Down Expand Up @@ -831,6 +832,10 @@ private static void PronunciationAssessment()
case ConsoleKey.NumPad5:
SpeechRecognitionSamples.PronunciationAssessmentWithContentAssessment().Wait();
break;
case ConsoleKey.D6:
case ConsoleKey.NumPad6:
SpeechRecognitionSamples.PronunciationAssessmentWithMas().Wait();
break;
case ConsoleKey.D0:
case ConsoleKey.NumPad0:
Console.WriteLine(back);
Expand Down
210 changes: 142 additions & 68 deletions samples/csharp/sharedcontent/console/speech_recognition_samples.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1117,6 +1117,74 @@ private static async Task PronunciationAssessmentWithStreamInternalAsync(SpeechC
}
}

// Pronunciation assessment configured with json.
// Recognizes a single utterance from a WAV file, using a pronunciation assessment
// configuration built from a JSON string, and prints the resulting scores
// (or the no-match / cancellation details) to the console.
// See more information at https://aka.ms/csspeech/pa
public static async Task PronunciationAssessmentConfiguredWithJson()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Replace the language with your language in BCP-47 format, e.g., en-US.
    var language = "en-US";

    // Creates an instance of audio config from an audio file.
    // AudioConfig is disposable, so it is scoped with a using block to release it
    // once recognition has finished.
    using (var audioConfig = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav"))
    {
        var referenceText = "what's the weather like";

        // Create the pronunciation assessment config from JSON; set grading system, granularity
        // and whether to enable miscue based on your requirement.
        string json_config = "{\"GradingSystem\":\"HundredMark\",\"Granularity\":\"Phoneme\",\"EnableMiscue\":true, \"ScenarioId\":\"\"}";
        var pronunciationConfig = PronunciationAssessmentConfig.FromJson(json_config);
        pronunciationConfig.ReferenceText = referenceText;

        // Enable prosody assessment so that a prosody score is reported with the result.
        pronunciationConfig.EnableProsodyAssessment();

        // Creates a speech recognizer for the specified language
        using (var recognizer = new SpeechRecognizer(config, language, audioConfig))
        {
            // Applies the pronunciation assessment settings to the recognizer.
            // This must happen before recognition is started below.
            pronunciationConfig.ApplyTo(recognizer);

            // Starts speech recognition, and returns after a single utterance is recognized.
            // For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead.
            var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);

            // Checks result.
            if (result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"RECOGNIZED: Text={result.Text}");
                Console.WriteLine(" PRONUNCIATION ASSESSMENT RESULTS:");

                var pronunciationResult = PronunciationAssessmentResult.FromResult(result);
                Console.WriteLine(
                    $" Accuracy score: {pronunciationResult.AccuracyScore}, Prosody Score: {pronunciationResult.ProsodyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}");

                Console.WriteLine(" Word-level details:");

                foreach (var word in pronunciationResult.Words)
                {
                    Console.WriteLine($" Word: {word.Word}, Accuracy score: {word.AccuracyScore}, Error type: {word.ErrorType}.");
                }
            }
            else if (result.Reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
            else if (result.Reason == ResultReason.Canceled)
            {
                var cancellation = CancellationDetails.FromResult(result);
                Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                // Error details are only printed when the cancellation was caused by an error.
                if (cancellation.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you update the subscription info?");
                }
            }
        }
    }
}

// Pronunciation assessment continuous from file
// See more information at https://aka.ms/csspeech/pa
public static async Task PronunciationAssessmentContinuousWithFile()
Expand Down Expand Up @@ -1351,6 +1419,80 @@ public static async Task PronunciationAssessmentWithContentAssessment()
}
}

// Pronunciation assessment with Microsoft Audio Stack (MAS) enabled.
// Recognizes a single utterance from a WAV file that is routed through MAS audio
// processing, and prints the pronunciation assessment scores (or the no-match /
// cancellation details) to the console.
// See more information at https://aka.ms/csspeech/pa
public static async Task PronunciationAssessmentWithMas()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Replace the language with your language in BCP-47 format, e.g., en-US.
    var language = "en-US";

    // Creates an instance of audio processing options: the default processing flags
    // combined with the flag that disables echo cancellation, using the preset
    // mono microphone geometry.
    // NOTE(review): if AudioProcessingOptions is disposable in your SDK version,
    // consider disposing it as well - confirm against the SDK reference.
    var audioProcessingOptions = AudioProcessingOptions.Create(
        AudioProcessingConstants.AUDIO_INPUT_PROCESSING_DISABLE_ECHO_CANCELLATION |
        AudioProcessingConstants.AUDIO_INPUT_PROCESSING_ENABLE_DEFAULT,
        PresetMicrophoneArrayGeometry.Mono);

    // Creates an instance of audio config from an audio file, with the processing options attached.
    // AudioConfig is disposable, so it is scoped with a using block to release it
    // once recognition has finished.
    using (var audioConfig = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav", audioProcessingOptions))
    {
        var referenceText = "what's the weather like";

        // Create pronunciation assessment config, set grading system, granularity and if enable miscue based on your requirement.
        var pronunciationConfig = new PronunciationAssessmentConfig(referenceText, GradingSystem.HundredMark, Granularity.Phoneme, enableMiscue: true);

        // Enable prosody assessment
        pronunciationConfig.EnableProsodyAssessment();

        // Creates a speech recognizer for the specified language
        using (var recognizer = new SpeechRecognizer(config, language, audioConfig))
        {
            // Applies the pronunciation assessment settings to the recognizer.
            // This must happen before recognition is started below.
            pronunciationConfig.ApplyTo(recognizer);

            // Starts speech recognition, and returns after a single utterance is recognized.
            // For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead.
            var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);

            // Checks result.
            if (result.Reason == ResultReason.RecognizedSpeech)
            {
                Console.WriteLine($"RECOGNIZED: Text={result.Text}");
                Console.WriteLine(" PRONUNCIATION ASSESSMENT RESULTS:");

                var pronunciationResult = PronunciationAssessmentResult.FromResult(result);
                Console.WriteLine(
                    $" Accuracy score: {pronunciationResult.AccuracyScore}, Prosody Score: {pronunciationResult.ProsodyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}");

                Console.WriteLine(" Word-level details:");

                foreach (var word in pronunciationResult.Words)
                {
                    Console.WriteLine($" Word: {word.Word}, Accuracy score: {word.AccuracyScore}, Error type: {word.ErrorType}.");
                }
            }
            else if (result.Reason == ResultReason.NoMatch)
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
            }
            else if (result.Reason == ResultReason.Canceled)
            {
                var cancellation = CancellationDetails.FromResult(result);
                Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                // Error details are only printed when the cancellation was caused by an error.
                if (cancellation.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you update the subscription info?");
                }
            }
        }
    }
}

private static async Task<RecognitionResult> RecognizeOnceAsyncInternal(string key, string region)
{
RecognitionResult recognitionResult = null;
Expand Down Expand Up @@ -1463,74 +1605,6 @@ public static async Task ContinuousRecognitionFromDefaultMicrophoneWithMASEnable
}
}

// Pronunciation assessment configured with json.
// Performs one single-utterance recognition over a WAV file with a JSON-defined
// pronunciation assessment configuration and writes the outcome to the console.
// See more information at https://aka.ms/csspeech/pa
public static async Task PronunciationAssessmentConfiguredWithJson()
{
    // Creates an instance of a speech config with specified subscription key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    var config = SpeechConfig.FromSubscription("YourSubscriptionKey", "YourServiceRegion");

    // Replace the language with your language in BCP-47 format, e.g., en-US.
    var language = "en-US";

    var referenceText = "what's the weather like";

    // Create the pronunciation assessment config from JSON; set grading system, granularity
    // and whether to enable miscue based on your requirement.
    // NOTE: the ScenarioId value in the JSON below is placeholder text, not a real scenario ID.
    string json_config = "{\"GradingSystem\":\"HundredMark\",\"Granularity\":\"Phoneme\",\"EnableMiscue\":true, \"ScenarioId\":\"[scenario ID will be assigned by product team]\"}";
    var pronunciationConfig = PronunciationAssessmentConfig.FromJson(json_config);
    pronunciationConfig.ReferenceText = referenceText;

    // Enable prosody assessment so that a prosody score is reported with the result.
    pronunciationConfig.EnableProsodyAssessment();

    // Creates the audio config from an audio file and a speech recognizer for the
    // specified language. Both are disposable, so both are scoped with using and
    // released (recognizer first) when the block exits.
    using (var audioConfig = AudioConfig.FromWavFileInput(@"whatstheweatherlike.wav"))
    using (var recognizer = new SpeechRecognizer(config, language, audioConfig))
    {
        // Applies the pronunciation assessment settings to the recognizer.
        // This must happen before recognition is started below.
        pronunciationConfig.ApplyTo(recognizer);

        // Starts speech recognition, and returns after a single utterance is recognized.
        // For long-running multi-utterance recognition, use StartContinuousRecognitionAsync() instead.
        var result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);

        // Checks result. Reasons other than the three handled here produce no output.
        switch (result.Reason)
        {
            case ResultReason.RecognizedSpeech:
            {
                Console.WriteLine($"RECOGNIZED: Text={result.Text}");
                Console.WriteLine(" PRONUNCIATION ASSESSMENT RESULTS:");

                var pronunciationResult = PronunciationAssessmentResult.FromResult(result);
                Console.WriteLine(
                    $" Accuracy score: {pronunciationResult.AccuracyScore}, Prosody Score: {pronunciationResult.ProsodyScore}, Pronunciation score: {pronunciationResult.PronunciationScore}, Completeness score : {pronunciationResult.CompletenessScore}, FluencyScore: {pronunciationResult.FluencyScore}");

                Console.WriteLine(" Word-level details:");

                foreach (var word in pronunciationResult.Words)
                {
                    Console.WriteLine($" Word: {word.Word}, Accuracy score: {word.AccuracyScore}, Error type: {word.ErrorType}.");
                }

                break;
            }

            case ResultReason.NoMatch:
            {
                Console.WriteLine($"NOMATCH: Speech could not be recognized.");
                break;
            }

            case ResultReason.Canceled:
            {
                var cancellation = CancellationDetails.FromResult(result);
                Console.WriteLine($"CANCELED: Reason={cancellation.Reason}");

                // Error details are only printed when the cancellation was caused by an error.
                if (cancellation.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"CANCELED: ErrorCode={cancellation.ErrorCode}");
                    Console.WriteLine($"CANCELED: ErrorDetails={cancellation.ErrorDetails}");
                    Console.WriteLine($"CANCELED: Did you update the subscription info?");
                }

                break;
            }
        }
    }
}

// Speech recognition from a microphone with Microsoft Audio Stack enabled and pre-defined microphone array geometry specified.
public static async Task RecognitionFromMicrophoneWithMASEnabledAndPresetGeometrySpecified()
{
Expand Down

0 comments on commit 4028dbc

Please sign in to comment.