Skip to content

Commit 8571fcb

Browse files
authored
Robustify speech recognition test (#76652)
* robustify
* more improvements
* Update src/libraries/System.Speech/tests/SynthesizeRecognizeTests.cs
* Apply suggestions from code review
1 parent 4aa4d28 commit 8571fcb

File tree

1 file changed

+112
-19
lines changed

1 file changed

+112
-19
lines changed

src/libraries/System.Speech/tests/SynthesizeRecognizeTests.cs

Lines changed: 112 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
using System.Threading;
1515
using System.Xml;
1616
using Xunit;
17+
using Xunit.Abstractions;
1718

1819
namespace SampleSynthesisTests
1920
{
@@ -26,33 +27,125 @@ public class SynthesizeRecognizeTests : FileCleanupTestBase
2627
PlatformDetection.IsNotWindowsNanoNorServerCore &&
2728
SpeechRecognitionEngine.InstalledRecognizers().Count > 0;
2829

30+
private ITestOutputHelper _output;
31+
32+
public SynthesizeRecognizeTests(ITestOutputHelper output)
33+
{
34+
_output = output;
35+
}
36+
2937
[ConditionalFact(nameof(HasInstalledRecognizers))]
30-
public void SpeechSynthesizerToSpeechRecognitionEngine()
38+
public void SpeechSynthesizerToSpeechRecognitionEngine1()
3139
{
32-
if (Thread.CurrentThread.CurrentCulture.ToString() != "en-US")
33-
return;
40+
// word chosen to be recognized with high confidence
41+
SpeechSynthesizerToSpeechRecognitionEngine_Core("recognize", "recognize");
42+
}
3443

35-
using var ms = new MemoryStream();
44+
[ConditionalFact(nameof(HasInstalledRecognizers))]
45+
public void SpeechSynthesizerToSpeechRecognitionEngine2()
46+
{
47+
// word chosen to be recognized with high confidence
48+
SpeechSynthesizerToSpeechRecognitionEngine_Core("apple", "apple");
49+
}
3650

37-
using (var synth = new SpeechSynthesizer())
38-
{
39-
synth.SetOutputToWaveStream(ms);
40-
var prompt = new Prompt("synthesizer");
41-
synth.Speak(prompt);
42-
}
51+
[ConditionalFact(nameof(HasInstalledRecognizers))]
52+
public void SpeechSynthesizerToSpeechRecognitionEngine_SilenceFails()
53+
{
54+
SpeechSynthesizerToSpeechRecognitionEngine_Core(" ", null);
55+
}
4356

44-
ms.Position = 0;
57+
private void SpeechSynthesizerToSpeechRecognitionEngine_Core(string input, string output)
58+
{
59+
if (PlatformDetection.IsWindows7 && PlatformDetection.IsX86Process)
60+
return; // Flaky on this configuration
4561

46-
using (var rec = new SpeechRecognitionEngine())
62+
RetryHelper.Execute(() => // Flaky in some cases
4763
{
48-
rec.LoadGrammar(new DictationGrammar());
49-
rec.SetInputToWaveStream(ms);
50-
RecognitionResult result = rec.Recognize();
64+
if (Thread.CurrentThread.CurrentCulture.ToString() != "en-US")
65+
return;
5166

52-
Assert.True(result.Confidence > 0.1);
53-
// handles "synthesizer", "synthesizes", etc.
54-
Assert.StartsWith("synthe", result.Text, StringComparison.OrdinalIgnoreCase);
55-
}
67+
using var ms = new MemoryStream();
68+
69+
using (var synth = new SpeechSynthesizer())
70+
{
71+
synth.SetOutputToWaveStream(ms);
72+
var prompt = new Prompt(input);
73+
synth.Speak(prompt);
74+
}
75+
76+
ms.Position = 0;
77+
78+
using (var rec = new SpeechRecognitionEngine())
79+
{
80+
Stopwatch sw = new();
81+
rec.LoadGrammar(new DictationGrammar());
82+
rec.SetInputToWaveStream(ms);
83+
rec.InitialSilenceTimeout = TimeSpan.FromSeconds(60); // for slow machines
84+
rec.BabbleTimeout = TimeSpan.FromSeconds(60); // for slow machines/robustness
85+
86+
StringBuilder diagnostics = new();
87+
diagnostics.AppendLine($"Passing synthesized input '{input}'");
88+
try
89+
{
90+
rec.SpeechDetected += (o, args) =>
91+
{
92+
diagnostics.AppendLine($"Speech detected at position {args.AudioPosition}");
93+
};
94+
95+
rec.SpeechRecognitionRejected += (o, args) =>
96+
{
97+
if (output != null)
98+
{
99+
foreach (RecognizedPhrase phrase in args.Result.Alternates)
100+
{
101+
diagnostics.AppendLine($"Alternatives included '{phrase.Text}' with confidence {phrase.Confidence}");
102+
}
103+
diagnostics.Append($"Elapsed {sw.Elapsed}");
104+
Assert.Fail($"Recognition of '{input}' was expected to produce a string containing '{output}', but failed");
105+
}
106+
};
107+
108+
RecognitionResult argsResult = null;
109+
rec.SpeechRecognized += (o, args) =>
110+
{
111+
argsResult = args.Result;
112+
diagnostics.AppendLine($"Received speech recognized event with result '{args.Result.Text}'");
113+
};
114+
115+
sw.Start();
116+
RecognitionResult result = rec.Recognize();
117+
sw.Stop();
118+
119+
Assert.Equal(argsResult, result);
120+
121+
if (output == null)
122+
{
123+
Assert.Null(result);
124+
}
125+
else
126+
{
127+
Assert.NotNull(result);
128+
diagnostics.AppendLine($"Recognized '{result.Text}' with confidence {result.Confidence}");
129+
diagnostics.AppendLine($"Elapsed {sw.Elapsed}");
130+
131+
foreach (RecognizedPhrase phrase in result.Alternates)
132+
{
133+
diagnostics.AppendLine($"Alternatives included '{phrase.Text}' with confidence {phrase.Confidence}");
134+
}
135+
136+
Assert.True(result.Confidence > 0.1); // strings we use are normally > 0.8
137+
138+
// Use Contains because the result sometimes has extra text before or after the word, e.g., "recognize" can come back as "recognized" or "a recognize"
139+
Assert.Contains(output, result.Text, StringComparison.OrdinalIgnoreCase);
140+
}
141+
}
142+
catch
143+
{
144+
_output.WriteLine(diagnostics.ToString());
145+
throw;
146+
}
147+
}
148+
});
56149
}
57150

58151
[ConditionalFact(nameof(HasInstalledRecognizers))]

0 commit comments

Comments
 (0)