Skip to content

Commit

Permalink
Add hysteresis to silence detection algorithm (#211)
Browse files Browse the repository at this point in the history
* Add hysteresis to silence detection algorithm

Fixes #195

Signed-off-by: Dave Thaler <[email protected]>

* Address coderabbit feedback

Signed-off-by: Dave Thaler <[email protected]>

---------

Signed-off-by: Dave Thaler <[email protected]>
  • Loading branch information
dthaler authored Nov 30, 2024
1 parent 4283d09 commit d5b49f6
Show file tree
Hide file tree
Showing 4 changed files with 58 additions and 14 deletions.
2 changes: 1 addition & 1 deletion OrcanodeMonitor/Core/Fetcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -908,7 +908,7 @@ public async static Task UpdateManifestTimestampAsync(OrcanodeMonitorContext con
try
{
using Stream stream = await _httpClient.GetStreamAsync(newUri);
node.AudioStreamStatus = await FfmpegCoreAnalyzer.AnalyzeAudioStreamAsync(stream);
node.AudioStreamStatus = await FfmpegCoreAnalyzer.AnalyzeAudioStreamAsync(stream, oldStatus);
node.AudioStandardDeviation = 0.0;
} catch (Exception ex)
{
Expand Down
48 changes: 38 additions & 10 deletions OrcanodeMonitor/Core/FfmpegCoreAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,29 @@ namespace OrcanodeMonitor.Core
{
public class FfmpegCoreAnalyzer
{
// We consider anything above this average amplitude as not silence.
const double _defaultMaxSilenceAmplitude = 20.0;
/// <summary>
/// Maximum amplitude threshold used by the silence check. The value is read
/// from the ORCASOUND_MAX_SILENCE_AMPLITUDE environment variable on every
/// access; when the variable is unset or cannot be parsed as a number,
/// <c>_defaultMaxSilenceAmplitude</c> is used instead.
/// </summary>
private static double MaxSilenceAmplitude
{
    get
    {
        string? configuredValue = Environment.GetEnvironmentVariable("ORCASOUND_MAX_SILENCE_AMPLITUDE");

        // Environment variables are machine-readable configuration, so parse
        // them with the invariant culture. Parsing with the current culture
        // would turn "17.5" into 175 on hosts whose locale uses ',' as the
        // decimal separator. The number styles match the defaults of the
        // single-argument double.TryParse overload.
        bool parsed = double.TryParse(
            configuredValue,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out double amplitude);

        return parsed ? amplitude : _defaultMaxSilenceAmplitude;
    }
}

// We consider anything below this average amplitude as silence.
const double MaxSilenceAmplitude = 17.0;
const double _defaultMinNoiseAmplitude = 15.0;
/// <summary>
/// Minimum amplitude threshold used by the silence check. The value is read
/// from the ORCASOUND_MIN_NOISE_AMPLITUDE environment variable on every
/// access; when the variable is unset or cannot be parsed as a number,
/// <c>_defaultMinNoiseAmplitude</c> is used instead.
/// </summary>
private static double MinNoiseAmplitude
{
    get
    {
        string? configuredValue = Environment.GetEnvironmentVariable("ORCASOUND_MIN_NOISE_AMPLITUDE");

        // Environment variables are machine-readable configuration, so parse
        // them with the invariant culture. Parsing with the current culture
        // would turn "15.5" into 155 on hosts whose locale uses ',' as the
        // decimal separator. The number styles match the defaults of the
        // single-argument double.TryParse overload.
        bool parsed = double.TryParse(
            configuredValue,
            System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
            System.Globalization.CultureInfo.InvariantCulture,
            out double amplitude);

        return parsed ? amplitude : _defaultMinNoiseAmplitude;
    }
}

// Minimum ratio of amplitude outside the hum range to amplitude
// within the hum range. So far the max in a known-unintelligible
Expand All @@ -36,7 +57,7 @@ private static double MinSignalRatio

private static bool IsHumFrequency(double frequency) => (frequency >= MinHumFrequency && frequency <= MaxHumFrequency);

private static OrcanodeOnlineStatus AnalyzeFrequencies(float[] data, int sampleRate)
private static OrcanodeOnlineStatus AnalyzeFrequencies(float[] data, int sampleRate, OrcanodeOnlineStatus oldStatus)
{
int n = data.Length;
Complex[] complexData = data.Select(d => new Complex(d, 0)).ToArray();
Expand All @@ -48,12 +69,18 @@ private static OrcanodeOnlineStatus AnalyzeFrequencies(float[] data, int sampleR
}

double max = amplitudes.Max();
if (max < MaxSilenceAmplitude)
if (max < MinNoiseAmplitude)
{
// File contains mostly silence across all frequencies.
return OrcanodeOnlineStatus.Unintelligible;
}

if ((max <= MaxSilenceAmplitude) && (oldStatus == OrcanodeOnlineStatus.Unintelligible))
{
// In between the min and max unintelligibility range, so keep previous status.
return OrcanodeOnlineStatus.Unintelligible;
}

// Find the maximum amplitude outside the audio hum range.
double maxNonHumAmplitude = 0;
for (int i = 0; i < amplitudes.Length; i++)
Expand Down Expand Up @@ -83,8 +110,9 @@ private static OrcanodeOnlineStatus AnalyzeFrequencies(float[] data, int sampleR
/// Get the status of the most recent audio stream sample.
/// </summary>
/// <param name="args">FFMpeg arguments</param>
/// <param name="oldStatus">Previous online status</param>
/// <returns>Status of the most recent audio samples</returns>
private static async Task<OrcanodeOnlineStatus> AnalyzeAsync(FFMpegArguments args)
private static async Task<OrcanodeOnlineStatus> AnalyzeAsync(FFMpegArguments args, OrcanodeOnlineStatus oldStatus)
{
var outputStream = new MemoryStream(); // Create an output stream (e.g., MemoryStream)
var pipeSink = new StreamPipeSink(outputStream);
Expand Down Expand Up @@ -115,21 +143,21 @@ private static async Task<OrcanodeOnlineStatus> AnalyzeAsync(FFMpegArguments arg
}

// Perform FFT and analyze frequencies
var status = AnalyzeFrequencies(floatBuffer, waveFormat.SampleRate);
var status = AnalyzeFrequencies(floatBuffer, waveFormat.SampleRate, oldStatus);
return status;
}

public static async Task<OrcanodeOnlineStatus> AnalyzeFileAsync(string filename)
public static async Task<OrcanodeOnlineStatus> AnalyzeFileAsync(string filename, OrcanodeOnlineStatus oldStatus)
{
var args = FFMpegArguments.FromFileInput(filename);
return await AnalyzeAsync(args);
return await AnalyzeAsync(args, oldStatus);
}

public static async Task<OrcanodeOnlineStatus> AnalyzeAudioStreamAsync(Stream stream)
public static async Task<OrcanodeOnlineStatus> AnalyzeAudioStreamAsync(Stream stream, OrcanodeOnlineStatus oldStatus)
{
StreamPipeSource streamPipeSource = new StreamPipeSource(stream);
var args = FFMpegArguments.FromPipeInput(streamPipeSource);
return await AnalyzeAsync(args);
return await AnalyzeAsync(args, oldStatus);
}
}
}
}
18 changes: 15 additions & 3 deletions Test/UnintelligibilityTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ namespace Test
[TestClass]
public class UnintelligibilityTests
{
private async Task TestSampleAsync(string filename, OrcanodeOnlineStatus expected_status)
private async Task TestSampleAsync(string filename, OrcanodeOnlineStatus expected_status, OrcanodeOnlineStatus? oldStatus = null)
{
// Get the current directory (where the test assembly is located)
string currentDirectory = Directory.GetCurrentDirectory();
Expand All @@ -23,7 +23,8 @@ private async Task TestSampleAsync(string filename, OrcanodeOnlineStatus expecte
string filePath = Path.Combine(rootDirectory, "Test\\samples", filename);
try
{
OrcanodeOnlineStatus status = await FfmpegCoreAnalyzer.AnalyzeFileAsync(filePath);
OrcanodeOnlineStatus previousStatus = oldStatus ?? expected_status;
OrcanodeOnlineStatus status = await FfmpegCoreAnalyzer.AnalyzeFileAsync(filePath, previousStatus);
Assert.IsTrue(status == expected_status);
}
catch (Exception ex)
Expand Down Expand Up @@ -54,9 +55,20 @@ public async Task TestNormalSample()
await TestSampleAsync("normal\\live385.ts", OrcanodeOnlineStatus.Online);
await TestSampleAsync("normal\\live839.ts", OrcanodeOnlineStatus.Online);
await TestSampleAsync("normal\\live1184.ts", OrcanodeOnlineStatus.Online);
}

[TestMethod]
public async Task TestHysteresisBehavior()
{
// Bush Point file from around 5pm 11/18/2024 is relatively quiet (max amplitude 17.46).
await TestSampleAsync("normal\\live6079.ts", OrcanodeOnlineStatus.Online);
// Test state retention when transitioning from Online to borderline Unintelligible.
await TestSampleAsync("normal/live6079.ts", OrcanodeOnlineStatus.Online, OrcanodeOnlineStatus.Online);

// Test state retention when transitioning from Unintelligible to borderline Online.
await TestSampleAsync("normal/live6079.ts", OrcanodeOnlineStatus.Unintelligible, OrcanodeOnlineStatus.Unintelligible);

// Test clear state changes (should override hysteresis).
await TestSampleAsync("unintelligible/live4869.ts", OrcanodeOnlineStatus.Unintelligible, OrcanodeOnlineStatus.Online);
}
}
}
4 changes: 4 additions & 0 deletions docs/Design.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ The following state will be stored per orcanode:

**ORCASOUND_MIN_INTELLIGIBLE_SIGNAL_PERCENT**: The minimum percentage of max amplitude across all frequencies of amplitude outside the 50-60 Hz range needed to determine that an audio stream is intelligible. Default: 30

**ORCASOUND_MAX_SILENCE_AMPLITUDE**: The maximum amplitude at which an audio stream might still be considered unintelligible due to silence. Default: 20

**ORCASOUND_MIN_NOISE_AMPLITUDE**: The minimum amplitude at which an audio stream might still be considered intelligible. Default: 15

**MEZMO_LOG_SECONDS**: The number of seconds of Mezmo logs to check for activity. Default: 60

## Web page front end
Expand Down

0 comments on commit d5b49f6

Please sign in to comment.