From 3ab52299bc497ff2ab6639c0fc33b2c87288fe53 Mon Sep 17 00:00:00 2001 From: MediaPipe Team Date: Mon, 13 May 2024 05:58:55 -0700 Subject: [PATCH] No public description PiperOrigin-RevId: 633181944 --- .../audio/spectrogram_calculator.cc | 9 ++- .../audio/spectrogram_calculator.proto | 4 + .../audio/spectrogram_calculator_test.cc | 27 ++++++- third_party/com_google_audio_tools_fixes.diff | 76 +++++++++++++++++++ 4 files changed, 113 insertions(+), 3 deletions(-) diff --git a/mediapipe/calculators/audio/spectrogram_calculator.cc b/mediapipe/calculators/audio/spectrogram_calculator.cc index ac32100061..8ff8c83fde 100644 --- a/mediapipe/calculators/audio/spectrogram_calculator.cc +++ b/mediapipe/calculators/audio/spectrogram_calculator.cc @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "absl/strings/string_view.h" @@ -293,11 +294,17 @@ absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) { window_fun->GetPeriodicSamples(frame_duration_samples_, &window); // Propagate settings down to the actual Spectrogram object. 
+ std::optional fft_size; + if (spectrogram_options.fft_size() > 0) { + fft_size = spectrogram_options.fft_size(); + } + spectrogram_generators_.clear(); for (int i = 0; i < num_input_channels_; i++) { spectrogram_generators_.push_back( std::unique_ptr(new audio_dsp::Spectrogram())); - spectrogram_generators_[i]->Initialize(window, frame_step_samples()); + spectrogram_generators_[i]->Initialize(window, frame_step_samples(), + fft_size); } num_output_channels_ = diff --git a/mediapipe/calculators/audio/spectrogram_calculator.proto b/mediapipe/calculators/audio/spectrogram_calculator.proto index ac7181f4f0..68eaf0c483 100644 --- a/mediapipe/calculators/audio/spectrogram_calculator.proto +++ b/mediapipe/calculators/audio/spectrogram_calculator.proto @@ -83,4 +83,8 @@ message SpectrogramCalculatorOptions { // the cumulative timestamping, which is inferred from the initial input // timestamp and the cumulative number of samples. optional bool use_local_timestamp = 8 [default = false]; + + // Fixed FFT size: a power of two no smaller than the frame length in samples. + // If <= 0, the FFT size is derived from the frame duration and sample rate. + optional int32 fft_size = 9 [default = 0]; } diff --git a/mediapipe/calculators/audio/spectrogram_calculator_test.cc b/mediapipe/calculators/audio/spectrogram_calculator_test.cc index 0e5f5db189..8dc48548f7 100644 --- a/mediapipe/calculators/audio/spectrogram_calculator_test.cc +++ b/mediapipe/calculators/audio/spectrogram_calculator_test.cc @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -176,8 +177,10 @@ class SpectrogramCalculatorTest } // Checks output headers and Timestamps. 
- void CheckOutputHeadersAndTimestamps() { - const int fft_size = audio_dsp::NextPowerOfTwo(frame_duration_samples_); + void CheckOutputHeadersAndTimestamps( + std::optional fft_size_override = std::nullopt) { + const int fft_size = fft_size_override.value_or( + audio_dsp::NextPowerOfTwo(frame_duration_samples_)); TimeSeriesHeader expected_header = input().header.Get(); expected_header.set_num_channels(fft_size / 2 + 1); @@ -310,6 +313,26 @@ TEST_F(SpectrogramCalculatorTest, IntegerFrameDurationNoOverlap) { EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes); } +TEST_F(SpectrogramCalculatorTest, IntegerFrameDurationNoOverlap2XFftSize) { + options_.set_frame_duration_seconds(100.0 / input_sample_rate_); + options_.set_frame_overlap_seconds(0.0 / input_sample_rate_); + options_.set_pad_final_packet(false); + constexpr int kFFtSize = 512; + options_.set_fft_size(kFFtSize); + + const std::vector input_packet_sizes = {500, 200}; + const std::vector expected_output_packet_sizes = {5, 2}; + + InitializeGraph(); + FillInputHeader(); + SetupConstantInputPackets(input_packet_sizes); + + MP_ASSERT_OK(Run()); + + CheckOutputHeadersAndTimestamps(kFFtSize); + EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes); +} + TEST_F(SpectrogramCalculatorTest, IntegerFrameDurationSomeOverlap) { options_.set_frame_duration_seconds(100.0 / input_sample_rate_); options_.set_frame_overlap_seconds(60.0 / input_sample_rate_); diff --git a/third_party/com_google_audio_tools_fixes.diff b/third_party/com_google_audio_tools_fixes.diff index c0431de72e..4cd0f78088 100644 --- a/third_party/com_google_audio_tools_fixes.diff +++ b/third_party/com_google_audio_tools_fixes.diff @@ -34,6 +34,82 @@ index 56e45d2..37ab6e9 100644 #include #include #include +diff --git a/audio/dsp/spectrogram/spectrogram.cc b/audio/dsp/spectrogram/spectrogram.cc +index e52280d..13c45d0 100644 +--- a/audio/dsp/spectrogram/spectrogram.cc ++++ b/audio/dsp/spectrogram/spectrogram.cc +@@ -18,6 +18,7 @@ + 
#include "audio/dsp/spectrogram/spectrogram.h" + + #include ++#include + + #include "audio/dsp/number_util.h" + #include "audio/dsp/window_functions.h" +@@ -38,14 +39,15 @@ bool Spectrogram::ResetSampleBuffer() { + return true; + } + +-bool Spectrogram::Initialize(int window_length, int step_length) { ++bool Spectrogram::Initialize(int window_length, int step_length, ++ std::optional fft_length) { + std::vector window; + HannWindow().GetPeriodicSamples(window_length, &window); +- return Initialize(window, step_length); ++ return Initialize(window, step_length, fft_length); + } + +-bool Spectrogram::Initialize(const std::vector& window, +- int step_length) { ++bool Spectrogram::Initialize(const std::vector& window, int step_length, ++ std::optional fft_length) { + window_length_ = window.size(); + window_ = window; // Copy window. + if (window_length_ < 2) { +@@ -61,7 +63,12 @@ bool Spectrogram::Initialize(const std::vector& window, + return false; + } + +- fft_length_ = NextPowerOfTwo(window_length_); ++ if (fft_length.has_value() && !IsPowerOfTwoOrZero(fft_length.value())) { ++ LOG(ERROR) << "FFT length must be a power of two."; ++ initialized_ = false; ++ return false; ++ } ++ fft_length_ = fft_length.value_or(NextPowerOfTwo(window_length_)); + CHECK(fft_length_ >= window_length_); + output_frequency_channels_ = 1 + fft_length_ / 2; + +diff --git a/audio/dsp/spectrogram/spectrogram.h b/audio/dsp/spectrogram/spectrogram.h +index 1214422..0f6ada6 100644 +--- a/audio/dsp/spectrogram/spectrogram.h ++++ b/audio/dsp/spectrogram/spectrogram.h +@@ -36,6 +36,7 @@ + #define AUDIO_DSP_SPECTROGRAM_SPECTROGRAM_H_ + + #include ++#include + #include + #include + +@@ -57,11 +58,14 @@ class Spectrogram { + // (both in samples). Internally a Hann window is used as the window + // function. Returns true on success, after which calls to Process() + // are possible. window_length must be greater than 1 and step +- // length must be greater than 0. 
+- bool Initialize(int window_length, int step_length); ++ // length must be greater than 0. fft_length defines the fft length which must ++ // be a power of 2 >= window_length (default: NextPowerOfTwo(window_length)). ++ bool Initialize(int window_length, int step_length, ++ std::optional fft_length = std::nullopt); + + // Initialize with an explicit window instead of a length. +- bool Initialize(const vector& window, int step_length); ++ bool Initialize(const std::vector& window, int step_length, ++ std::optional fft_length = std::nullopt); + + // Re-initializes/resets the internal sample buffer to the state before any + // samples have been passed to the Compute methods. diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD index 497c1f0..de1c7f4 100644 --- a/third_party/eigen3/BUILD