Skip to content

Commit

Permalink
No public description
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 633181944
  • Loading branch information
MediaPipe Team authored and copybara-github committed May 13, 2024
1 parent ededd21 commit 3ab5229
Show file tree
Hide file tree
Showing 4 changed files with 113 additions and 3 deletions.
9 changes: 8 additions & 1 deletion mediapipe/calculators/audio/spectrogram_calculator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <complex>
#include <deque>
#include <memory>
#include <optional>
#include <string>

#include "absl/strings/string_view.h"
Expand Down Expand Up @@ -293,11 +294,17 @@ absl::Status SpectrogramCalculator::Open(CalculatorContext* cc) {
window_fun->GetPeriodicSamples(frame_duration_samples_, &window);

// Propagate settings down to the actual Spectrogram object.
std::optional<int> fft_size;
if (spectrogram_options.fft_size() > 0) {
fft_size = spectrogram_options.fft_size();
}

spectrogram_generators_.clear();
for (int i = 0; i < num_input_channels_; i++) {
spectrogram_generators_.push_back(
std::unique_ptr<audio_dsp::Spectrogram>(new audio_dsp::Spectrogram()));
spectrogram_generators_[i]->Initialize(window, frame_step_samples());
spectrogram_generators_[i]->Initialize(window, frame_step_samples(),
fft_size);
}

num_output_channels_ =
Expand Down
4 changes: 4 additions & 0 deletions mediapipe/calculators/audio/spectrogram_calculator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -83,4 +83,8 @@ message SpectrogramCalculatorOptions {
// the cumulative timestamping, which is inferred from the initial input
// timestamp and the cumulative number of samples.
optional bool use_local_timestamp = 8 [default = false];

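// Fixed FFT size in samples. When set to a positive value it must be a power
// of two and no smaller than the frame length in samples. If left at 0 (or
// any non-positive value), the FFT size defaults to the next power of two of
// the frame length in samples.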
optional int32 fft_size = 9 [default = 0];
}
27 changes: 25 additions & 2 deletions mediapipe/calculators/audio/spectrogram_calculator_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <cstdint>
#include <memory>
#include <numeric>
#include <optional>
#include <string>
#include <vector>

Expand Down Expand Up @@ -176,8 +177,10 @@ class SpectrogramCalculatorTest
}

// Checks output headers and Timestamps.
void CheckOutputHeadersAndTimestamps() {
const int fft_size = audio_dsp::NextPowerOfTwo(frame_duration_samples_);
void CheckOutputHeadersAndTimestamps(
std::optional<int> fft_size_override = std::nullopt) {
const int fft_size = fft_size_override.value_or(
audio_dsp::NextPowerOfTwo(frame_duration_samples_));

TimeSeriesHeader expected_header = input().header.Get<TimeSeriesHeader>();
expected_header.set_num_channels(fft_size / 2 + 1);
Expand Down Expand Up @@ -310,6 +313,26 @@ TEST_F(SpectrogramCalculatorTest, IntegerFrameDurationNoOverlap) {
EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
}

// Verifies that an explicit `fft_size` option overrides the FFT size that
// would otherwise be derived from the frame length, and that the override is
// reflected in the output header while framing is unaffected.
TEST_F(SpectrogramCalculatorTest, IntegerFrameDurationNoOverlap2XFftSize) {
  // 100-sample frames with no overlap; the default FFT size for this frame
  // length would be NextPowerOfTwo(100) == 128.
  options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
  options_.set_frame_overlap_seconds(0.0 / input_sample_rate_);
  options_.set_pad_final_packet(false);

  // Request a larger, explicit power-of-two FFT size than the default.
  constexpr int kFftSize = 512;
  options_.set_fft_size(kFftSize);

  // With 100-sample non-overlapping frames, packets of 500 and 200 samples
  // yield 5 and 2 output frames respectively.
  const std::vector<int> input_packet_sizes = {500, 200};
  const std::vector<int> expected_output_packet_sizes = {5, 2};

  InitializeGraph();
  FillInputHeader();
  SetupConstantInputPackets(input_packet_sizes);

  MP_ASSERT_OK(Run());

  // The header's channel count must follow the overridden FFT size.
  CheckOutputHeadersAndTimestamps(kFftSize);
  EXPECT_EQ(OutputFramesPerPacket(), expected_output_packet_sizes);
}

TEST_F(SpectrogramCalculatorTest, IntegerFrameDurationSomeOverlap) {
options_.set_frame_duration_seconds(100.0 / input_sample_rate_);
options_.set_frame_overlap_seconds(60.0 / input_sample_rate_);
Expand Down
76 changes: 76 additions & 0 deletions third_party/com_google_audio_tools_fixes.diff
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,82 @@ index 56e45d2..37ab6e9 100644
#include <iostream>
#include <cmath>
#include <string>
diff --git a/audio/dsp/spectrogram/spectrogram.cc b/audio/dsp/spectrogram/spectrogram.cc
index e52280d..13c45d0 100644
--- a/audio/dsp/spectrogram/spectrogram.cc
+++ b/audio/dsp/spectrogram/spectrogram.cc
@@ -18,6 +18,7 @@
#include "audio/dsp/spectrogram/spectrogram.h"

#include <math.h>
+#include <optional>

#include "audio/dsp/number_util.h"
#include "audio/dsp/window_functions.h"
@@ -38,14 +39,15 @@ bool Spectrogram::ResetSampleBuffer() {
return true;
}

-bool Spectrogram::Initialize(int window_length, int step_length) {
+bool Spectrogram::Initialize(int window_length, int step_length,
+ std::optional<int> fft_length) {
std::vector<double> window;
HannWindow().GetPeriodicSamples(window_length, &window);
- return Initialize(window, step_length);
+ return Initialize(window, step_length, fft_length);
}

-bool Spectrogram::Initialize(const std::vector<double>& window,
- int step_length) {
+bool Spectrogram::Initialize(const std::vector<double>& window, int step_length,
+ std::optional<int> fft_length) {
window_length_ = window.size();
window_ = window; // Copy window.
if (window_length_ < 2) {
@@ -61,7 +63,12 @@ bool Spectrogram::Initialize(const std::vector<double>& window,
return false;
}

- fft_length_ = NextPowerOfTwo(window_length_);
+ if (fft_length.has_value() && !IsPowerOfTwoOrZero(fft_length.value())) {
+ LOG(ERROR) << "FFT length must be a power of two.";
+ initialized_ = false;
+ return false;
+ }
+ fft_length_ = fft_length.value_or(NextPowerOfTwo(window_length_));
CHECK(fft_length_ >= window_length_);
output_frequency_channels_ = 1 + fft_length_ / 2;

diff --git a/audio/dsp/spectrogram/spectrogram.h b/audio/dsp/spectrogram/spectrogram.h
index 1214422..0f6ada6 100644
--- a/audio/dsp/spectrogram/spectrogram.h
+++ b/audio/dsp/spectrogram/spectrogram.h
@@ -36,6 +36,7 @@
#define AUDIO_DSP_SPECTROGRAM_SPECTROGRAM_H_

#include <complex>
+#include <optional>
#include <deque>
#include <vector>

@@ -57,11 +58,14 @@ class Spectrogram {
// (both in samples). Internally a Hann window is used as the window
// function. Returns true on success, after which calls to Process()
// are possible. window_length must be greater than 1 and step
- // length must be greater than 0.
- bool Initialize(int window_length, int step_length);
+ // length must be greater than 0. If given, fft_length must be a power of 2
+ // and at least window_length; otherwise NextPowerOfTwo(window_length) is used.
+ bool Initialize(int window_length, int step_length,
+ std::optional<int> fft_length = std::nullopt);

// Initialize with an explicit window instead of a length.
- bool Initialize(const vector<double>& window, int step_length);
+ bool Initialize(const std::vector<double>& window, int step_length,
+ std::optional<int> fft_length = std::nullopt);

// Re-initializes/resets the internal sample buffer to the state before any
// samples have been passed to the Compute methods.
diff --git a/third_party/eigen3/BUILD b/third_party/eigen3/BUILD
index 497c1f0..de1c7f4 100644
--- a/third_party/eigen3/BUILD
Expand Down

0 comments on commit 3ab5229

Please sign in to comment.