Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add new algo audio2midi #1437

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 143 additions & 0 deletions src/algorithms/tonal/audio2midi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
#include "audio2midi.h"

using namespace std;
using namespace essentia;
using namespace standard;

// Definitions of the static metadata strings declared in the Audio2Midi class:
// the algorithm name, its category, and a one-line description (wrapped in DOC).
const char *Audio2Midi::name = "Audio2Midi";
const char *Audio2Midi::category = "Pitch";
const char *Audio2Midi::description = DOC("Wrapper around Audio2Pitch and Pitch2Midi for real time application");
xaviliz marked this conversation as resolved.
Show resolved Hide resolved

void Audio2Midi::configure()
{
_sampleRate = parameter("sampleRate").toReal();
// _frameSize = parameter("frameSize").toInt();
_hopSize = parameter("hopSize").toInt();
// _pitchAlgorithm = parameter("pitchAlgorithm").toString();
// _loudnessAlgorithm = parameter("loudnessAlgorithm").toString();
_minFrequency = parameter("minFrequency").toReal();
_maxFrequency = parameter("maxFrequency").toReal();
_tuningFrequency = parameter("tuningFrequency").toInt();
_pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal();
_loudnessThreshold = parameter("loudnessThreshold").toReal();
_transposition = parameter("transpositionAmount").toInt();
_minOccurrenceRate = parameter("minOccurrenceRate").toReal();
_midiBufferDuration = parameter("midiBufferDuration").toReal();
_minNoteChangePeriod = parameter("minNoteChangePeriod").toReal();
_minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
_minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();

// define frameSize depending on sampleRate
xaviliz marked this conversation as resolved.
Show resolved Hide resolved
switch (static_cast<int>(_sampleRate)) {
case 16000:
_frameSize = 2048;
break;
case 24000:
_frameSize = 4096;
break;
case 44100:
_frameSize = _fixedFrameSize;
xaviliz marked this conversation as resolved.
Show resolved Hide resolved
break;
case 48000:
_frameSize = _fixedFrameSize;
break;
default:
_frameSize = _fixedFrameSize;
}

_applyTimeCompensation = parameter("applyTimeCompensation").toBool();

if (_frameSize > _sampleRate * 0.5)
{
throw EssentiaException("Sax2Pitch: Frame size cannot be higher than Nyquist frequency");
xaviliz marked this conversation as resolved.
Show resolved Hide resolved
xaviliz marked this conversation as resolved.
Show resolved Hide resolved
}

_lowpass->configure(INHERIT("sampleRate"),
"cutoffFrequency", 1000);
_framebuffer->configure("bufferSize", _frameSize);
_audio2pitch->configure(INHERIT("sampleRate"),
"frameSize", _frameSize,
"pitchAlgorithm", _pitchAlgorithm,
"minFrequency", _minFrequency,
"maxFrequency", _maxFrequency,
INHERIT("pitchConfidenceThreshold"),
INHERIT("loudnessThreshold"));

_pitch2midi->configure(INHERIT("sampleRate"),
INHERIT("hopSize"),
INHERIT("minOccurrenceRate"),
INHERIT("applyTimeCompensation"),
"minOnsetCheckPeriod", _minOnsetCheckPeriod,
"minOffsetCheckPeriod", _minOffsetCheckPeriod,
"minNoteChangePeriod", _minNoteChangePeriod,
"midiBufferDuration", _midiBufferDuration,
"minFrequency", _minFrequency,
"tuningFrequency", _tuningFrequency,
"transpositionAmount", _transposition);
}

void Audio2Midi::compute()
{
// get ref to input
const std::vector<Real> &frame = _frame.get();
Real& pitch = _pitch.get();
Real& loudness = _loudness.get();
vector<string>& messageType = _messageType.get();
vector<Real>& midiNoteNumber = _midiNoteNumber.get();
vector<Real>& timeCompensation = _timeCompensation.get();

_lowpass->input("signal").set(frame);
_lowpass->output("signal").set(lpFrame);

_framebuffer->input("frame").set(lpFrame);
_framebuffer->output("frame").set(analysisFrame);

_audio2pitch->input("frame").set(analysisFrame);
_audio2pitch->output("pitch").set(pitch);
_audio2pitch->output("pitchConfidence").set(pitchConfidence);
_audio2pitch->output("loudness").set(loudness);
_audio2pitch->output("voiced").set(voiced);

_pitch2midi->input("pitch").set(pitch);
_pitch2midi->input("voiced").set(voiced);
_pitch2midi->output("midiNoteNumber").set(midiNoteNumber);
_pitch2midi->output("timeCompensation").set(timeCompensation);
_pitch2midi->output("messageType").set(messageType);

// E_INFO("\nsax2midi: algorithm inputs and outputs set");
_lowpass->compute();
// E_INFO("sax2midi: lp compute");
_framebuffer->compute();
// E_INFO("sax2midi: framebuffer compute");
// std::cout << "frame: \n" << frame << "\nanalysisFrame: \n" << analysisFrame << "\n";
_audio2pitch->compute();
// E_INFO("sax2midi: a2p compute");
_pitch2midi->compute();
// E_INFO("sax2midi: p2m compute");

// TODO: assign outputs

// set outputs
// get pitchMessage from log_message_formatter
/*pitchMessage = _formatter->pitch_loudness(midiNoteNumber, pitch, pitchConfidence, loudness);

switch (messageType)
{
case 0:
noteOffMessage = _formatter->note_off(midiNoteNumber, offsetTimeCompensation);
break;
case 1:
noteOnMessage = _formatter->note_on(midiNoteNumber, pitch, pitchConfidence, onsetTimeCompensation);
break;
case 2:
noteOffMessage = _formatter->note_off(previousMidiNoteNumber, offsetTimeCompensation);
noteOnMessage = _formatter->note_on(midiNoteNumber, pitch, pitchConfidence, onsetTimeCompensation);
break;
default:
noteOnMessage = "";
noteOffMessage = "";
break;
}*/
xaviliz marked this conversation as resolved.
Show resolved Hide resolved

// E_INFO("sax2midi compute is done");
}
103 changes: 103 additions & 0 deletions src/algorithms/tonal/audio2midi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#ifndef ESSENTIA_AUDIO2MIDI_H
#define ESSENTIA_AUDIO2MIDI_H

#include "algorithmfactory.h"

namespace essentia {
namespace standard {

// Audio2Midi: wrapper algorithm that chains LowPass, FrameBuffer, Audio2Pitch
// and Pitch2Midi. It takes an audio frame and outputs the estimated pitch and
// loudness together with the MIDI message type, note number and time
// compensation produced by Pitch2Midi.
class Audio2Midi : public Algorithm {
  protected:
    // Declared input and output ports.
    Input<std::vector<Real> > _frame;
    Output<Real> _pitch;
    Output<Real> _loudness;
    Output<std::vector<std::string> > _messageType;
    Output<std::vector<Real> > _midiNoteNumber;
    Output<std::vector<Real> > _timeCompensation;

    // Inner algorithms; created in the constructor and deleted in the destructor.
    Algorithm* _lowpass;
    Algorithm* _framebuffer;
    Algorithm* _audio2pitch;
    Algorithm* _pitch2midi;

    // Configuration values read in configure().
    Real _sampleRate;
    int _frameSize;               // derived from the sample rate in configure()
    int _fixedFrameSize = 8192;   // frame size used unless the rate is 16000 or 24000 Hz
    int _hopSize;
    std::string _pitchAlgorithm = "pitchyinfft";   // forwarded to Audio2Pitch; not a user parameter
    std::string _loudnessAlgorithm = "rms";
    Real _minFrequency;
    Real _maxFrequency;
    int _tuningFrequency;
    Real _pitchConfidenceThreshold, _loudnessThreshold, _minOccurrenceRate;
    Real _midiBufferDuration;
    Real _minNoteChangePeriod;
    Real _minOnsetCheckPeriod;
    Real _minOffsetCheckPeriod;

    bool _applyTimeCompensation;
    int _transposition;

    // Working containers. compute() uses lpFrame, analysisFrame, pitchConfidence
    // and voiced to pass values between the inner algorithms.
    std::vector<Real> lpFrame, analysisFrame;
    Real pitch, pitchConfidence, loudness;
    std::vector<Real> midiNoteNumber, timeCompensation;
    std::vector<std::string> messageType;
    Real onsetTimeCompensation, offsetTimeCompensation;

    int voiced;

  public:
    // Declares the ports and instantiates the inner algorithms.
    Audio2Midi() {
      declareInput(_frame, "frame", "the input frame to analyse");
      declareOutput(_pitch, "pitch", "pitch given in Hz");
      declareOutput(_loudness, "loudness", "detected loudness in decibels");
      declareOutput(_messageType, "messageType", "the output of MIDI message type, as string, {noteoff, noteon, noteoff-noteon}");
      declareOutput(_midiNoteNumber, "midiNoteNumber", "the output of detected MIDI note number, as integer, in range [0,127]");
      declareOutput(_timeCompensation, "timeCompensation", "time to be compensated in the messages");

      _lowpass = AlgorithmFactory::create("LowPass");
      _framebuffer = AlgorithmFactory::create("FrameBuffer");
      _audio2pitch = AlgorithmFactory::create("Audio2Pitch");
      _pitch2midi = AlgorithmFactory::create("Pitch2Midi");
    }

    // Releases the inner algorithms created in the constructor.
    ~Audio2Midi() {
      delete _lowpass;
      delete _framebuffer;
      delete _audio2pitch;
      delete _pitch2midi;
    }

    void declareParameters() {
      declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
      declareParameter("hopSize", "equivalent to I/O buffer size", "[1,inf)", 32);
      // The lower bound matches Pitch2Midi's minFrequency range ([20,20000]):
      // the value is forwarded to it, so anything below 20 Hz would be rejected
      // when the inner algorithm is configured.
      declareParameter("minFrequency", "minimum frequency to detect in Hz", "[20,20000]", 60.0);
      declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0);
      declareParameter("tuningFrequency", "tuning frequency for semitone index calculation, corresponding to A3 [Hz]", "{432,440}", 440);
      declareParameter("pitchConfidenceThreshold", "level of pitch confidence above which note ON/OFF start to be considered", "[0,1]", 0.25);
      declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0);
      declareParameter("transpositionAmount", "Apply transposition (in semitones) to the detected MIDI notes.", "(-69,50)", 0);
      declareParameter("minOccurrenceRate", "rate of predominant pitch occurrence in MidiPool buffer to consider note ON event", "[0,1]", 0.5);
      declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in MidiPool algorithm", "[0.005,0.5]", 0.05); // 50 ms
      declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (testing only)", "(0,1]", 0.030);
      declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (testing only)", "(0,1]", 0.075);
      declareParameter("minOffsetCheckPeriod", "minimum time to wait until an offset is detected (testing only)", "(0,1]", 0.2);
      declareParameter("applyTimeCompensation", "whether to apply time compensation correction to MIDI note detection", "{true,false}", true);
    }

    void configure();
    void compute();

    static const char* name;
    static const char* category;
    static const char* description;
};


} // namespace standard
} // namespace essentia

#endif
17 changes: 8 additions & 9 deletions src/algorithms/tonal/pitch2midi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void Pitch2Midi::configure()
_sampleRate = parameter("sampleRate").toReal();
_hopSize = parameter("hopSize").toInt();
_minFrequency = parameter("minFrequency").toReal();
_minOcurrenceRate = parameter("minOcurrenceRate").toReal();
_minOccurrenceRate = parameter("minOccurrenceRate").toReal();
_bufferDuration = parameter("midiBufferDuration").toReal();
_minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
_minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
Expand All @@ -33,8 +33,8 @@ void Pitch2Midi::configure()
_offsetCheckCounter = 0;
_onsetCheckCounter = 0;

_minOcurrenceRatePeriod = _minOcurrenceRate * _bufferDuration;
_minOcurrenceRateThreshold = _minOcurrenceRatePeriod / _frameTime;
_minOccurrenceRatePeriod = _minOccurrenceRate * _bufferDuration;
_minOccurrenceRateThreshold = _minOccurrenceRatePeriod / _frameTime;

// estimate buffer capacity
int c = static_cast<int>( round( _sampleRate / float(_hopSize) * _bufferDuration ) );
Expand Down Expand Up @@ -151,7 +151,6 @@ void Pitch2Midi::compute()
_noteOff = true;
updateDnote();
setOutputs(dnote, 0.0, _minNoteChangePeriod);
//E_INFO("offset(unvoiced frame)");
_unvoicedFrameCounter = 0;
_offsetCheckCounter = 0;
_onsetCheckCounter = 0;
Expand Down Expand Up @@ -220,27 +219,27 @@ void Pitch2Midi::compute()
if (!hasCoherence() && _NOTED_ON) {
if (_maxVoted[0] != 0.0) {
_onsetCheckCounter++;
// combines checker with minOcurrenceRate
if ((_onsetCheckCounter > _minOcurrenceRateThreshold)){
// combines checker with minOccurrenceRate
if ((_onsetCheckCounter > _minOccurrenceRateThreshold)){
_NOTED_ON = true;
if (note != _maxVoted[0]){ // avoid note slicing effect
_noteOff = true;
_noteOn = true;
updateDnote();
note = _maxVoted[0];
}
//E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOcurrenceRateThreshold);
//E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOccurrenceRateThreshold);
_offsetCheckCounter = 0;
_onsetCheckCounter = 0;
}
}
// output the max-voted midi note to avoid unestable midi note numbers
setOutputs(_maxVoted[0], _minOcurrenceRatePeriod, _minOcurrenceRatePeriod);
setOutputs(_maxVoted[0], _minOccurrenceRatePeriod, _minOccurrenceRatePeriod);
return;
}

if (!hasCoherence() && !_NOTED_ON) {
if (_maxVoted[1] > _minOcurrenceRate) {
if (_maxVoted[1] > _minOccurrenceRate) {
_onsetCheckCounter++;

if (_onsetCheckCounter > _minOnsetCheckThreshold) {
Expand Down
8 changes: 4 additions & 4 deletions src/algorithms/tonal/pitch2midi.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace standard {
Real _sampleRate;
int _hopSize;
Real _minFrequency;
Real _minOcurrenceRate;
Real _minOccurrenceRate;
Real _minOnsetCheckPeriod;
Real _minOffsetCheckPeriod;
Real _minNoteChangePeriod;
Expand Down Expand Up @@ -66,8 +66,8 @@ namespace standard {
int _onsetCheckCounter;

Real _frameTime;
Real _minOcurrenceRateThreshold;
Real _minOcurrenceRatePeriod;
Real _minOccurrenceRateThreshold;
Real _minOccurrenceRatePeriod;

// former Pitch2Midi outputs, now interal vars
Real _midiNoteNumberTransposed;
Expand All @@ -89,7 +89,7 @@ namespace standard {
declareParameter("sampleRate", "Audio sample rate", "[8000,inf)", 44100);
declareParameter("hopSize", "Pitch Detection analysis hop size in samples, equivalent to I/O buffer size", "[1,inf)", 128);
declareParameter("minFrequency", "minimum detectable frequency", "[20,20000]", 60.0);
declareParameter("minOcurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
declareParameter("minOccurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in the note toggle detection algorithm", "[0.005,0.5]", 0.015); // 15ms
declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (s)", "(0,1]", 0.030);
declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (s)", "(0,1]", 0.075);
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading
Loading