Skip to content

Commit

Permalink
Merge pull request #1437 from xaviliz/add-new-algo-audio2midi
Browse files Browse the repository at this point in the history
Add new algo audio2midi
  • Loading branch information
dbogdanov authored Jan 9, 2025
2 parents eaf8ddf + 5b9499d commit b8c49d0
Show file tree
Hide file tree
Showing 9 changed files with 550 additions and 14 deletions.
97 changes: 97 additions & 0 deletions src/algorithms/tonal/audio2midi.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#include "audio2midi.h"

using namespace std;
using namespace essentia;
using namespace standard;

// Definitions of the static descriptive members declared in the Audio2Midi
// class: the algorithm name, its category, and the human-readable description
// text (wrapped in the DOC macro).
const char *Audio2Midi::name = "Audio2Midi";
const char *Audio2Midi::category = "Pitch";
const char *Audio2Midi::description = DOC("Wrapper around Audio2Pitch and Pitch2Midi for real time application. This algorithm has a state that is used to estimate note on/off events based on consequent compute() calls.");

// Caches the declared parameters in member variables, derives the analysis
// frame size from the sample rate, and configures the four wrapped algorithms
// (low-pass filter, frame buffer, Audio2Pitch and Pitch2Midi).
void Audio2Midi::configure()
{
  // --- audio format --------------------------------------------------------
  _sampleRate = parameter("sampleRate").toReal();
  _hopSize = parameter("hopSize").toInt();

  // --- pitch detection range and gating thresholds -------------------------
  _minFrequency = parameter("minFrequency").toReal();
  _maxFrequency = parameter("maxFrequency").toReal();
  _pitchConfidenceThreshold = parameter("pitchConfidenceThreshold").toReal();
  _loudnessThreshold = parameter("loudnessThreshold").toReal();

  // --- pitch-to-note conversion --------------------------------------------
  _tuningFrequency = parameter("tuningFrequency").toInt();
  _transposition = parameter("transpositionAmount").toInt();
  _applyTimeCompensation = parameter("applyTimeCompensation").toBool();

  // --- note on/off decision timing -----------------------------------------
  _minOccurrenceRate = parameter("minOccurrenceRate").toReal();
  _midiBufferDuration = parameter("midiBufferDuration").toReal();
  _minNoteChangePeriod = parameter("minNoteChangePeriod").toReal();
  _minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
  _minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();

  // The analysis frame grows with the sample rate: 2048 samples up to
  // 16000 Hz, 4096 samples up to 24000 Hz and 8192 samples above that.
  // The sample rate is truncated to an integer before the comparison.
  const int integerSampleRate = static_cast<int>(_sampleRate);
  _frameSize = (integerSampleRate <= 16000) ? 2048
             : (integerSampleRate <= 24000) ? 4096
                                            : 8192;

  // Stage 1: low-pass filter with a fixed cutoffFrequency of 1000.
  _lowpass->configure(INHERIT("sampleRate"),
                      "cutoffFrequency", 1000);

  // Stage 2: frame buffer sized to the analysis frame.
  _framebuffer->configure("bufferSize", _frameSize);

  // Stage 3: pitch/loudness estimation on the buffered analysis frame.
  _audio2pitch->configure(INHERIT("sampleRate"),
                          "frameSize", _frameSize,
                          "pitchAlgorithm", _pitchAlgorithm,
                          "minFrequency", _minFrequency,
                          "maxFrequency", _maxFrequency,
                          INHERIT("pitchConfidenceThreshold"),
                          INHERIT("loudnessThreshold"));

  // Stage 4: conversion of the pitch track into MIDI note messages.
  _pitch2midi->configure(INHERIT("sampleRate"),
                         INHERIT("hopSize"),
                         INHERIT("minOccurrenceRate"),
                         INHERIT("applyTimeCompensation"),
                         "minOnsetCheckPeriod", _minOnsetCheckPeriod,
                         "minOffsetCheckPeriod", _minOffsetCheckPeriod,
                         "minNoteChangePeriod", _minNoteChangePeriod,
                         "midiBufferDuration", _midiBufferDuration,
                         "minFrequency", _minFrequency,
                         "tuningFrequency", _tuningFrequency,
                         "transpositionAmount", _transposition);
}

void Audio2Midi::compute()
{
// get ref to input
const std::vector<Real> &frame = _frame.get();
Real& pitch = _pitch.get();
Real& loudness = _loudness.get();
vector<string>& messageType = _messageType.get();
vector<Real>& midiNoteNumber = _midiNoteNumber.get();
vector<Real>& timeCompensation = _timeCompensation.get();

_lowpass->input("signal").set(frame);
_lowpass->output("signal").set(lpFrame);

_framebuffer->input("frame").set(lpFrame);
_framebuffer->output("frame").set(analysisFrame);

_audio2pitch->input("frame").set(analysisFrame);
_audio2pitch->output("pitch").set(pitch);
_audio2pitch->output("pitchConfidence").set(pitchConfidence);
_audio2pitch->output("loudness").set(loudness);
_audio2pitch->output("voiced").set(voiced);

_pitch2midi->input("pitch").set(pitch);
_pitch2midi->input("voiced").set(voiced);
_pitch2midi->output("midiNoteNumber").set(midiNoteNumber);
_pitch2midi->output("timeCompensation").set(timeCompensation);
_pitch2midi->output("messageType").set(messageType);

_lowpass->compute();
_framebuffer->compute();
_audio2pitch->compute();
_pitch2midi->compute();

}
100 changes: 100 additions & 0 deletions src/algorithms/tonal/audio2midi.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#ifndef ESSENTIA_AUDIO2MIDI_H
#define ESSENTIA_AUDIO2MIDI_H

#include "algorithmfactory.h"

namespace essentia {
namespace standard {

// Audio2Midi chains four inner algorithms (LowPass, FrameBuffer, Audio2Pitch
// and Pitch2Midi) so that a stream of audio frames can be turned into pitch,
// loudness and MIDI note messages with a single compute() call per frame.
//
// The inner algorithms are created in the constructor, owned through raw
// pointers, and deleted in the destructor. They keep state between compute()
// calls; reset() forwards to each of them so callers can start a new stream
// without re-creating the algorithm.
class Audio2Midi : public Algorithm {
 protected:
  // Public inputs/outputs of the algorithm (registered in the constructor).
  Input<std::vector<Real> > _frame;
  Output<Real> _pitch;
  Output<Real> _loudness;
  Output<std::vector<std::string> > _messageType;
  Output<std::vector<Real> > _midiNoteNumber;
  Output<std::vector<Real> > _timeCompensation;

  // Inner algorithms, in processing order. Owned by this object.
  Algorithm* _lowpass;
  Algorithm* _framebuffer;
  Algorithm* _audio2pitch;
  Algorithm* _pitch2midi;

  // Cached parameter values (filled in configure()).
  Real _sampleRate;
  int _frameSize;  // derived from _sampleRate in configure(), not a parameter
  int _hopSize;
  std::string _pitchAlgorithm = "pitchyinfft";  // forwarded to Audio2Pitch
  // NOTE(review): _loudnessAlgorithm does not appear to be read by the
  // configure()/compute() implementation - confirm before relying on it.
  std::string _loudnessAlgorithm = "rms";
  Real _minFrequency;
  Real _maxFrequency;
  int _tuningFrequency;
  Real _pitchConfidenceThreshold, _loudnessThreshold, _minOccurrenceRate;
  Real _midiBufferDuration;
  Real _minNoteChangePeriod;
  Real _minOnsetCheckPeriod;
  Real _minOffsetCheckPeriod;

  bool _applyTimeCompensation;
  int _transposition;

  // Containers
  // lpFrame / analysisFrame / pitchConfidence / voiced are the intermediate
  // values handed from one inner algorithm to the next in compute().
  // NOTE(review): the remaining members below (pitch, loudness,
  // midiNoteNumber, timeCompensation, messageType, onsetTimeCompensation,
  // offsetTimeCompensation) appear unused by compute(), which writes directly
  // into the declared outputs - kept for compatibility with subclasses.
  std::vector<Real> lpFrame, analysisFrame;
  Real pitch, pitchConfidence, loudness;
  std::vector<Real> midiNoteNumber, timeCompensation;
  std::vector<std::string> messageType;
  Real onsetTimeCompensation, offsetTimeCompensation;

  int voiced;

 public:
  // Registers the input/outputs and instantiates the inner algorithms.
  Audio2Midi() {
    declareInput(_frame, "frame", "the input frame to analyse");
    declareOutput(_pitch, "pitch", "pitch given in Hz");
    declareOutput(_loudness, "loudness", "detected loudness in decibels");
    declareOutput(_messageType, "messageType", "the output of MIDI message type, as string, {noteoff, noteon, noteoff-noteon}");
    declareOutput(_midiNoteNumber, "midiNoteNumber", "the output of detected MIDI note number, as integer, in range [0,127]");
    declareOutput(_timeCompensation, "timeCompensation", "time to be compensated in the messages");

    _lowpass = AlgorithmFactory::create("LowPass");
    _framebuffer = AlgorithmFactory::create("FrameBuffer");
    _audio2pitch = AlgorithmFactory::create("Audio2Pitch");
    _pitch2midi = AlgorithmFactory::create("Pitch2Midi");
  }

  // Releases the inner algorithms created in the constructor.
  ~Audio2Midi() {
    delete _lowpass;
    delete _framebuffer;
    delete _audio2pitch;
    delete _pitch2midi;
  }

  // Declares the user-facing parameters: name, description, accepted range
  // and default value.
  void declareParameters() {
    declareParameter("sampleRate", "sample rate of incoming audio frames", "[8000,inf)", 44100);
    declareParameter("hopSize", "equivalent to I/O buffer size", "[1,inf)", 32);
    declareParameter("minFrequency", "minimum frequency to detect in Hz", "[10,20000]", 60.0);
    declareParameter("maxFrequency", "maximum frequency to detect in Hz", "[10,20000]", 2300.0);
    declareParameter("tuningFrequency", "tuning frequency for semitone index calculation, corresponding to A3 [Hz]", "{432,440}", 440);
    declareParameter("pitchConfidenceThreshold", "level of pitch confidence above which note ON/OFF start to be considered", "[0,1]", 0.25);
    declareParameter("loudnessThreshold", "loudness level above/below which note ON/OFF start to be considered, in decibels", "[-inf,0]", -51.0);
    declareParameter("transpositionAmount", "Apply transposition (in semitones) to the detected MIDI notes.", "(-69,50)", 0);
    declareParameter("minOccurrenceRate", "rate of predominant pitch occurrence in MidiPool buffer to consider note ON event", "[0,1]", 0.5);
    declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in MidiPool algorithm", "[0.005,0.5]", 0.05); // 50 ms
    declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (testing only)", "(0,1]", 0.030);
    declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (testing only)", "(0,1]", 0.075);
    declareParameter("minOffsetCheckPeriod", "minimum time to wait until an offset is detected (testing only)", "(0,1]", 0.2);
    declareParameter("applyTimeCompensation", "whether to apply time compensation correction to MIDI note detection", "{true,false}", true);
  }

  void configure();
  void compute();

  // Forwards reset() to every inner algorithm so that state accumulated over
  // previous compute() calls is discarded. What each inner reset() clears is
  // defined by that algorithm; an inner algorithm that does not override
  // reset() is simply left untouched.
  void reset() {
    _lowpass->reset();
    _framebuffer->reset();
    _audio2pitch->reset();
    _pitch2midi->reset();
  }

  static const char* name;
  static const char* category;
  static const char* description;
};


} // namespace standard
} // namespace essentia

#endif
17 changes: 8 additions & 9 deletions src/algorithms/tonal/pitch2midi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void Pitch2Midi::configure()
_sampleRate = parameter("sampleRate").toReal();
_hopSize = parameter("hopSize").toInt();
_minFrequency = parameter("minFrequency").toReal();
_minOcurrenceRate = parameter("minOcurrenceRate").toReal();
_minOccurrenceRate = parameter("minOccurrenceRate").toReal();
_bufferDuration = parameter("midiBufferDuration").toReal();
_minOnsetCheckPeriod = parameter("minOnsetCheckPeriod").toReal();
_minOffsetCheckPeriod = parameter("minOffsetCheckPeriod").toReal();
Expand All @@ -33,8 +33,8 @@ void Pitch2Midi::configure()
_offsetCheckCounter = 0;
_onsetCheckCounter = 0;

_minOcurrenceRatePeriod = _minOcurrenceRate * _bufferDuration;
_minOcurrenceRateThreshold = _minOcurrenceRatePeriod / _frameTime;
_minOccurrenceRatePeriod = _minOccurrenceRate * _bufferDuration;
_minOccurrenceRateThreshold = _minOccurrenceRatePeriod / _frameTime;

// estimate buffer capacity
int c = static_cast<int>( round( _sampleRate / float(_hopSize) * _bufferDuration ) );
Expand Down Expand Up @@ -151,7 +151,6 @@ void Pitch2Midi::compute()
_noteOff = true;
updateDnote();
setOutputs(dnote, 0.0, _minNoteChangePeriod);
//E_INFO("offset(unvoiced frame)");
_unvoicedFrameCounter = 0;
_offsetCheckCounter = 0;
_onsetCheckCounter = 0;
Expand Down Expand Up @@ -220,27 +219,27 @@ void Pitch2Midi::compute()
if (!hasCoherence() && _NOTED_ON) {
if (_maxVoted[0] != 0.0) {
_onsetCheckCounter++;
// combines checker with minOcurrenceRate
if ((_onsetCheckCounter > _minOcurrenceRateThreshold)){
// combines checker with minOccurrenceRate
if ((_onsetCheckCounter > _minOccurrenceRateThreshold)){
_NOTED_ON = true;
if (note != _maxVoted[0]){ // avoid note slicing effect
_noteOff = true;
_noteOn = true;
updateDnote();
note = _maxVoted[0];
}
//E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOcurrenceRateThreshold);
//E_INFO("off-onset(" << _maxVoted[0] << ", uncoherent & NOTED): " << _onsetCheckCounter << " - " << _minOccurrenceRateThreshold);
_offsetCheckCounter = 0;
_onsetCheckCounter = 0;
}
}
// output the max-voted midi note to avoid unstable midi note numbers
setOutputs(_maxVoted[0], _minOcurrenceRatePeriod, _minOcurrenceRatePeriod);
setOutputs(_maxVoted[0], _minOccurrenceRatePeriod, _minOccurrenceRatePeriod);
return;
}

if (!hasCoherence() && !_NOTED_ON) {
if (_maxVoted[1] > _minOcurrenceRate) {
if (_maxVoted[1] > _minOccurrenceRate) {
_onsetCheckCounter++;

if (_onsetCheckCounter > _minOnsetCheckThreshold) {
Expand Down
8 changes: 4 additions & 4 deletions src/algorithms/tonal/pitch2midi.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace standard {
Real _sampleRate;
int _hopSize;
Real _minFrequency;
Real _minOcurrenceRate;
Real _minOccurrenceRate;
Real _minOnsetCheckPeriod;
Real _minOffsetCheckPeriod;
Real _minNoteChangePeriod;
Expand Down Expand Up @@ -66,8 +66,8 @@ namespace standard {
int _onsetCheckCounter;

Real _frameTime;
Real _minOcurrenceRateThreshold;
Real _minOcurrenceRatePeriod;
Real _minOccurrenceRateThreshold;
Real _minOccurrenceRatePeriod;

// former Pitch2Midi outputs, now internal vars
Real _midiNoteNumberTransposed;
Expand All @@ -89,7 +89,7 @@ namespace standard {
declareParameter("sampleRate", "Audio sample rate", "[8000,inf)", 44100);
declareParameter("hopSize", "Pitch Detection analysis hop size in samples, equivalent to I/O buffer size", "[1,inf)", 128);
declareParameter("minFrequency", "minimum detectable frequency", "[20,20000]", 60.0);
declareParameter("minOcurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
declareParameter("minOccurrenceRate", "minimum number of times a midi note has to ocur compared to total capacity", "[0,1]", 0.5);
declareParameter("midiBufferDuration", "duration in seconds of buffer used for voting in the note toggle detection algorithm", "[0.005,0.5]", 0.015); // 15ms
declareParameter("minNoteChangePeriod", "minimum time to wait until a note change is detected (s)", "(0,1]", 0.030);
declareParameter("minOnsetCheckPeriod", "minimum time to wait until an onset is detected (s)", "(0,1]", 0.075);
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading

0 comments on commit b8c49d0

Please sign in to comment.