From 0c92e5d122127964ae42e447c18038f76bc7d2e6 Mon Sep 17 00:00:00 2001
From: Justin Salamon
Date: Tue, 12 Apr 2016 20:23:18 -0400
Subject: [PATCH] Implement offset_precision_recall_f1 and tests

---
 mir_eval/transcription.py   | 66 +++++++++++++++++++++++++++++++++++++
 tests/test_transcription.py | 22 +++++++++++++
 2 files changed, 88 insertions(+)

diff --git a/mir_eval/transcription.py b/mir_eval/transcription.py
index cfeee2f1..8a204518 100644
--- a/mir_eval/transcription.py
+++ b/mir_eval/transcription.py
@@ -582,6 +582,72 @@ def onset_precision_recall_f1(ref_intervals, est_intervals,
     return onset_precision, onset_recall, onset_f_measure
 
 
+def offset_precision_recall_f1(ref_intervals, est_intervals, offset_ratio=0.2,
+                               offset_min_tolerance=0.05, strict=False):
+    """Compute the Precision, Recall and F-measure of note offsets: an
+    estimated offset is considered correct if it is within +-50ms (or 20% of
+    the ref note duration, whichever is greater) of a ref offset. Note that
+    this metric completely ignores note onsets and note pitch. This means an
+    estimated offset will be considered correct if it matches a reference
+    offset, even if the offsets come from notes with completely different
+    pitches (i.e. notes that would not match with `match_notes`).
+
+    Examples
+    --------
+    >>> ref_intervals, _ = mir_eval.io.load_valued_intervals(
+    ...     'reference.txt')
+    >>> est_intervals, _ = mir_eval.io.load_valued_intervals(
+    ...     'estimated.txt')
+    >>> (offset_precision,
+    ...  offset_recall,
+    ...  offset_f_measure) = mir_eval.transcription.offset_precision_recall_f1(
+    ...     ref_intervals, est_intervals)
+
+    Parameters
+    ----------
+    ref_intervals : np.ndarray, shape=(n,2)
+        Array of reference notes time intervals (onset and offset times)
+    est_intervals : np.ndarray, shape=(m,2)
+        Array of estimated notes time intervals (onset and offset times)
+    offset_ratio : float > 0 or None
+        The ratio of the reference note's duration used to define the
+        offset_tolerance. Default is 0.2 (20%), meaning the offset_tolerance
+        will equal the ref_duration * 0.2, or offset_min_tolerance (0.05 by
+        default, i.e. 50 ms), whichever is greater.
+    offset_min_tolerance : float > 0
+        The minimum tolerance for offset matching. See the offset_ratio
+        description for an explanation of how the offset tolerance is
+        determined.
+    strict : bool
+        If ``strict=False`` (the default), threshold checks for offset
+        matching are performed using ``<=`` (less than or equal). If
+        ``strict=True``, the threshold checks are performed using ``<``
+        (less than).
+
+    Returns
+    -------
+    precision : float
+        The computed precision score
+    recall : float
+        The computed recall score
+    f_measure : float
+        The computed F-measure score
+    """
+    validate_intervals(ref_intervals, est_intervals)
+    # When reference notes are empty, metrics are undefined, return 0's
+    if len(ref_intervals) == 0 or len(est_intervals) == 0:
+        return 0., 0., 0.
+
+    matching = match_offsets(ref_intervals, est_intervals,
+                             offset_ratio=offset_ratio,
+                             offset_min_tolerance=offset_min_tolerance,
+                             strict=strict)
+
+    offset_precision = float(len(matching))/len(est_intervals)
+    offset_recall = float(len(matching))/len(ref_intervals)
+    offset_f_measure = util.f_measure(offset_precision, offset_recall)
+    return offset_precision, offset_recall, offset_f_measure
+
+
 def evaluate(ref_intervals, ref_pitches, est_intervals, est_pitches,
              **kwargs):
     """Compute all metrics for the given reference and estimated annotations.
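A minimal usage sketch of the new function (not part of the patch; the interval values below are made up purely for illustration). With the default parameters, the offset tolerance for each reference note is max(offset_min_tolerance, offset_ratio * ref_duration):

import numpy as np
import mir_eval

# Two hypothetical reference notes and two estimated notes. Both reference
# notes last 1 s, so the offset tolerance is max(0.05, 0.2 * 1.0) = 0.2 s
# under the default parameters.
ref_intervals = np.array([[0.0, 1.0], [2.0, 3.0]])
est_intervals = np.array([[0.0, 1.04], [2.0, 3.5]])

precision, recall, f_measure = (
    mir_eval.transcription.offset_precision_recall_f1(ref_intervals,
                                                      est_intervals))

# The first estimated offset (1.04 s) lies within 0.2 s of the reference
# offset at 1.0 s and matches; the second (3.5 s) is 0.5 s away from the
# reference offset at 3.0 s and does not. One match out of two estimated
# and two reference notes gives precision = recall = f_measure = 0.5.
print(precision, recall, f_measure)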
diff --git a/tests/test_transcription.py b/tests/test_transcription.py
index 8ad9c8c0..2893e11b 100644
--- a/tests/test_transcription.py
+++ b/tests/test_transcription.py
@@ -42,6 +42,12 @@
     "Onset_F-measure": 0.8888888888888889,
 }
 
+OFFSET_SCORES = {
+    "Offset_Precision": 0.6,
+    "Offset_Recall": 0.75,
+    "Offset_F-measure": 0.6666666666666665,
+}
+
 
 def test_match_offsets():
 
@@ -164,6 +170,22 @@ def test_onset_precision_recall_f1():
     assert np.allclose(scores_exp, scores_gen, atol=A_TOL)
 
 
+def test_offset_precision_recall_f1():
+
+    # load test data
+    ref_int = REF[:, :2]
+    est_int = EST[:, :2]
+
+    precision, recall, f_measure = (
+        mir_eval.transcription.offset_precision_recall_f1(ref_int, est_int))
+
+    scores_gen = np.array([precision, recall, f_measure])
+    scores_exp = np.array([OFFSET_SCORES['Offset_Precision'],
+                           OFFSET_SCORES['Offset_Recall'],
+                           OFFSET_SCORES['Offset_F-measure']])
+    assert np.allclose(scores_exp, scores_gen, atol=A_TOL)
+
+
 def test_regression():
     # Regression tests
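As a quick sanity check on the new OFFSET_SCORES fixture (not part of the patch), the expected F-measure follows from the expected precision and recall via the harmonic mean used by util.f_measure:

# Expected fixture values: precision 0.6, recall 0.75.
precision, recall = 0.6, 0.75
f_measure = 2 * precision * recall / (precision + recall)
# 2 * 0.6 * 0.75 / 1.35 = 2/3, consistent with the fixture value
# 0.6666666666666665 to within floating-point rounding.
assert abs(f_measure - 0.6666666666666665) < 1e-12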