From 28a754e2246e780674af24e79d6ada2bf1ed78d4 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 14:46:33 +0000 Subject: [PATCH 01/13] Modify score to return tuple for sedeval --- heareval/score.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/heareval/score.py b/heareval/score.py index 39010d9b..558ea980 100644 --- a/heareval/score.py +++ b/heareval/score.py @@ -2,7 +2,8 @@ Common utils for scoring. """ from functools import partial -from typing import Any, Callable, Dict, List, Optional +from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from collections import ChainMap import numpy as np import pandas as pd @@ -142,20 +143,21 @@ class SoundEventScore(ScoreFunction): def __init__( self, label_to_idx: Dict[str, int], - score: str, + scores: Tuple[str], params: Dict = None, name: Optional[str] = None, maximize: bool = True, ): """ - :param score: Score to use, from the list of overall SED eval scores. + :param scores: Scores to use, from the list of overall SED eval scores. + The first score in the tuple will be the primary score for this metric :param params: Parameters to pass to the scoring function, see inheriting children for details. """ if params is None: params = {} super().__init__(label_to_idx=label_to_idx, name=name, maximize=maximize) - self.score = score + self.scores = scores self.params = params assert self.score_class is not None @@ -176,10 +178,18 @@ def __call__(self, predictions: Dict, targets: Dict, **kwargs): estimated_event_list=estimated_event_list.filter(filename=filename), ) - # This (and segment_based_scores) return a pretty large selection of scores. - overall_scores = scores.results_overall_metrics() - # Keep the specific score we want - return overall_scores[self.score][self.score] + # results_overall_metrics return a pretty large nested selection of scores, + # with dicts of scores keyed on the type of scores, like f_measure, error_rate, + # accuracy + nested_overall_scores: Dict[ + str, Dict[str, float] + ] = scores.results_overall_metrics() + # Open up nested overall scores + overall_scores: Dict[str, Union[float, int]] = dict( + ChainMap(*nested_overall_scores.values()) + ) + # Return the required scores as tuples + return tuple([(score, overall_scores[score]) for score in self.scores]) @staticmethod def sed_eval_event_container( From 1bc9686488f69289560d48ba65ab22e281ae7069 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 14:47:22 +0000 Subject: [PATCH 02/13] Modify the actual score functions --- heareval/score.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/heareval/score.py b/heareval/score.py index 558ea980..50ef253a 100644 --- a/heareval/score.py +++ b/heareval/score.py @@ -319,19 +319,20 @@ def __call__(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> fl "event_onset_200ms_fms": partial( EventBasedScore, name="event_onset_200ms_fms", - score="f_measure", + # If first score will be used as the primary score for this metric + scores=("f_measure", "precision", "recall"), params={"evaluate_onset": True, "evaluate_offset": False, "t_collar": 0.2}, ), "event_onset_50ms_fms": partial( EventBasedScore, name="event_onset_50ms_fms", - score="f_measure", + scores=("f_measure", "precision", "recall"), params={"evaluate_onset": True, "evaluate_offset": False, "t_collar": 0.05}, ), "event_onset_offset_50ms_20perc_fms": partial( EventBasedScore, name="event_onset_offset_50ms_20perc_fms", - score="f_measure", + 
scores=("f_measure", "precision", "recall"), params={ "evaluate_onset": True, "evaluate_offset": True, @@ -342,7 +343,7 @@ def __call__(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> fl "segment_1s_er": partial( SegmentBasedScore, name="segment_1s_er", - score="error_rate", + scores=("error_rate"), params={"time_resolution": 1.0}, maximize=False, ), From 98e52dc251378b4b79e83933aa5dd78a4a570630 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 14:55:22 +0000 Subject: [PATCH 03/13] Change the predictions to handle tuple returns from metric --- heareval/predictions/task_predictions.py | 84 +++++++++++++++++------- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py index fb37eae0..49cfd755 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -253,6 +253,48 @@ def validation_step(self, batch, batch_idx): def test_step(self, batch, batch_idx): return self._step(batch, batch_idx) + def log_scores(self, name: str, score_args: Tuple[Any]): + """Logs the metric score value for each score defined for the model""" + assert hasattr(self, "scores"), "Scores for the model should be defined" + end_scores: Dict[str, Union[int, float]] = {} + for score in self.scores: + score_ret = score(*score_args) + # If the returned score is a tuple, store each subscore as separate entry + if isinstance(score_ret, tuple): + assert all( + type(s) == tuple + and type(s[0]) == str + and type(s[1]) in [float, int] + for s in score_ret + ), ( + "If the return type of the score is a tuple, all the elements " + "in the tuple should be tuple of type tuple(str, (float, int))" + ) + # The first score in the returned tuple will be used + # as primary score for this metric + end_scores[f"{name}_{score}"] = score_ret[0][1] + # All other scores will also be logged + for (subscore, value) in score_ret: + end_scores[f"{name}_{score}_{subscore}"] = value + elif isinstance(score_ret, (float, int)): + end_scores[f"{name}_{score}"] = score_ret + else: + raise ValueError( + f"Return type {type(score_ret)} is unexpected. Return type of " + "the score function should either be a " + "tuple(tuple(str, (float, int))) or float or int. " + ) + + # Weird, scores can be nan if precision has zero guesses + for score_name in end_scores: + end_scores[score_name] = 0.0 if math.isnan(value) else value + + self.log( + f"{name}_score", end_scores[f"{name}_{str(self.scores[0])}"], logger=True + ) + for score_name in end_scores: + self.log(score_name, end_scores[score_name], prog_bar=True, logger=True) + # Implement this for each inheriting class # TODO: Can we combine the boilerplate for both of these? 
def _score_epoch_end(self, name: str, outputs: List[Dict[str, List[Any]]]): @@ -335,15 +377,13 @@ def _score_epoch_end(self, name: str, outputs: List[Dict[str, List[Any]]]): # Cache all predictions for later serialization self.test_predicted_labels = prediction - for score in self.scores: - end_scores[f"{name}_{score}"] = score( - prediction.detach().cpu().numpy(), target.detach().cpu().numpy() - ) - self.log( - f"{name}_score", end_scores[f"{name}_{str(self.scores[0])}"], logger=True + self.log_scores( + name, + score_args=( + prediction.detach().cpu().numpy(), + target.detach().cpu().numpy(), + ), ) - for score_name in end_scores: - self.log(score_name, end_scores[score_name], prog_bar=True, logger=True) class EventPredictionModel(AbstractPredictionModel): @@ -424,11 +464,23 @@ def _score_epoch_end(self, name: str, outputs: List[Dict[str, List[Any]]]): for postprocessing in tqdm(predicted_events_by_postprocessing): predicted_events = predicted_events_by_postprocessing[postprocessing] primary_score_fn = self.scores[0] - primary_score = primary_score_fn( + primary_score_ret = primary_score_fn( # predicted_events, self.target_events[name] predicted_events, self.target_events[name], ) + # If the score returns a tuple of scores, the first score + # is used as the primary score + if isinstance(primary_score_ret, tuple): + primary_score = primary_score_ret[0][1] + elif isinstance(primary_score_ret, (float, int)): + primary_score = primary_score_ret + else: + raise ValueError( + f"Return type {type(primary_score_ret)} is unexpected. " + "Return type of the score function should either be a " + "tuple(tuple(str, (float, int))) or float or int. " + ) if np.isnan(primary_score): primary_score = 0.0 score_and_postprocessing.append((primary_score, postprocessing)) @@ -454,19 +506,7 @@ def _score_epoch_end(self, name: str, outputs: List[Dict[str, List[Any]]]): self.test_predicted_labels = prediction self.test_predicted_events = predicted_events - for score in self.scores: - end_scores[f"{name}_{score}"] = score( - predicted_events, self.target_events[name] - ) - # Weird, this can happen if precision has zero guesses - if math.isnan(end_scores[f"{name}_{score}"]): - end_scores[f"{name}_{score}"] = 0.0 - self.log( - f"{name}_score", end_scores[f"{name}_{str(self.scores[0])}"], logger=True - ) - - for score_name in end_scores: - self.log(score_name, end_scores[score_name], prog_bar=True, logger=True) + self.log_scores(name, score_args=(predicted_events, self.target_events[name])) class SplitMemmapDataset(Dataset): From 0a05eb1ab800d5bd2c95e8467b0b79f5f3437ab1 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 15:18:49 +0000 Subject: [PATCH 04/13] Make validate function --- heareval/predictions/task_predictions.py | 11 ++----- heareval/score.py | 41 ++++++++++++++++++++---- 2 files changed, 36 insertions(+), 16 deletions(-) diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py index 49cfd755..2e22dd8e 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -46,6 +46,7 @@ available_scores, label_to_binary_vector, label_vocab_as_dict, + validate_score_return_type, ) TASK_SPECIFIC_PARAM_GRID = { @@ -259,17 +260,9 @@ def log_scores(self, name: str, score_args: Tuple[Any]): end_scores: Dict[str, Union[int, float]] = {} for score in self.scores: score_ret = score(*score_args) + validate_score_return_type(score_ret) # If the returned score is a tuple, store each subscore as separate entry if 
isinstance(score_ret, tuple): - assert all( - type(s) == tuple - and type(s[0]) == str - and type(s[1]) in [float, int] - for s in score_ret - ), ( - "If the return type of the score is a tuple, all the elements " - "in the tuple should be tuple of type tuple(str, (float, int))" - ) # The first score in the returned tuple will be used # as primary score for this metric end_scores[f"{name}_{score}"] = score_ret[0][1] diff --git a/heareval/score.py b/heareval/score.py index 50ef253a..003e6d1b 100644 --- a/heareval/score.py +++ b/heareval/score.py @@ -54,6 +54,28 @@ def label_to_binary_vector(label: List, num_labels: int) -> torch.Tensor: return binary_labels +def validate_score_return_type( + ret: Union[float, int, Tuple[Tuple[str, Union[int, float]]]] +): + """Validates if the return type of the score is valid""" + if isinstance(ret, tuple): + assert all( + type(s) == tuple and type(s[0]) == str and type(s[1]) in [float, int] + for s in ret + ), ( + "If the return type of the score is a tuple, all the elements " + "in the tuple should be tuple of type tuple(str, (float, int))" + ) + elif isinstance(ret, (float, int)): + pass + else: + raise ValueError( + f"Return type {type(ret)} is unexpected. Return type of " + "the score function should either be a " + "tuple(tuple(str, (float, int))) or float or int. " + ) + + class ScoreFunction: """ A simple abstract base class for score functions @@ -77,7 +99,12 @@ def __init__( self.name = name self.maximize = maximize - def __call__(self, predictions: Any, targets: Any, **kwargs) -> float: + def __call__(self, *args, **kwargs) -> float: + ret = self.compute(*args, **kwargs) + validate_score_return_type(ret) + return ret + + def compute(self, predictions: Any, targets: Any, **kwargs) -> float: """ Compute the score based on the predictions and targets. Returns the score. 
""" @@ -90,7 +117,7 @@ def __str__(self): class Top1Accuracy(ScoreFunction): name = "top1_acc" - def __call__(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: assert predictions.ndim == 2 assert targets.ndim == 2 # One hot # Compute the number of correct predictions @@ -115,7 +142,7 @@ class ChromaAccuracy(ScoreFunction): name = "chroma_acc" - def __call__(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: # Compute the number of correct predictions correct = 0 for target, prediction in zip(targets, predictions): @@ -161,7 +188,7 @@ def __init__( self.params = params assert self.score_class is not None - def __call__(self, predictions: Dict, targets: Dict, **kwargs): + def compute(self, predictions: Dict, targets: Dict, **kwargs): # Containers of events for sed_eval reference_event_list = self.sed_eval_event_container(targets) estimated_event_list = self.sed_eval_event_container(predictions) @@ -243,7 +270,7 @@ class MeanAveragePrecision(ScoreFunction): name = "mAP" - def __call__(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: assert predictions.ndim == 2 assert targets.ndim == 2 # One hot @@ -272,7 +299,7 @@ class DPrime(ScoreFunction): name = "d_prime" - def __call__(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: assert predictions.ndim == 2 assert targets.ndim == 2 # One hot # ROC-AUC Requires more than one example for each class @@ -297,7 +324,7 @@ class AUCROC(ScoreFunction): name = "aucroc" - def __call__(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: assert predictions.ndim == 2 assert targets.ndim == 2 # One hot # ROC-AUC Requires more than one example for each class From c34546f5f805bd15a207c2b9e4d18825608354cb Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 15:23:55 +0000 Subject: [PATCH 05/13] Fix mypy --- heareval/predictions/task_predictions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py index 2e22dd8e..09c4687d 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -254,10 +254,10 @@ def validation_step(self, batch, batch_idx): def test_step(self, batch, batch_idx): return self._step(batch, batch_idx) - def log_scores(self, name: str, score_args: Tuple[Any]): + def log_scores(self, name: str, score_args): """Logs the metric score value for each score defined for the model""" assert hasattr(self, "scores"), "Scores for the model should be defined" - end_scores: Dict[str, Union[int, float]] = {} + end_scores = {} for score in self.scores: score_ret = score(*score_args) validate_score_return_type(score_ret) From d25bd50cedfe3c00f9f8b8c64e1ec8f1bd8e4b5c Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 15:57:57 +0000 Subject: [PATCH 06/13] Minor fixes --- heareval/predictions/task_predictions.py | 2 +- heareval/score.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/heareval/predictions/task_predictions.py 
b/heareval/predictions/task_predictions.py index 09c4687d..45f6e3a2 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -279,7 +279,7 @@ def log_scores(self, name: str, score_args): ) # Weird, scores can be nan if precision has zero guesses - for score_name in end_scores: + for score_name, value in end_scores.items(): end_scores[score_name] = 0.0 if math.isnan(value) else value self.log( diff --git a/heareval/score.py b/heareval/score.py index 003e6d1b..eb98cbaf 100644 --- a/heareval/score.py +++ b/heareval/score.py @@ -370,7 +370,7 @@ def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> flo "segment_1s_er": partial( SegmentBasedScore, name="segment_1s_er", - scores=("error_rate"), + scores=("error_rate",), params={"time_resolution": 1.0}, maximize=False, ), From b08a552713983e602264e5609030bafe4a964445 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 18:17:22 +0000 Subject: [PATCH 07/13] Fixes as per comment --- heareval/predictions/task_predictions.py | 20 +++++----- heareval/score.py | 47 +++++++++++++++++------- 2 files changed, 43 insertions(+), 24 deletions(-) diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py index 45f6e3a2..873c05ed 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -263,25 +263,23 @@ def log_scores(self, name: str, score_args): validate_score_return_type(score_ret) # If the returned score is a tuple, store each subscore as separate entry if isinstance(score_ret, tuple): - # The first score in the returned tuple will be used - # as primary score for this metric + # The first score in the returned tuple will be used for + # any optimisation criterion, for instance if the metric is + # primary metric, the score corresponding to the first tuple + # will be used for early stopping end_scores[f"{name}_{score}"] = score_ret[0][1] # All other scores will also be logged for (subscore, value) in score_ret: end_scores[f"{name}_{score}_{subscore}"] = value - elif isinstance(score_ret, (float, int)): + elif isinstance(score_ret, float): end_scores[f"{name}_{score}"] = score_ret else: raise ValueError( f"Return type {type(score_ret)} is unexpected. Return type of " "the score function should either be a " - "tuple(tuple(str, (float, int))) or float or int. " + "tuple(tuple) or float." ) - # Weird, scores can be nan if precision has zero guesses - for score_name, value in end_scores.items(): - end_scores[score_name] = 0.0 if math.isnan(value) else value - self.log( f"{name}_score", end_scores[f"{name}_{str(self.scores[0])}"], logger=True ) @@ -463,16 +461,16 @@ def _score_epoch_end(self, name: str, outputs: List[Dict[str, List[Any]]]): self.target_events[name], ) # If the score returns a tuple of scores, the first score - # is used as the primary score + # is used if isinstance(primary_score_ret, tuple): primary_score = primary_score_ret[0][1] - elif isinstance(primary_score_ret, (float, int)): + elif isinstance(primary_score_ret, float): primary_score = primary_score_ret else: raise ValueError( f"Return type {type(primary_score_ret)} is unexpected. " "Return type of the score function should either be a " - "tuple(tuple(str, (float, int))) or float or int. " + "tuple(tuple) or float. 
" ) if np.isnan(primary_score): primary_score = 0.0 diff --git a/heareval/score.py b/heareval/score.py index eb98cbaf..996a8105 100644 --- a/heareval/score.py +++ b/heareval/score.py @@ -54,25 +54,37 @@ def label_to_binary_vector(label: List, num_labels: int) -> torch.Tensor: return binary_labels -def validate_score_return_type( - ret: Union[float, int, Tuple[Tuple[str, Union[int, float]]]] -): - """Validates if the return type of the score is valid""" +def validate_score_return_type(ret: Union[Tuple[Tuple], float]): + """ + Valid return types for the metric are + - tuple(tuple(string: name of the subtype, float: the value)): This is the + case with sed eval metrics. They can return (("f_measure", value), + ("precision", value), ...), depending on the scores + the metric should is supposed to return. This is set as `scores` + attribute in the metric. + - float: Standard metric behaviour + + The downstream prediction pipeline is able to handle these two types. + In case of the tuple return type, the value of the first entry in the + tuple will be used as an optimisation criterion wherever required. + For instance, if the return is (("f_measure", value), ("precision", value)), + the value corresponding to the f_measure will be used ( for instance in + early stopping if this metric is the primary score for the task ) + """ if isinstance(ret, tuple): assert all( - type(s) == tuple and type(s[0]) == str and type(s[1]) in [float, int] - for s in ret + type(s) == tuple and type(s[0]) == str and type(s[1]) == float for s in ret ), ( "If the return type of the score is a tuple, all the elements " - "in the tuple should be tuple of type tuple(str, (float, int))" + "in the tuple should be tuple of type (string, float)" ) - elif isinstance(ret, (float, int)): + elif isinstance(ret, float): pass else: raise ValueError( f"Return type {type(ret)} is unexpected. Return type of " "the score function should either be a " - "tuple(tuple(str, (float, int))) or float or int. " + "tuple(tuple) or float. " ) @@ -99,12 +111,18 @@ def __init__( self.name = name self.maximize = maximize - def __call__(self, *args, **kwargs) -> float: + def __call__(self, *args, **kwargs) -> Union[Tuple[Tuple], float]: + """ + Calls the compute function of the metric, and after validating the output, + returns the metric score + """ ret = self.compute(*args, **kwargs) validate_score_return_type(ret) return ret - def compute(self, predictions: Any, targets: Any, **kwargs) -> float: + def compute( + self, predictions: Any, targets: Any, **kwargs + ) -> Union[Tuple[Tuple], float]: """ Compute the score based on the predictions and targets. Returns the score. """ @@ -188,7 +206,9 @@ def __init__( self.params = params assert self.score_class is not None - def compute(self, predictions: Dict, targets: Dict, **kwargs): + def compute( + self, predictions: Dict, targets: Dict, **kwargs + ) -> Tuple[Tuple[str, float]]: # Containers of events for sed_eval reference_event_list = self.sed_eval_event_container(targets) estimated_event_list = self.sed_eval_event_container(predictions) @@ -215,7 +235,8 @@ def compute(self, predictions: Dict, targets: Dict, **kwargs): overall_scores: Dict[str, Union[float, int]] = dict( ChainMap(*nested_overall_scores.values()) ) - # Return the required scores as tuples + # Return the required scores as tuples. 
The scores are returned in the + # order they are passed in the `scores` argument return tuple([(score, overall_scores[score]) for score in self.scores]) @staticmethod From 391ded8929762c5d6a3a6cfaa45fe28f6c305af0 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 18:20:50 +0000 Subject: [PATCH 08/13] flake --- heareval/predictions/task_predictions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py index 873c05ed..743c457c 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -14,7 +14,6 @@ import copy import json -import math import multiprocessing import pickle import random From 613ee7f42c789ba9fb7076cd9a9b3b93d01a1655 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 18:26:48 +0000 Subject: [PATCH 09/13] convert compute to _compute --- heareval/score.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/heareval/score.py b/heareval/score.py index 996a8105..551ee7f4 100644 --- a/heareval/score.py +++ b/heareval/score.py @@ -116,15 +116,18 @@ def __call__(self, *args, **kwargs) -> Union[Tuple[Tuple], float]: Calls the compute function of the metric, and after validating the output, returns the metric score """ - ret = self.compute(*args, **kwargs) + ret = self._compute(*args, **kwargs) validate_score_return_type(ret) return ret - def compute( + def _compute( self, predictions: Any, targets: Any, **kwargs ) -> Union[Tuple[Tuple], float]: """ - Compute the score based on the predictions and targets. Returns the score. + Compute the score based on the predictions and targets. + This is a private function and the metric should be used as a functor + by calling the `__call__` method which calls this and also validates + the return type """ raise NotImplementedError("Inheriting classes must implement this function") @@ -135,7 +138,7 @@ def __str__(self): class Top1Accuracy(ScoreFunction): name = "top1_acc" - def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def _compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: assert predictions.ndim == 2 assert targets.ndim == 2 # One hot # Compute the number of correct predictions @@ -160,7 +163,7 @@ class ChromaAccuracy(ScoreFunction): name = "chroma_acc" - def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def _compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: # Compute the number of correct predictions correct = 0 for target, prediction in zip(targets, predictions): @@ -206,7 +209,7 @@ def __init__( self.params = params assert self.score_class is not None - def compute( + def _compute( self, predictions: Dict, targets: Dict, **kwargs ) -> Tuple[Tuple[str, float]]: # Containers of events for sed_eval @@ -291,7 +294,7 @@ class MeanAveragePrecision(ScoreFunction): name = "mAP" - def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def _compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: assert predictions.ndim == 2 assert targets.ndim == 2 # One hot @@ -320,7 +323,7 @@ class DPrime(ScoreFunction): name = "d_prime" - def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def _compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: assert predictions.ndim == 2 assert targets.ndim == 2 # One hot # ROC-AUC 
Requires more than one example for each class @@ -345,7 +348,7 @@ class AUCROC(ScoreFunction): name = "aucroc" - def compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: + def _compute(self, predictions: np.ndarray, targets: np.ndarray, **kwargs) -> float: assert predictions.ndim == 2 assert targets.ndim == 2 # One hot # ROC-AUC Requires more than one example for each class From 15ba67d05b505427bfe58610b6ccbafbf3017811 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 18:33:25 +0000 Subject: [PATCH 10/13] Change return type --- heareval/score.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/heareval/score.py b/heareval/score.py index 551ee7f4..ba96931c 100644 --- a/heareval/score.py +++ b/heareval/score.py @@ -54,7 +54,7 @@ def label_to_binary_vector(label: List, num_labels: int) -> torch.Tensor: return binary_labels -def validate_score_return_type(ret: Union[Tuple[Tuple], float]): +def validate_score_return_type(ret: Union[Tuple[Tuple[str, float], ...], float]): """ Valid return types for the metric are - tuple(tuple(string: name of the subtype, float: the value)): This is the @@ -111,7 +111,7 @@ def __init__( self.name = name self.maximize = maximize - def __call__(self, *args, **kwargs) -> Union[Tuple[Tuple], float]: + def __call__(self, *args, **kwargs) -> Union[Tuple[Tuple[str, float], ...], float]: """ Calls the compute function of the metric, and after validating the output, returns the metric score @@ -122,7 +122,7 @@ def __call__(self, *args, **kwargs) -> Union[Tuple[Tuple], float]: def _compute( self, predictions: Any, targets: Any, **kwargs - ) -> Union[Tuple[Tuple], float]: + ) -> Union[Tuple[Tuple[str, float], ...], float]: """ Compute the score based on the predictions and targets. This is a private function and the metric should be used as a functor @@ -211,7 +211,7 @@ def __init__( def _compute( self, predictions: Dict, targets: Dict, **kwargs - ) -> Tuple[Tuple[str, float]]: + ) -> Tuple[Tuple[str, float], ...]: # Containers of events for sed_eval reference_event_list = self.sed_eval_event_container(targets) estimated_event_list = self.sed_eval_event_container(predictions) @@ -235,7 +235,7 @@ def _compute( str, Dict[str, float] ] = scores.results_overall_metrics() # Open up nested overall scores - overall_scores: Dict[str, Union[float, int]] = dict( + overall_scores: Dict[str, float] = dict( ChainMap(*nested_overall_scores.values()) ) # Return the required scores as tuples. 
The scores are returned in the From 67f05e8ee949608165915dcc9459d73180fe7027 Mon Sep 17 00:00:00 2001 From: Humair Raj Khan Date: Sun, 21 Nov 2021 19:43:02 +0000 Subject: [PATCH 11/13] Update comment --- heareval/predictions/task_predictions.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py index 743c457c..75fb2a63 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -262,10 +262,8 @@ def log_scores(self, name: str, score_args): validate_score_return_type(score_ret) # If the returned score is a tuple, store each subscore as separate entry if isinstance(score_ret, tuple): - # The first score in the returned tuple will be used for - # any optimisation criterion, for instance if the metric is - # primary metric, the score corresponding to the first tuple - # will be used for early stopping + # The first score in the returned tuple for the + # first `self.scores` is the optimization criterion end_scores[f"{name}_{score}"] = score_ret[0][1] # All other scores will also be logged for (subscore, value) in score_ret: From 45eb94bc976570954b34c46ddf15f52c8bbba67a Mon Sep 17 00:00:00 2001 From: Joseph Turian Date: Sun, 21 Nov 2021 20:32:17 +0000 Subject: [PATCH 12/13] Comment update --- heareval/predictions/task_predictions.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py index 75fb2a63..af2fd415 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -257,13 +257,12 @@ def log_scores(self, name: str, score_args): """Logs the metric score value for each score defined for the model""" assert hasattr(self, "scores"), "Scores for the model should be defined" end_scores = {} + # The first score in the first `self.scores` is the optimization criterion for score in self.scores: score_ret = score(*score_args) validate_score_return_type(score_ret) # If the returned score is a tuple, store each subscore as separate entry if isinstance(score_ret, tuple): - # The first score in the returned tuple for the - # first `self.scores` is the optimization criterion end_scores[f"{name}_{score}"] = score_ret[0][1] # All other scores will also be logged for (subscore, value) in score_ret: From c515a5fe6674637cb74d0416b4017796ea4bd6ec Mon Sep 17 00:00:00 2001 From: Joseph Turian Date: Mon, 22 Nov 2021 10:26:10 +0000 Subject: [PATCH 13/13] rm math --- heareval/predictions/task_predictions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/heareval/predictions/task_predictions.py b/heareval/predictions/task_predictions.py index 3b6f9bb2..aba2749d 100755 --- a/heareval/predictions/task_predictions.py +++ b/heareval/predictions/task_predictions.py @@ -15,7 +15,6 @@ import copy import json import logging -import math import multiprocessing import pickle import random
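
The series above changes the ScoreFunction contract so that a metric may return either a plain float or a tuple of named sub-scores, with the first entry acting as the primary value. A minimal sketch of a metric written against the post-series API follows; the DummyEventScore class and its fixed numbers are illustrative only and do not exist in heareval, and the base-class constructor arguments are assumed from the diffs above.

from typing import Any, Tuple

from heareval.score import ScoreFunction, validate_score_return_type


class DummyEventScore(ScoreFunction):
    # Illustrative metric reporting several named sub-scores at once, in the
    # order they should be logged; the first one is treated downstream as the
    # primary (optimisation) value.
    name = "dummy_event_score"

    def _compute(
        self, predictions: Any, targets: Any, **kwargs
    ) -> Tuple[Tuple[str, float], ...]:
        # A real SED metric would pull these out of sed_eval's
        # results_overall_metrics(); constants keep the sketch short.
        return (("f_measure", 0.5), ("precision", 0.4), ("recall", 0.7))


# label_to_idx / name / maximize are the base-class kwargs seen in the diffs
# above; this construction is an assumption, not a documented heareval API.
score = DummyEventScore(label_to_idx={"dog": 0, "cat": 1}, name="dummy_event_score")
ret = score({}, {})               # __call__ runs _compute, then validates the return
validate_score_return_type(ret)   # tuple of (str, float) pairs: accepted
validate_score_return_type(0.25)  # a bare float is also accepted
print(ret[0])                     # ('f_measure', 0.5), the primary sub-score

Anything else is rejected by the validator, for example a list of pairs or a tuple whose values are ints, which is why patch 07 narrows the accepted leaf type from (float, int) to float.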
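
On the prediction side, log_scores (introduced in patch 03 and refined through the rest of the series) flattens whichever shape the metric returns into individual logged entries. The helper below mirrors that flattening logic outside of PyTorch Lightning so it can be read in isolation; flatten_scores and the sample values are illustrative names, not part of heareval.

from typing import Dict, Tuple, Union


def flatten_scores(
    split: str,
    metric_name: str,
    score_ret: Union[Tuple[Tuple[str, float], ...], float],
) -> Dict[str, float]:
    # Mirror of the branching in AbstractPredictionModel.log_scores: a tuple
    # return contributes one "<split>_<metric>" entry (the primary, first
    # sub-score) plus one "<split>_<metric>_<sub>" entry per sub-score; a
    # float return contributes a single entry.
    flat: Dict[str, float] = {}
    if isinstance(score_ret, tuple):
        flat[f"{split}_{metric_name}"] = score_ret[0][1]
        for subscore, value in score_ret:
            flat[f"{split}_{metric_name}_{subscore}"] = value
    elif isinstance(score_ret, float):
        flat[f"{split}_{metric_name}"] = score_ret
    else:
        raise ValueError(f"Unexpected return type {type(score_ret)}")
    return flat


print(flatten_scores("val", "event_onset_200ms_fms",
                     (("f_measure", 0.61), ("precision", 0.55), ("recall", 0.68))))
# {'val_event_onset_200ms_fms': 0.61, 'val_event_onset_200ms_fms_f_measure': 0.61,
#  'val_event_onset_200ms_fms_precision': 0.55, 'val_event_onset_200ms_fms_recall': 0.68}
print(flatten_scores("test", "top1_acc", 0.83))
# {'test_top1_acc': 0.83}

In the actual module these flat keys are passed to self.log, and the "<split>_score" entry logged alongside them is taken from the first metric in self.scores, which is why the primary sub-score has to come first in the returned tuple.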