diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index a20ad01ac7..8e953e757b 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -198,7 +198,16 @@ def evaluate(  # type: ignore[override]
             # Add metrics so they will be available to _publish_eval_result.
             for avg_type in ("micro avg", "macro avg"):
                 for metric_type in ("f1-score", "precision", "recall"):
-                    scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
+                    if result.classification_report.get(avg_type) and result.classification_report[avg_type].get(
+                        metric_type
+                    ):
+                        scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
+
+            # The above metrics only apply to classification tasks. This adds
+            # regression metrics also.
+            for metric_type in ("mse", "mae", "pearson", "spearman"):
+                if result.scores.get(metric_type):
+                    scores[(task_id, metric_type)] = result.scores[metric_type]
 
         scores["loss"] = loss.item() / len(batch_split)
 
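
For context, here is a minimal standalone sketch of how the guarded lookups behave when a multitask setup mixes classification and regression tasks. The `SimpleNamespace` stand-ins, task names, and metric values are hypothetical and only mimic the shape of flair's evaluation `Result`; they are not part of this change.

```python
# Sketch only: SimpleNamespace objects stand in for flair's Result.
from types import SimpleNamespace

# A classification-style result: averaged report entries, no regression scores.
clf_result = SimpleNamespace(
    classification_report={"micro avg": {"f1-score": 0.91, "precision": 0.90, "recall": 0.92}},
    scores={"loss": 0.3},
)
# A regression-style result: empty report, regression metrics in `scores`.
reg_result = SimpleNamespace(
    classification_report={},
    scores={"loss": 0.5, "mse": 0.12, "mae": 0.25, "pearson": 0.88, "spearman": 0.85},
)

scores = {}
for task_id, result in (("ner", clf_result), ("sts", reg_result)):
    for avg_type in ("micro avg", "macro avg"):
        for metric_type in ("f1-score", "precision", "recall"):
            # The .get() guards keep a missing average or metric from raising KeyError.
            if result.classification_report.get(avg_type) and result.classification_report[avg_type].get(metric_type):
                scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
    for metric_type in ("mse", "mae", "pearson", "spearman"):
        if result.scores.get(metric_type):
            scores[(task_id, metric_type)] = result.scores[metric_type]

print(scores)
# Produces classification keys like ('ner', 'micro avg', 'f1-score') and
# regression keys like ('sts', 'mse'), without a KeyError for either task type.
```

The design choice here is that a regression task simply has no "micro avg"/"macro avg" entries in its classification report, so the unguarded indexing in the old code would fail; the `.get()` checks skip those keys and the second loop picks up whatever regression metrics the task actually reported.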