diff --git a/flair/models/multitask_model.py b/flair/models/multitask_model.py
index a20ad01ac7..8e953e757b 100644
--- a/flair/models/multitask_model.py
+++ b/flair/models/multitask_model.py
@@ -198,7 +198,16 @@ def evaluate(  # type: ignore[override]
             # Add metrics so they will be available to _publish_eval_result.
             for avg_type in ("micro avg", "macro avg"):
                 for metric_type in ("f1-score", "precision", "recall"):
-                    scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
+                    if result.classification_report.get(avg_type) and result.classification_report[avg_type].get(
+                        metric_type
+                    ):
+                        scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
+
+            # The above metrics only apply to classification tasks. This adds
+            # regression metrics also.
+            for metric_type in ("mse", "mae", "pearson", "spearman"):
+                if result.scores.get(metric_type):
+                    scores[(task_id, metric_type)] = result.scores[metric_type]
 
         scores["loss"] = loss.item() / len(batch_split)
 
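
For context, here is a minimal standalone sketch of how the guarded lookups behave when a multitask setup mixes classification and regression tasks. The `SimpleNamespace` stand-ins, task names, and metric values are hypothetical and only mimic the shape of flair's evaluation `Result`; they are not part of this change.

```python
# Sketch only: SimpleNamespace objects stand in for flair's Result.
from types import SimpleNamespace

# A classification-style result: averaged report entries, no regression scores.
clf_result = SimpleNamespace(
    classification_report={"micro avg": {"f1-score": 0.91, "precision": 0.90, "recall": 0.92}},
    scores={"loss": 0.3},
)
# A regression-style result: empty report, regression metrics in `scores`.
reg_result = SimpleNamespace(
    classification_report={},
    scores={"loss": 0.5, "mse": 0.12, "mae": 0.25, "pearson": 0.88, "spearman": 0.85},
)

scores = {}
for task_id, result in (("ner", clf_result), ("sts", reg_result)):
    for avg_type in ("micro avg", "macro avg"):
        for metric_type in ("f1-score", "precision", "recall"):
            # The .get() guards keep a missing average or metric from raising KeyError.
            if result.classification_report.get(avg_type) and result.classification_report[avg_type].get(metric_type):
                scores[(task_id, avg_type, metric_type)] = result.classification_report[avg_type][metric_type]
    for metric_type in ("mse", "mae", "pearson", "spearman"):
        if result.scores.get(metric_type):
            scores[(task_id, metric_type)] = result.scores[metric_type]

print(scores)
# Produces classification keys like ('ner', 'micro avg', 'f1-score') and
# regression keys like ('sts', 'mse'), without a KeyError for either task type.
```

The design choice here is that a regression task simply has no "micro avg"/"macro avg" entries in its classification report, so the unguarded indexing in the old code would fail; the `.get()` checks skip those keys and the second loop picks up whatever regression metrics the task actually reported.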