Skip to content

Commit

Permalink
improving error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasgautheron committed Nov 14, 2021
1 parent 627faab commit 63e3c64
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 13 deletions.
67 changes: 56 additions & 11 deletions ChildProject/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
from . import __version__
from .projects import ChildProject
from .converters import *
from .tables import IndexTable, IndexColumn
from .tables import (
IndexTable, IndexColumn,
assert_dataframe, assert_columns_presence
)
from .utils import Segment, intersect_ranges, path_is_parent


Expand Down Expand Up @@ -326,8 +329,10 @@ def validate(
:return: a tuple containing the list of errors and the list of warnings detected
:rtype: Tuple[List[str], List[str]]
"""
if not isinstance(annotations, pd.DataFrame):
if annotations is None:
annotations = self.annotations
else:
assert_dataframe("annotations", annotations)

annotations = annotations.dropna(subset=["annotation_filename"])

Expand Down Expand Up @@ -478,14 +483,9 @@ def import_annotations(
for c in AnnotationManager.INDEX_COLUMNS
if c.required and not c.generated
}
missing_columns = required_columns - set(input.columns)

if len(missing_columns):
raise IndexError(
"import_annotations requires the following missing columns: {}".format(
",".join(missing_columns)
)
)
assert_dataframe("input", input)
assert_columns_presence("input", input, required_columns)

missing_recordings = input[
~input["recording_filename"].isin(
Expand Down Expand Up @@ -939,6 +939,19 @@ def get_segments(self, annotations: pd.DataFrame) -> pd.DataFrame:
:return: dataframe of all the segments merged (as specified in :ref:`format-annotations-segments`), merged with ``annotations``.
:rtype: pd.DataFrame
"""
assert_dataframe("annotations", annotations)
assert_columns_presence(
"annotations",
annotations,
{
"annotation_filename",
"raw_filename",
"set",
"range_onset",
"range_offset",
},
)

annotations = annotations.dropna(subset=["annotation_filename"])
annotations.drop(columns=["raw_filename"], inplace=True)

Expand Down Expand Up @@ -989,6 +1002,13 @@ def get_collapsed_segments(self, annotations: pd.DataFrame) -> pd.DataFrame:
:return: dataframe of all the segments merged (as specified in :ref:`format-annotations-segments`), merged with ``annotations``
:rtype: pd.DataFrame
"""
assert_dataframe("annotations", annotations)
assert_columns_presence(
"annotations",
annotations,
{"range_onset", "range_offset", "recording_filename", "set",},
)

annotations["duration"] = (
annotations["range_offset"] - annotations["range_onset"]
).astype(float)
Expand Down Expand Up @@ -1033,6 +1053,13 @@ def get_within_time_range(
:rtype: pd.DataFrame
"""

assert_dataframe("annotations", annotations)
assert_columns_presence(
"annotations",
annotations,
{"recording_filename", "range_onset", "range_offset"},
)

def get_ms_since_midight(dt):
return (dt - dt.replace(hour=0, minute=0, second=0)).total_seconds() * 1000

Expand Down Expand Up @@ -1139,6 +1166,12 @@ def get_segments_timestamps(
both values will be set to NaT.
:rtype: pd.DataFrame
"""

assert_dataframe("segments", segments)
assert_columns_presence(
"segments", segments, {"recording_filename", onset, offset}
)

columns_to_merge = ["start_time"]
if not ignore_date:
columns_to_merge.append("date_iso")
Expand Down Expand Up @@ -1194,6 +1227,13 @@ def intersection(annotations: pd.DataFrame, sets: list = None) -> pd.DataFrame:
:return: dataframe of annotations, according to :ref:`format-annotations`
:rtype: pd.DataFrame
"""
assert_dataframe("annotations", annotations)
assert_columns_presence(
"annotations",
annotations,
{"recording_filename", "set", "range_onset", "range_offset"},
)

stack = []
recordings = list(annotations["recording_filename"].unique())

Expand Down Expand Up @@ -1241,15 +1281,15 @@ def intersection(annotations: pd.DataFrame, sets: list = None) -> pd.DataFrame:
return pd.concat(stack) if len(stack) else pd.DataFrame()

def set_from_path(self, path: str) -> str:
    """Infer the annotation set name from a path inside the project.

    :param path: path to a file or folder expected to lie under
        ``<project>/annotations``.
    :return: the annotation set name (path relative to the annotations
        folder), or ``None`` if ``path`` is not inside the project's
        annotations directory.
    """
    annotations_path = os.path.join(self.project.path, "annotations")

    # Paths outside of the annotations tree do not belong to any set.
    if not path_is_parent(annotations_path, path):
        return None

    annotation_set = os.path.relpath(path, annotations_path)

    # 'raw' and 'converted' are storage subfolders within a set,
    # not part of the set name itself — strip them off.
    basename = os.path.basename(annotation_set)
    if basename == "raw" or basename == "converted":
        annotation_set = os.path.dirname(annotation_set)

    return annotation_set
Expand All @@ -1268,6 +1308,11 @@ def clip_segments(segments: pd.DataFrame, start: int, stop: int) -> pd.DataFrame
:return: Dataframe of the clipped segments
:rtype: pd.DataFrame
"""
assert_dataframe("segments", segments)
assert_columns_presence(
"segments", segments, {"segment_onset", "segment_offset"}
)

start = int(start)
stop = int(stop)

Expand Down
29 changes: 27 additions & 2 deletions ChildProject/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@
import re
import datetime
import numpy as np
from typing import Union, Set, List


class MissingColumnsException(Exception):
    """Raised when a dataframe lacks columns that a caller requires."""

    def __init__(self, name: str, missing: Set):
        # Turn the set of absent column names into a comma-separated listing
        # for a readable error message.
        listing = ",".join(list(missing))
        message = f"dataframe {name} misses the following required columns: {listing}"
        super().__init__(message)


def assert_dataframe(name: str, df: pd.DataFrame):
    """Assert that ``df`` is a pandas DataFrame.

    :param name: human-readable name used in the failure message.
    :param df: object expected to be a DataFrame.
    :raises AssertionError: if ``df`` is not a ``pd.DataFrame``.
    """
    message = f"{name} should be a dataframe, but type is '{type(df)}' instead."
    assert isinstance(df, pd.DataFrame), message


def assert_columns_presence(name: str, df: pd.DataFrame, columns: Union[Set, List]):
    """Ensure ``df`` contains every column listed in ``columns``.

    :param name: human-readable name of the dataframe, used in the error message.
    :param df: dataframe to check.
    :param columns: required column names.
    :raises MissingColumnsException: if any required column is absent from ``df``.
    """
    # Required columns the dataframe does NOT have. The previous version
    # computed set(df.columns) - set(columns), which instead reported the
    # dataframe's *extra* columns and never caught truly missing ones.
    missing = set(columns) - set(df.columns)

    if missing:
        raise MissingColumnsException(name, missing)


def is_boolean(x):
Expand Down Expand Up @@ -44,7 +67,7 @@ def __repr__(self):


class IndexTable:
def __init__(self, name, path=None, columns=[],enforce_dtypes: bool = False):
def __init__(self, name, path=None, columns=[], enforce_dtypes: bool = False):
self.name = name
self.path = path
self.columns = columns
Expand Down Expand Up @@ -78,7 +101,9 @@ def read(self):
}

if self.enforce_dtypes:
dtype = {column.name: column.dtype for column in self.columns if column.dtype}
dtype = {
column.name: column.dtype for column in self.columns if column.dtype
}
self.df = pd.read_csv(self.path, dtype=dtype, **pd_flags)
else:
self.df = pd.read_csv(self.path, **pd_flags)
Expand Down

0 comments on commit 63e3c64

Please sign in to comment.