Skip to content

Commit

Permalink
improving error handling
Browse files Browse the repository at this point in the history
  • Loading branch information
lucasgautheron committed Nov 14, 2021
1 parent 627faab commit 63e3c64
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 13 deletions.
67 changes: 56 additions & 11 deletions ChildProject/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
from . import __version__
from .projects import ChildProject
from .converters import *
from .tables import IndexTable, IndexColumn
from .tables import (
IndexTable, IndexColumn,
assert_dataframe, assert_columns_presence
)
from .utils import Segment, intersect_ranges, path_is_parent


Expand Down Expand Up @@ -326,8 +329,10 @@ def validate(
:return: a tuple containing the list of errors and the list of warnings detected
:rtype: Tuple[List[str], List[str]]
"""
if not isinstance(annotations, pd.DataFrame):
if annotations is None:
annotations = self.annotations
else:
assert_dataframe("annotations", annotations)

annotations = annotations.dropna(subset=["annotation_filename"])

Expand Down Expand Up @@ -478,14 +483,9 @@ def import_annotations(
for c in AnnotationManager.INDEX_COLUMNS
if c.required and not c.generated
}
missing_columns = required_columns - set(input.columns)

if len(missing_columns):
raise IndexError(
"import_annotations requires the following missing columns: {}".format(
",".join(missing_columns)
)
)
assert_dataframe("input", input)
assert_columns_presence("input", input, required_columns)

missing_recordings = input[
~input["recording_filename"].isin(
Expand Down Expand Up @@ -939,6 +939,19 @@ def get_segments(self, annotations: pd.DataFrame) -> pd.DataFrame:
:return: dataframe of all the segments merged (as specified in :ref:`format-annotations-segments`), merged with ``annotations``.
:rtype: pd.DataFrame
"""
assert_dataframe("annotations", annotations)
assert_columns_presence(
"annotations",
annotations,
{
"annotation_filename",
"raw_filename",
"set",
"range_onset",
"range_offset",
},
)

annotations = annotations.dropna(subset=["annotation_filename"])
annotations.drop(columns=["raw_filename"], inplace=True)

Expand Down Expand Up @@ -989,6 +1002,13 @@ def get_collapsed_segments(self, annotations: pd.DataFrame) -> pd.DataFrame:
:return: dataframe of all the segments merged (as specified in :ref:`format-annotations-segments`), merged with ``annotations``
:rtype: pd.DataFrame
"""
assert_dataframe("annotations", annotations)
assert_columns_presence(
"annotations",
annotations,
{"range_onset", "range_offset", "recording_filename", "set",},
)

annotations["duration"] = (
annotations["range_offset"] - annotations["range_onset"]
).astype(float)
Expand Down Expand Up @@ -1033,6 +1053,13 @@ def get_within_time_range(
:rtype: pd.DataFrame
"""

assert_dataframe("annotations", annotations)
assert_columns_presence(
"annotations",
annotations,
{"recording_filename", "range_onset", "range_offset"},
)

def get_ms_since_midight(dt):
return (dt - dt.replace(hour=0, minute=0, second=0)).total_seconds() * 1000

Expand Down Expand Up @@ -1139,6 +1166,12 @@ def get_segments_timestamps(
both values will be set to NaT.
:rtype: pd.DataFrame
"""

assert_dataframe("segments", segments)
assert_columns_presence(
"segments", segments, {"recording_filename", onset, offset}
)

columns_to_merge = ["start_time"]
if not ignore_date:
columns_to_merge.append("date_iso")
Expand Down Expand Up @@ -1194,6 +1227,13 @@ def intersection(annotations: pd.DataFrame, sets: list = None) -> pd.DataFrame:
:return: dataframe of annotations, according to :ref:`format-annotations`
:rtype: pd.DataFrame
"""
assert_dataframe("annotations", annotations)
assert_columns_presence(
"annotations",
annotations,
{"recording_filename", "set", "range_onset", "range_offset"},
)

stack = []
recordings = list(annotations["recording_filename"].unique())

Expand Down Expand Up @@ -1241,15 +1281,15 @@ def intersection(annotations: pd.DataFrame, sets: list = None) -> pd.DataFrame:
return pd.concat(stack) if len(stack) else pd.DataFrame()

def set_from_path(self, path: str) -> str:
    """Infer the annotation set name from a path inside the project.

    :param path: path to a file or folder expected to lie under
        ``<project>/annotations``.
    :return: the annotation set name (path relative to the annotations
        folder), or ``None`` if ``path`` is not inside the project's
        annotations directory.
    """
    annotations_path = os.path.join(self.project.path, "annotations")

    # Paths outside of the annotations tree do not belong to any set.
    if not path_is_parent(annotations_path, path):
        return None

    annotation_set = os.path.relpath(path, annotations_path)

    # 'raw' and 'converted' are storage subfolders within a set,
    # not part of the set name itself — strip them off.
    basename = os.path.basename(annotation_set)
    if basename == "raw" or basename == "converted":
        annotation_set = os.path.dirname(annotation_set)

    return annotation_set
Expand All @@ -1268,6 +1308,11 @@ def clip_segments(segments: pd.DataFrame, start: int, stop: int) -> pd.DataFrame
:return: Dataframe of the clipped segments
:rtype: pd.DataFrame
"""
assert_dataframe("segments", segments)
assert_columns_presence(
"segments", segments, {"segment_onset", "segment_offset"}
)

start = int(start)
stop = int(stop)

Expand Down
29 changes: 27 additions & 2 deletions ChildProject/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,29 @@
import re
import datetime
import numpy as np
from typing import Union, Set, List


class MissingColumnsException(Exception):
    """Raised when a dataframe lacks columns that a caller requires."""

    def __init__(self, name: str, missing: Set):
        # Turn the set of absent column names into a comma-separated listing
        # for a readable error message.
        listing = ",".join(list(missing))
        message = f"dataframe {name} misses the following required columns: {listing}"
        super().__init__(message)


def assert_dataframe(name: str, df: pd.DataFrame):
    """Assert that ``df`` is a pandas DataFrame.

    :param name: human-readable name used in the failure message.
    :param df: object expected to be a DataFrame.
    :raises AssertionError: if ``df`` is not a ``pd.DataFrame``.
    """
    message = f"{name} should be a dataframe, but type is '{type(df)}' instead."
    assert isinstance(df, pd.DataFrame), message


def assert_columns_presence(name: str, df: pd.DataFrame, columns: Union[Set, List]):
    """Ensure ``df`` contains every column listed in ``columns``.

    :param name: human-readable name of the dataframe, used in the error message.
    :param df: dataframe to check.
    :param columns: required column names.
    :raises MissingColumnsException: if any required column is absent from ``df``.
    """
    # Required columns the dataframe does NOT have. The previous version
    # computed set(df.columns) - set(columns), which instead reported the
    # dataframe's *extra* columns and never caught truly missing ones.
    missing = set(columns) - set(df.columns)

    if missing:
        raise MissingColumnsException(name, missing)


def is_boolean(x):
Expand Down Expand Up @@ -44,7 +67,7 @@ def __repr__(self):


class IndexTable:
def __init__(self, name, path=None, columns=[],enforce_dtypes: bool = False):
def __init__(self, name, path=None, columns=[], enforce_dtypes: bool = False):
self.name = name
self.path = path
self.columns = columns
Expand Down Expand Up @@ -78,7 +101,9 @@ def read(self):
}

if self.enforce_dtypes:
dtype = {column.name: column.dtype for column in self.columns if column.dtype}
dtype = {
column.name: column.dtype for column in self.columns if column.dtype
}
self.df = pd.read_csv(self.path, dtype=dtype, **pd_flags)
else:
self.df = pd.read_csv(self.path, **pd_flags)
Expand Down

0 comments on commit 63e3c64

Please sign in to comment.