Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rework metrics #376

Merged
merged 22 commits into from
Jul 25, 2022
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
9c33de1
first version of metrics with parameters, only superClass Metrics
LoannPeurey Jun 1, 2022
e65dec8
redefine lenaMetrics class
LoannPeurey Jun 8, 2022
18e0460
prepare usual pipelines : custom lena aclew
LoannPeurey Jun 9, 2022
4163f27
metrics.py was left out
LoannPeurey Jun 13, 2022
1dae06d
csv file input, verbose errors for missing columns
LoannPeurey Jun 17, 2022
8fdef10
new dataset : organizing data
LoannPeurey Jun 21, 2022
f908d6b
change yaml output to have the full list of metrics
LoannPeurey Jun 22, 2022
c242627
intersect time ranges, new yml pipeline
LoannPeurey Jun 27, 2022
ff6868a
automated tests
LoannPeurey Jun 28, 2022
cc22c56
fix tests
LoannPeurey Jun 28, 2022
8687900
drop tests python 3.6 (support will be dropped)
LoannPeurey Jun 28, 2022
7ec1bf0
metricsFunctions use decorators
LoannPeurey Jun 30, 2022
0b6943a
Merge branch 'rework_metrics' of github.com:LAAC-LSCP/ChildProject in…
LoannPeurey Jun 30, 2022
1d9da92
avoid division by 0 in lp metric
LoannPeurey Jun 30, 2022
07013f3
multiple minor improvements, requirement python to >=3.7
LoannPeurey Jul 6, 2022
e41679d
update documentation
LoannPeurey Jul 8, 2022
89de391
fix doc generation
LoannPeurey Jul 8, 2022
0684f79
throw ValueError when given incorrect recording
LoannPeurey Jul 13, 2022
7daaba6
add comments, use getattr()
LoannPeurey Jul 18, 2022
2697a64
add Metrics docstring
LoannPeurey Jul 25, 2022
e57fdb2
do not catch exceptions for handling un-annotated segments
LoannPeurey Jul 25, 2022
d7592cf
resolve utils conflicts
LoannPeurey Jul 25, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,6 @@ docs/PROJECTS.md

# vim
*~

# sphinx created csvs
docs/source/*.csv
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: python
python:
- "3.6"
- "3.7"
- "3.8"
- "3.9"
Expand Down
38 changes: 18 additions & 20 deletions ChildProject/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from .projects import ChildProject
from .converters import *
from .tables import IndexTable, IndexColumn, assert_dataframe, assert_columns_presence
from .utils import Segment, intersect_ranges, path_is_parent
from .utils import Segment, intersect_ranges, path_is_parent, TimeInterval


class AnnotationManager:
Expand Down Expand Up @@ -1191,7 +1191,7 @@ def get_within_ranges(
return pd.concat(stack) if len(stack) else pd.DataFrame()

def get_within_time_range(
self, annotations: pd.DataFrame, start_time: str, end_time: str, errors="raise"
self, annotations: pd.DataFrame, interval : TimeInterval, errors="raise"
):
"""Clip all input annotations within a given HH:MM clock-time range.
Those that do not intersect the input time range at all are filtered out.
Expand Down Expand Up @@ -1222,24 +1222,12 @@ def get_within_time_range(
def get_ms_since_midight(dt):
    """Return the milliseconds elapsed between midnight of ``dt``'s day and ``dt``.

    :param dt: a datetime-like object supporting ``replace(hour, minute,
        second, microsecond)`` and subtraction yielding a ``timedelta``
    :return: elapsed time since 00:00:00.000000 of the same day, in milliseconds
    :rtype: float
    """
    # Reset microseconds as well: otherwise they are identical on both sides
    # of the subtraction and the sub-second part of ``dt`` is silently lost.
    midnight = dt.replace(hour=0, minute=0, second=0, microsecond=0)
    return (dt - midnight).total_seconds() * 1000

try:
start_dt = datetime.datetime.strptime(start_time, "%H:%M")
except:
raise ValueError(
f"invalid value for start_time ('{start_time}'); should have HH:MM format instead"
)

try:
end_dt = datetime.datetime.strptime(end_time, "%H:%M")
except:
raise ValueError(
f"invalid value for end_time ('{end_time}'); should have HH:MM format instead"
)

assert end_dt > start_dt, "end_time must follow start_time"
#assert end_dt > start_dt, "end_time must follow start_time"
# no reason to keep this condition, 23:00 to 03:00 is completely acceptable

start_ts = get_ms_since_midight(start_dt)
end_ts = get_ms_since_midight(end_dt)
if not isinstance(interval, TimeInterval): raise ValueError("interval must be a TimeInterval object")
start_ts = get_ms_since_midight(interval.start)
end_ts = get_ms_since_midight(interval.stop)

annotations = annotations.merge(
self.project.recordings[["recording_filename", "start_time"]], how="left"
Expand Down Expand Up @@ -1267,8 +1255,18 @@ def get_ms_since_midight(dt):

matches = []
for annotation in annotations.to_dict(orient="records"):
#onsets = np.arange(start_ts, annotation["range_offset_ts"], 86400 * 1000)
#offsets = onsets + (end_ts - start_ts)

onsets = np.arange(start_ts, annotation["range_offset_ts"], 86400 * 1000)
offsets = onsets + (end_ts - start_ts)
offsets = np.arange(end_ts, annotation["range_offset_ts"], 86400 * 1000)
#treat edge cases when the offset is after the end of annotation, onset before start etc
if len(onsets) > 0 and onsets[0] < annotation["range_onset_ts"] :
if len(offsets) > 0 and offsets[0] < annotation["range_onset_ts"]: onsets = onsets[1:]
else : onsets[0] = annotation["range_onset_ts"]
if len(offsets) > 0 and offsets[0] < annotation["range_onset_ts"] : offsets = offsets[1:]
if len(onsets) > 0 and len(offsets) > 0 and onsets[0] > offsets[0] : onsets = np.append(annotation["range_onset_ts"], onsets)
if (len(onsets) > 0 and len(offsets) > 0 and onsets[-1] > offsets[-1]) or len(onsets) > len(offsets) : offsets = np.append(offsets,annotation["range_offset_ts"])

xs = (Segment(onset, offset) for onset, offset in zip(onsets, offsets))
ys = iter(
Expand Down
1 change: 1 addition & 0 deletions ChildProject/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,6 +513,7 @@ def main():
register_pipeline("eaf-builder", EafBuilderPipeline)
register_pipeline("anonymize", AnonymizationPipeline)
register_pipeline("metrics", MetricsPipeline)
register_pipeline("metrics-specification", MetricsSpecificationPipeline)

args = parser.parse_args()
args.func(args)
1 change: 1 addition & 0 deletions ChildProject/pipelines/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
from .eafbuilder import EafBuilderPipeline
from .zooniverse import ZooniversePipeline
from .metrics import MetricsPipeline
from .metrics import MetricsSpecificationPipeline
from .processors import AudioProcessingPipeline
from .anonymize import AnonymizationPipeline
Loading