Skip to content

Commit

Permalink
Merge pull request #488 from LAAC-LSCP/projects/valid_discarded_recs
Browse files Browse the repository at this point in the history
Projects/valid discarded recs
  • Loading branch information
LoannPeurey authored Feb 11, 2025
2 parents 497619b + e56c8e3 commit 0bc09a5
Show file tree
Hide file tree
Showing 4 changed files with 23 additions and 9 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ All notable changes to this project will be documented in this file.

- Use pathlib instead of os.path across the package (except in a few cases where using os.path is justified)
- the overview CLI call has a very different output, giving more detailed information about recordings, participants and annotations
- discarded items (recordings and children) are now stored in the ChildProject object

## [0.2.3] 2024-11-21

Expand Down
17 changes: 14 additions & 3 deletions ChildProject/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,13 +408,13 @@ def read(self, verbose=False, accumulate=True):

if self.ignore_discarded and "discard" in self.ct.df:
self.ct.df['discard'] = self.ct.df["discard"].apply(np.nan_to_num).astype(int, errors='ignore')
self.ct.df = self.ct.df[self.ct.df["discard"].astype(str) != "1"]
self.discarded_children = self.ct.df[self.ct.df["discard"].astype(str) == "1"]
self.ct.df = self.ct.df[self.ct.df["discard"].astype(str) != "1"]

if self.ignore_discarded and "discard" in self.rt.df:
self.rt.df['discard'] = self.rt.df["discard"].apply(np.nan_to_num).astype(int, errors='ignore')
self.rt.df = self.rt.df[self.rt.df["discard"].astype(str) != "1"]
self.discarded_recordings = self.rt.df[self.rt.df['discard'].astype(str) == '1']
self.rt.df = self.rt.df[self.rt.df["discard"].astype(str) != "1"]

self.children = self.ct.df
self.recordings = self.rt.df
Expand Down Expand Up @@ -633,6 +633,17 @@ def validate(self, ignore_recordings: bool = False, profile: str = None, accumul
for f in pd.core.common.flatten(files)
]

discarded_files = [
self.discarded_recordings[c.name].tolist()
for c in self.RECORDINGS_COLUMNS
if c.filename and c.name in self.recordings.columns
]

indexed_discarded_files = [
(self.path / RAW_RECORDINGS / str(f)).absolute()
for f in pd.core.common.flatten(discarded_files)
]

recordings_files = (self.path / RAW_RECORDINGS).rglob("*.*")

for rf in recordings_files:
Expand All @@ -644,7 +655,7 @@ def validate(self, ignore_recordings: bool = False, profile: str = None, accumul
continue

ap = rf.absolute()
if ap not in indexed_files:
if ap not in indexed_files and ap not in indexed_discarded_files:
self.warnings.append("file '{}' not indexed.".format(rf))

return self.errors, self.warnings
Expand Down
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def test_overview(project):
["child-project", "overview", PATH]
)
assert exit_code == 0
assert stdout == f"\n\x1b[1m2 recordings with 0.00 hours 2 locally (0 discarded)\x1b[0m:\n\x1b[94mdate range :\x1b[0m 2020-04-20 to 2020-04-21\n\x1b[94mdevices :\x1b[0m usb (0.00h 2/2 locally);\n\n\x1b[1m1 participants\x1b[0m:\n\x1b[94mage range :\x1b[0m 3.6mo to 3.6mo\n\x1b[94mlanguages :\x1b[0m\n\n\x1b[1mannotations:\x1b[0m\nduration method algo version date transcr\n 8.0s automated VTC 1 2024-04-07 \x1b[94mvtc_present\x1b[0m\n\n"
assert stdout == f"\n\x1b[1m2 recordings with 0.00 hours 2 locally (1 discarded)\x1b[0m:\n\x1b[94mdate range :\x1b[0m 2020-04-20 to 2020-04-21\n\x1b[94mdevices :\x1b[0m usb (0.00h 2/2 locally);\n\n\x1b[1m1 participants\x1b[0m:\n\x1b[94mage range :\x1b[0m 3.6mo to 3.6mo\n\x1b[94mlanguages :\x1b[0m\n\n\x1b[1mannotations:\x1b[0m\nduration method algo version date transcr\n 8.0s automated VTC 1 2024-04-07 \x1b[94mvtc_present\x1b[0m\n\n"
assert stderr == f"\x1b[33mWARNING \x1b[0m column(s) child_dob overwritten by {Path('output/cli/metadata/children/0_test.csv')} \x1b[35m[ChildProject.projects]\x1b[0m\n\x1b[33mWARNING \x1b[0m column(s) notes overwritten by {Path('output/cli/metadata/recordings/1_very_confidential.csv')} \x1b[35m[ChildProject.projects]\x1b[0m\n\x1b[33mWARNING \x1b[0m column(s) date_iso overwritten by {Path('output/cli/metadata/recordings/0_confidential.csv')} \x1b[35m[ChildProject.projects]\x1b[0m\n"


Expand Down
12 changes: 7 additions & 5 deletions tests/test_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,18 @@ def test_enforce_dtypes():
assert project.recordings["child_id"].dtype.kind == "i"
assert project.children["child_id"].dtype.kind == "i"

@pytest.mark.parametrize("idis,rshape,cshape",
[(True,2,1),
(False,3,2),
@pytest.mark.parametrize("idis,rshape,cshape,drshape,dcshape",
[(True,2,1,1,1),
(False,3,2,0,0),
])
def test_ignore_discarded(idis,rshape,cshape):
def test_ignore_discarded(idis, rshape, cshape, drshape, dcshape):
project = ChildProject("examples/valid_raw_data", ignore_discarded=idis)
project.read()

assert project.recordings.shape[0] == rshape
assert project.discarded_recordings.shape[0] == drshape
assert project.children.shape[0] == cshape
assert project.discarded_children.shape[0] == dcshape


def test_compute_ages():
Expand Down Expand Up @@ -82,4 +84,4 @@ def test_projects_read(project, error, chi_lines, rec_lines):
def test_dict_summary(project):
project.read()
summary = project.dict_summary()
assert summary == {'recordings': {'count': 2, 'duration': 8000, 'first_date': '2020-04-20', 'last_date': '2020-04-21', 'discarded': 0, 'devices': {'usb': {'count': 2, 'duration': 8000}}}, 'children': {'count': 1, 'min_age': 3.6139630390143735, 'max_age': 3.646817248459959, 'M': None, 'F': None, 'languages': {}, 'monolingual': None, 'multilingual': None, 'normative': None, 'non-normative': None}}
assert summary == {'recordings': {'count': 2, 'duration': 8000, 'first_date': '2020-04-20', 'last_date': '2020-04-21', 'discarded': 1, 'devices': {'usb': {'count': 2, 'duration': 8000}}}, 'children': {'count': 1, 'min_age': 3.6139630390143735, 'max_age': 3.646817248459959, 'M': None, 'F': None, 'languages': {}, 'monolingual': None, 'multilingual': None, 'normative': None, 'non-normative': None}}

0 comments on commit 0bc09a5

Please sign in to comment.