Skip to content

Commit

Permalink
retain order of recs when writing recordings dataframe
Browse files Browse the repository at this point in the history
  • Loading branch information
LoannPeurey committed Feb 11, 2025
1 parent a440fe1 commit 990b44b
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 20 deletions.
2 changes: 1 addition & 1 deletion ChildProject/cmdline.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,7 +731,7 @@ def compute_durations(args):
how="left",
left_on="recording_filename",
right_on="recording_filename",
).set_index('index')
).set_index('index').sort_index()
recordings["duration"].fillna(0, inplace=True)
recordings["duration"] = recordings["duration"].astype("Int64")

Expand Down
21 changes: 4 additions & 17 deletions ChildProject/projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,27 +480,14 @@ def write_recordings(self, keep_discarded: bool = True, keep_original_columns: b
if self.recordings is None:
#logger to add (can not write recordings file as recordings is not initialized)
return None
#get the file as reference point
current_csv = pd.read_csv(self.path / METADATA_FOLDER /RECORDINGS_CSV)

if 'discard' in current_csv.columns and keep_discarded:
# put the discard column into a usable form
current_csv['discard'] = current_csv['discard'].apply(np.nan_to_num).astype(int, errors='ignore')
# keep the discarded lines somewhere
discarded_recs = current_csv[current_csv['discard'].astype(str) == "1"]

recs_to_write = pd.concat([self.recordings, discarded_recs])
if keep_discarded:
recs_to_write = pd.concat([self.recordings, self.discarded_recordings])
recs_to_write = recs_to_write.astype(self.recordings.dtypes.to_dict())
else:
recs_to_write = self.recordings

if keep_original_columns:
columns = current_csv.columns
for new in self.recordings.columns:
if new not in columns:
columns = columns.append(pd.Index([new]))
else:
columns = self.recordings.columns

columns = self.recordings.columns

recs_to_write.sort_index().to_csv(self.path / METADATA_FOLDER / RECORDINGS_CSV,columns = columns,index=False)
return recs_to_write
Expand Down
4 changes: 2 additions & 2 deletions ChildProject/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,8 +176,8 @@ def get_audio_duration(filename: Path):

duration = 0
try:
duration = info(filename).duration
except:
duration = info(str(filename)).duration
except Exception as e:
print('Warning: could not read duration for {}, setting duration to 0'.format(filename))
pass

Expand Down

0 comments on commit 990b44b

Please sign in to comment.