From c9a4606cc20cbe87fee4f36d4fb29b718daf8c35 Mon Sep 17 00:00:00 2001 From: Benjamin Cates <34255563+benjamin-cates@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:10:29 -0700 Subject: [PATCH] Audio splitter fixes (#154) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Audio splitter hotfixes * Put file splitting back * Fix audio splitting * Line too long teehee * Pyright 🥰 --- .../chunking_methods/audio_splitter.py | 40 ++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/pyha_analyzer/chunking_methods/audio_splitter.py b/pyha_analyzer/chunking_methods/audio_splitter.py index 596551b..d9ca710 100644 --- a/pyha_analyzer/chunking_methods/audio_splitter.py +++ b/pyha_analyzer/chunking_methods/audio_splitter.py @@ -1,13 +1,14 @@ """ Splits longer audio files into smaller ones """ import os +from typing import List import pandas as pd import torch import torchaudio from tqdm import tqdm CONFIG = { - "metadata_csv": "annotations_chunked.csv", + "metadata_csv": "annotations.csv", "metadata_output": "annotations_split.csv", "audio_dir": "input", @@ -16,10 +17,10 @@ "output_format": "flac", # Supports torch audio formats "chunk_length_s": 60 * 5, # Length of each clip in seconds - "overlap_s": 10, # Overlap to add to each file in seconds "file_name_col": "FILE NAME", "offset_col": "OFFSET", + "duration_col": "DURATION" } @@ -44,27 +45,46 @@ def split_audio_file(path: str): for i in range(num_splits): # Create slice - aud_slice = audio[i*split_len*sample_rate:((i+1)*split_len+CONFIG["overlap_s"])*sample_rate] + aud_slice = audio[i*split_len*sample_rate:(i+1)*split_len*sample_rate] torchaudio.save(os.path.join(CONFIG["output_dir"], # type: ignore output_file_name(path,i,CONFIG["output_format"])), torch.unsqueeze(aud_slice,0), sample_rate) -def edit_row(row: pd.Series) -> pd.Series: +def edit_row(row: pd.Series) -> List[pd.Series]: """ Edits a row of the metadata csv to reflect the new audio files Changes file name and offset """ - offset = row[CONFIG["offset_col"]] - file_index = int(offset/CONFIG["chunk_length_s"]) + chunk_len = CONFIG["chunk_length_s"] + file_index = int(row[CONFIG["offset_col"]]/chunk_len) # Update file name + cur_file_name = str(row[CONFIG["file_name_col"]]) row[CONFIG["file_name_col"]] = \ - output_file_name(str(row[CONFIG["file_name_col"]]), file_index, CONFIG["output_format"]) + output_file_name(cur_file_name, file_index, CONFIG["output_format"]) # Shift offset - row[CONFIG["offset_col"]] -= file_index * CONFIG["chunk_length_s"] - return row + end = row[CONFIG["offset_col"]] + row[CONFIG["duration_col"]] + row[CONFIG["offset_col"]] -= file_index * chunk_len + row[CONFIG["duration_col"]] = \ + min(chunk_len - row[CONFIG["offset_col"]], row[CONFIG["duration_col"]]) # type: ignore + row["CLIP LENGTH"] = chunk_len + out = [row] + while end > (file_index+1) * chunk_len: + file_index += 1 + row_cpy = row.copy() + row_cpy[CONFIG["file_name_col"]] = \ + output_file_name(cur_file_name, file_index, CONFIG["output_format"]) + row_cpy[CONFIG["offset_col"]] = 0 + row_cpy[CONFIG["duration_col"]] = min(chunk_len, end - file_index * chunk_len) + out.append(row_cpy) + return out def edit_metadata(df: pd.DataFrame): """ Edits metadata to reflect the new audio files """ - return df.apply(edit_row, axis=1) + out_list = [] + for _, row in df.iterrows(): + out_list.extend(edit_row(row)) + out = pd.DataFrame(out_list) + out.reset_index(drop=True,inplace=True) + return out def split_all(input_dir: str): """ Splits all audio files in the input directory """