Skip to content

Commit

Permalink
Enhance AudioSplitter to handle files larger than 4GB
Browse files Browse the repository at this point in the history
  • Loading branch information
AliOsm committed Jun 27, 2024
1 parent 20fe210 commit cb87cda
Showing 1 changed file with 74 additions and 11 deletions.
85 changes: 74 additions & 11 deletions src/audio_splitter.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
import io
import os
import subprocess

from auditok import AudioRegion
from auditok.core import split
from pydub import AudioSegment
from pydub.generators import WhiteNoise
from pydub.utils import mediainfo


# Longest audio duration handled in a single pass (4 * 60 * 60; presumably
# 4 hours expressed in seconds, since it is compared against the 'duration'
# value reported by mediainfo -- confirm the unit). Longer files are cut into
# parts of at most this length before being split into segments.
MAX_FILE_DURATION = 4 * 60 * 60


class AudioSplitter:
Expand All @@ -17,22 +23,79 @@ def split(
noise_seconds: int = 1,
noise_amplitude: int = 0,
) -> list[tuple[bytes, float, float]]:
segments = [
(
self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude),
segment.meta.start,
segment.meta.end,
) for segment in split(
duration = float(mediainfo(file_path)['duration'])

if int(duration) > MAX_FILE_DURATION:
return self._split_large_file(
file_path,
min_dur=min_dur,
max_dur=max_dur,
max_silence=max_silence,
energy_threshold=energy_threshold,
min_dur,
max_dur,
max_silence,
energy_threshold,
noise_seconds,
noise_amplitude,
duration,
)
]
else:
segments = [
(
self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude),
segment.meta.start,
segment.meta.end,
) for segment in split(
file_path,
min_dur=min_dur,
max_dur=max_dur,
max_silence=max_silence,
energy_threshold=energy_threshold,
)
]

return self._segments_to_data(segments)

def _split_large_file(
    self,
    file_path: str,
    min_dur: float,
    max_dur: float,
    max_silence: float,
    energy_threshold: float,
    noise_seconds: int,
    noise_amplitude: int,
    duration: float,
) -> list[tuple[bytes, float, float]]:
    """Split a long audio file by processing it in fixed-length parts.

    The file is cut with ffmpeg into consecutive parts of at most
    ``MAX_FILE_DURATION`` each. Every part is written to a temporary file
    next to the input, passed to ``self.split``, and then deleted. The
    start/end times of the returned segments are shifted by the part's
    offset so they are relative to the original file.

    Args:
        file_path: Path of the audio file to process.
        min_dur: Forwarded unchanged to ``self.split``.
        max_dur: Forwarded unchanged to ``self.split``.
        max_silence: Forwarded unchanged to ``self.split``.
        energy_threshold: Forwarded unchanged to ``self.split``.
        noise_seconds: Forwarded unchanged to ``self.split``.
        noise_amplitude: Forwarded unchanged to ``self.split``.
        duration: Total duration of ``file_path`` (same unit as
            ``MAX_FILE_DURATION``).

    Returns:
        A list of ``(data, start, end)`` tuples covering the whole file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status while extracting a part.
    """
    segments = []

    base_name, ext = os.path.splitext(file_path)
    # A single temporary path is reused for every part; it is removed after
    # each iteration so parts never accumulate on disk.
    output_file = f"{base_name}_part{ext}"

    for start_time in range(0, int(duration), MAX_FILE_DURATION):
        end_time = min(start_time + MAX_FILE_DURATION, duration)

        try:
            # '-c copy' copies the streams without re-encoding; '-y'
            # overwrites any stale part file left by an earlier run.
            # check=True surfaces ffmpeg failures here instead of letting
            # them show up later as a confusing error on a missing or
            # truncated part file.
            subprocess.run(
                [
                    'ffmpeg', '-y', '-i', file_path,
                    '-ss', str(start_time), '-to', str(end_time),
                    '-c', 'copy', output_file,
                ],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )

            part_segments = self.split(
                output_file,
                min_dur,
                max_dur,
                max_silence,
                energy_threshold,
                noise_seconds,
                noise_amplitude,
            )
        finally:
            # Always clean up the temporary part, even when ffmpeg or the
            # split itself fails; ffmpeg may not have created the file.
            try:
                os.remove(output_file)
            except FileNotFoundError:
                pass

        # Times reported for a part are relative to that part; shift them
        # back onto the original file's timeline.
        segments.extend(
            (data, start + start_time, end + start_time)
            for data, start, end in part_segments
        )

    return segments

def _expand_segment_with_noise(self, segment: AudioRegion, noise_seconds: int, noise_amplitude: int) -> AudioSegment:
audio_segment = AudioSegment(
segment._data,
Expand Down

0 comments on commit cb87cda

Please sign in to comment.