Skip to content

Commit

Permalink
Remove custom large files splitting logic and utilize large_file option from auditok
Browse files Browse the repository at this point in the history
  • Loading branch information
AliOsm committed Jun 27, 2024
1 parent cb70e08 commit 45a2503
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 64 deletions.
81 changes: 19 additions & 62 deletions src/audio_splitter.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,16 @@
import io
import os
import subprocess

from concurrent.futures import ThreadPoolExecutor

from auditok import AudioRegion
from auditok.core import split
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
from pydub.generators import WhiteNoise
from pydub.utils import mediainfo


MAX_FILE_SIZE = 4 * 1024 * 1024 * 1024
MAX_FILE_DURATION = 1 * 60 * 60
MAX_FILE_DURATION = 4 * 60 * 60


class AudioSplitter:
Expand All @@ -26,20 +24,7 @@ def split(
noise_seconds: int = 1,
noise_amplitude: int = 0,
) -> list[tuple[bytes, float, float]]:
file_info = mediainfo(file_path)
file_size = float(file_info['duration']) * int(file_info['sample_rate']) * int(file_info['channels']) * 16 / 8

if file_size > MAX_FILE_SIZE:
return self._split_large_file(
file_path,
min_dur,
max_dur,
max_silence,
energy_threshold,
noise_seconds,
noise_amplitude,
)
else:
try:
segments = [
(
self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude),
Expand All @@ -51,54 +36,26 @@ def split(
max_dur=max_dur,
max_silence=max_silence,
energy_threshold=energy_threshold,
large_file=float(mediainfo(file_path)['duration']) > MAX_FILE_DURATION,
)
]

return self._segments_to_data(segments)

def _split_large_file(
self,
file_path: str,
min_dur: float,
max_dur: float,
max_silence: float,
energy_threshold: float,
noise_seconds: int,
noise_amplitude: int,
) -> list[tuple[bytes, float, float]]:
duration = float(mediainfo(file_path)['duration'])

segments = []

base_name, ext = os.path.splitext(file_path)
output_file = f"{base_name}_part{ext}"

for i in range(0, int(duration), MAX_FILE_DURATION):
start_time = i
end_time = min(i + MAX_FILE_DURATION, duration)

with open(os.devnull, 'w') as devnull:
subprocess.run(
['ffmpeg', '-y', '-i', file_path, '-ss', str(start_time), '-to', str(end_time), '-c', 'copy', output_file],
stdout=devnull,
stderr=devnull,
except CouldntDecodeError:
segments = [
(
self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude),
segment.meta.start,
segment.meta.end,
) for segment in split(
file_path,
min_dur=min_dur,
max_dur=max_dur,
max_silence=max_silence,
energy_threshold=energy_threshold,
large_file=True,
)
]

part_segments = self.split(
output_file,
min_dur,
max_dur,
max_silence,
energy_threshold,
noise_seconds,
noise_amplitude,
)

segments.extend([(segment[0], segment[1] + start_time, segment[2] + start_time) for segment in part_segments])

os.remove(output_file)

return segments
return self._segments_to_data(segments)

def _expand_segment_with_noise(self, segment: AudioRegion, noise_seconds: int, noise_amplitude: int) -> AudioSegment:
audio_segment = AudioSegment(
Expand Down
9 changes: 7 additions & 2 deletions src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import requests

from .recognizers.wit_recognizer import WitRecognizer
from .utils.wit import file_utils as wit_file_utils
except ModuleNotFoundError:
pass

Expand Down Expand Up @@ -149,7 +150,8 @@ def process_local(
file_path = str(file['file_path'].absolute())

if config.use_wit():
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(file_path, config.wit)
wav_file_path = str(wit_file_utils.convert_to_wav(file['file_path']).absolute())
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(wav_file_path, config.wit)
else:
recognize_generator = WhisperRecognizer(verbose=config.input.verbose).recognize(
file_path,
Expand All @@ -165,6 +167,9 @@ def process_local(
segments: list[SegmentType] = exception.value
break

if config.use_wit() and file['file_path'].suffix != '.wav':
Path(wav_file_path).unlink(missing_ok=True)

writer.write_all(Path(file['file_name']).stem, segments, config.output)

for segment in segments:
Expand Down Expand Up @@ -213,7 +218,7 @@ def process_url(

continue

file_path = os.path.join(config.output.output_dir, f"{element['id']}.{element['audio_ext']}")
file_path = os.path.join(config.output.output_dir, f"{element['id']}.wav")

if config.use_wit():
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(file_path, config.wit)
Expand Down
6 changes: 6 additions & 0 deletions src/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ def _config(self, download_archive: str | bool) -> dict[str, Any]:
'ignoreerrors': True,
'download_archive': download_archive,
'playlist_items': self.playlist_items,
'postprocessors': [
{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'wav',
},
],
}

def download(self, url: str, save_response: bool = False) -> dict[str, Any]:
Expand Down
Empty file added src/utils/wit/__init__.py
Empty file.
10 changes: 10 additions & 0 deletions src/utils/wit/file_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from pathlib import Path

from pydub import AudioSegment


def convert_to_wav(file: Path) -> Path:
    """Decode *file* with pydub and write it back out in WAV format.

    The output path is the input path with its suffix replaced by ``.wav``,
    so the converted file is written next to the original.

    NOTE(review): when *file* already has a ``.wav`` suffix the output path
    is the same as the input path, so the export writes over the input file.
    Confirm that this in-place rewrite is intended.

    Args:
        file: Path of the audio file to convert.

    Returns:
        Path of the WAV file that was written.
    """
    # Decode first so an unreadable input fails before the target is computed.
    decoded_audio = AudioSegment.from_file(str(file))

    wav_path = file.with_suffix('.wav')
    decoded_audio.export(str(wav_path), format='wav')

    return wav_path

0 comments on commit 45a2503

Please sign in to comment.