Skip to content

Commit

Permalink
Use custom splitting logic for large files (again)
Browse files Browse the repository at this point in the history
  • Loading branch information
AliOsm committed Jun 28, 2024
1 parent 7334bca commit bdb59b0
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 38 deletions.
87 changes: 66 additions & 21 deletions tafrigh/audio_splitter.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
import io
import os
import subprocess

from concurrent.futures import ThreadPoolExecutor

from auditok import AudioRegion
from auditok.core import split
from pydub import AudioSegment
from pydub.exceptions import CouldntDecodeError
from pydub.generators import WhiteNoise
from pydub.utils import mediainfo


MAX_FILE_DURATION = 4 * 60 * 60
LARGE_FILE_DURATION = 3 * 60 * 60
MAX_FILE_DURATION = 1 * 60 * 60


class AudioSplitter:
Expand All @@ -23,9 +25,25 @@ def split(
energy_threshold: float = 50,
noise_seconds: int = 1,
noise_amplitude: int = 0,
from_split_large_file: bool = False,
) -> list[tuple[bytes, float, float]]:
try:
segments = [
file_info = mediainfo(file_path)

if (
not from_split_large_file
and ('duration' not in file_info or float(file_info['duration']) > LARGE_FILE_DURATION)
):
return self._split_large_file(
file_path,
min_dur,
max_dur,
max_silence,
energy_threshold,
noise_seconds,
noise_amplitude,
)
else:
return self._segments_to_data([
(
self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude),
segment.meta.start,
Expand All @@ -36,26 +54,53 @@ def split(
max_dur=max_dur,
max_silence=max_silence,
energy_threshold=energy_threshold,
large_file=float(mediainfo(file_path)['duration']) > MAX_FILE_DURATION,
)
]
except CouldntDecodeError:
segments = [
(
self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude),
segment.meta.start,
segment.meta.end,
) for segment in split(
file_path,
min_dur=min_dur,
max_dur=max_dur,
max_silence=max_silence,
energy_threshold=energy_threshold,
large_file=True,
])

def _split_large_file(
    self,
    file_path: str,
    min_dur: float,
    max_dur: float,
    max_silence: float,
    energy_threshold: float,
    noise_seconds: int,
    noise_amplitude: int,
) -> list[tuple[bytes, float, float]]:
    """Split a long media file by cutting it into chunks with ffmpeg first.

    The file is cut into consecutive chunks of at most ``MAX_FILE_DURATION``
    seconds using ``ffmpeg -c copy``. Each chunk is passed to ``self.split``
    with ``from_split_large_file=True`` so that it is not chunked again, and
    the start/end of every returned segment is shifted by the chunk's offset
    so the timestamps refer to the original file.

    Args:
        file_path: Path of the media file to split.
        min_dur: Forwarded unchanged to ``self.split``.
        max_dur: Forwarded unchanged to ``self.split``.
        max_silence: Forwarded unchanged to ``self.split``.
        energy_threshold: Forwarded unchanged to ``self.split``.
        noise_seconds: Forwarded unchanged to ``self.split``.
        noise_amplitude: Forwarded unchanged to ``self.split``.

    Returns:
        The ``(data, start, end)`` tuples produced by ``self.split`` for
        every chunk, concatenated in chunk order, with ``start``/``end``
        offset by the chunk's position in the original file.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status while extracting a chunk.
    """
    # Local import: only this method needs it.
    import tempfile

    try:
        duration = float(mediainfo(file_path)['duration'])
    except (KeyError, TypeError, ValueError):
        # `split` sends files here precisely when the probe reports no
        # duration, so a missing/unparsable value must not crash. Without a
        # duration the file cannot be cut by time; process it in one pass.
        return self.split(
            file_path,
            min_dur,
            max_dur,
            max_silence,
            energy_threshold,
            noise_seconds,
            noise_amplitude,
            True,
        )

    base_name, ext = os.path.splitext(os.path.abspath(file_path))

    # Use a unique scratch file next to the input (same extension, so ffmpeg
    # picks the same container) instead of a fixed "<name>_part<ext>" path
    # that could overwrite, and later delete, an unrelated existing file.
    fd, output_file = tempfile.mkstemp(
        prefix=f"{os.path.basename(base_name)}_part_",
        suffix=ext,
        dir=os.path.dirname(base_name),
    )
    os.close(fd)

    segments = []
    chunk_starts = range(0, int(duration), MAX_FILE_DURATION)

    try:
        for start_time in chunk_starts:
            # The final chunk runs to the real end of the file so that a
            # fractional tail is not dropped when the floored duration is an
            # exact multiple of MAX_FILE_DURATION.
            if start_time == chunk_starts[-1]:
                end_time = duration
            else:
                end_time = start_time + MAX_FILE_DURATION

            # check=True: fail loudly here rather than handing a missing or
            # empty chunk file to `self.split`.
            subprocess.run(
                ['ffmpeg', '-y', '-i', file_path, '-ss', str(start_time), '-to', str(end_time), '-c', 'copy', output_file],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=True,
            )

            part_segments = self.split(
                output_file,
                min_dur,
                max_dur,
                max_silence,
                energy_threshold,
                noise_seconds,
                noise_amplitude,
                True,
            )

            # Chunk-relative timestamps -> timestamps in the original file.
            segments.extend(
                (data, start + start_time, end + start_time)
                for data, start, end in part_segments
            )
    finally:
        # Always clean up the scratch file, even if ffmpeg or `split` failed.
        if os.path.exists(output_file):
            os.remove(output_file)

    return segments

def _expand_segment_with_noise(self, segment: AudioRegion, noise_seconds: int, noise_amplitude: int) -> AudioSegment:
audio_segment = AudioSegment(
Expand Down
9 changes: 2 additions & 7 deletions tafrigh/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import requests

from .recognizers.wit_recognizer import WitRecognizer
from .utils.wit import file_utils as wit_file_utils
except ModuleNotFoundError:
pass

Expand Down Expand Up @@ -150,8 +149,7 @@ def process_local(
file_path = str(file['file_path'].absolute())

if config.use_wit():
wav_file_path = str(wit_file_utils.convert_to_wav(file['file_path']).absolute())
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(wav_file_path, config.wit)
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(file_path, config.wit)
else:
recognize_generator = WhisperRecognizer(verbose=config.input.verbose).recognize(
file_path,
Expand All @@ -167,9 +165,6 @@ def process_local(
segments: list[SegmentType] = exception.value
break

if config.use_wit() and file['file_path'].suffix != '.wav':
Path(wav_file_path).unlink(missing_ok=True)

writer.write_all(Path(file['file_name']).stem, segments, config.output)

for segment in segments:
Expand Down Expand Up @@ -218,7 +213,7 @@ def process_url(

continue

file_path = os.path.join(config.output.output_dir, f"{element['id']}.wav")
file_path = os.path.join(config.output.output_dir, f"{element['id']}.{element['ext']}")

if config.use_wit():
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(file_path, config.wit)
Expand Down
Empty file removed tafrigh/utils/wit/__init__.py
Empty file.
10 changes: 0 additions & 10 deletions tafrigh/utils/wit/file_utils.py

This file was deleted.

0 comments on commit bdb59b0

Please sign in to comment.