Skip to content

Commit

Permalink
Remove the need for mp3 intermediate conversion
Browse files: browse the repository at this point in the history
  • Loading branch information
AliOsm committed Jun 27, 2024
1 parent bb319d7 commit 20fe210
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 69 deletions.
63 changes: 23 additions & 40 deletions src/audio_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,36 +14,26 @@ def split(
max_dur: float = 15,
max_silence: float = 0.5,
energy_threshold: float = 50,
expand_segments_with_noise: bool = False,
noise_seconds: int = 1,
noise_amplitude: int = 0,
) -> list[tuple[str, float, float]]:
segments = split(
file_path,
min_dur=min_dur,
max_dur=max_dur,
max_silence=max_silence,
energy_threshold=energy_threshold,
)

if expand_segments_with_noise:
segments = [
(
self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude),
segment.meta.start,
segment.meta.end,
) for segment in segments
]

return self._semgnets_to_data(segments)

def _expand_segment_with_noise(
self,
segment: AudioRegion,
noise_seconds: int,
noise_amplitude: int,
) -> AudioSegment:

) -> list[tuple[bytes, float, float]]:
segments = [
(
self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude),
segment.meta.start,
segment.meta.end,
) for segment in split(
file_path,
min_dur=min_dur,
max_dur=max_dur,
max_silence=max_silence,
energy_threshold=energy_threshold,
)
]

return self._segments_to_data(segments)

def _expand_segment_with_noise(self, segment: AudioRegion, noise_seconds: int, noise_amplitude: int) -> AudioSegment:
audio_segment = AudioSegment(
segment._data,
frame_rate=segment.sampling_rate,
Expand All @@ -56,20 +46,13 @@ def _expand_segment_with_noise(

return pre_noise + audio_segment + post_noise

def _semgnets_to_data(
self,
segments: list[AudioSegment | tuple[AudioSegment, float, float]],
) -> list[tuple[bytes, float, float]]:
segment_data = []
def _segments_to_data(self, segments: list[tuple[AudioSegment, float, float]]) -> list[tuple[bytes, float, float]]:
segments_data = []

for segment in segments:
output_buffer = io.BytesIO()

if isinstance(segment, tuple):
segment[0].export(output_buffer, format='mp3')
segment_data.append((output_buffer.getvalue(), segment[1], segment[2]))
else:
segment.export(output_buffer, format='mp3')
segment_data.append((output_buffer.getvalue(), segment.meta.start, segment.meta.end))
segment[0].export(output_buffer, format='mp3')
segments_data.append((output_buffer.getvalue(), segment[1], segment[2]))

return segment_data
return segments_data
11 changes: 3 additions & 8 deletions src/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import requests

from .recognizers.wit_recognizer import WitRecognizer
from .utils.wit import file_utils as wit_file_utils
except ModuleNotFoundError:
pass

Expand Down Expand Up @@ -150,8 +149,7 @@ def process_local(
file_path = str(file['file_path'].absolute())

if config.use_wit():
mp3_file_path = str(wit_file_utils.convert_to_mp3(file['file_path']).absolute())
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(mp3_file_path, config.wit)
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(file_path, config.wit)
else:
recognize_generator = WhisperRecognizer(verbose=config.input.verbose).recognize(
file_path,
Expand All @@ -167,9 +165,6 @@ def process_local(
segments: list[SegmentType] = exception.value
break

if config.use_wit() and file['file_path'].suffix != '.mp3':
Path(mp3_file_path).unlink(missing_ok=True)

writer.write_all(Path(file['file_name']).stem, segments, config.output)

for segment in segments:
Expand Down Expand Up @@ -212,13 +207,13 @@ def process_url(
yield new_progress_info, []

writer = Writer()
if config.input.skip_if_output_exist and writer.is_output_exist(element['id'], config.output):
if config.input.skip_if_output_exist and writer.is_output_exist(element['id'], element['audio_ext']):
new_progress_info['inner_status'] = 'completed'
yield new_progress_info, []

continue

file_path = os.path.join(config.output.output_dir, f"{element['id']}.mp3")
file_path = os.path.join(config.output.output_dir, f"{element['id']}.{element['audio_ext']}")

if config.use_wit():
recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(file_path, config.wit)
Expand Down
6 changes: 0 additions & 6 deletions src/downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ def _config(self, download_archive: str | bool) -> dict[str, Any]:
'ignoreerrors': True,
'download_archive': download_archive,
'playlist_items': self.playlist_items,
'postprocessors': [
{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
},
],
}

def download(self, url: str, save_response: bool = False) -> dict[str, Any]:
Expand Down
6 changes: 1 addition & 5 deletions src/recognizers/wit_recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,7 @@ def recognize(
file_path: str,
wit_config: Config.Wit,
) -> Generator[dict[str, float], None, list[SegmentType]]:
segments = AudioSplitter().split(
file_path,
max_dur=wit_config.max_cutting_duration,
expand_segments_with_noise=True,
)
segments = AudioSplitter().split(file_path, max_dur=wit_config.max_cutting_duration)

retry_strategy = Retry(
total=5,
Expand Down
Empty file removed src/utils/wit/__init__.py
Empty file.
10 changes: 0 additions & 10 deletions src/utils/wit/file_utils.py

This file was deleted.

0 comments on commit 20fe210

Please sign in to comment.