diff --git a/src/audio_splitter.py b/src/audio_splitter.py index 7ec0285..e52893b 100644 --- a/src/audio_splitter.py +++ b/src/audio_splitter.py @@ -14,36 +14,26 @@ def split( max_dur: float = 15, max_silence: float = 0.5, energy_threshold: float = 50, - expand_segments_with_noise: bool = False, noise_seconds: int = 1, noise_amplitude: int = 0, - ) -> list[tuple[str, float, float]]: - segments = split( - file_path, - min_dur=min_dur, - max_dur=max_dur, - max_silence=max_silence, - energy_threshold=energy_threshold, - ) - - if expand_segments_with_noise: - segments = [ - ( - self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude), - segment.meta.start, - segment.meta.end, - ) for segment in segments - ] - - return self._semgnets_to_data(segments) - - def _expand_segment_with_noise( - self, - segment: AudioRegion, - noise_seconds: int, - noise_amplitude: int, - ) -> AudioSegment: - + ) -> list[tuple[bytes, float, float]]: + segments = [ + ( + self._expand_segment_with_noise(segment, noise_seconds, noise_amplitude), + segment.meta.start, + segment.meta.end, + ) for segment in split( + file_path, + min_dur=min_dur, + max_dur=max_dur, + max_silence=max_silence, + energy_threshold=energy_threshold, + ) + ] + + return self._segments_to_data(segments) + + def _expand_segment_with_noise(self, segment: AudioRegion, noise_seconds: int, noise_amplitude: int) -> AudioSegment: audio_segment = AudioSegment( segment._data, frame_rate=segment.sampling_rate, @@ -56,20 +46,13 @@ def _expand_segment_with_noise( return pre_noise + audio_segment + post_noise - def _semgnets_to_data( - self, - segments: list[AudioSegment | tuple[AudioSegment, float, float]], - ) -> list[tuple[bytes, float, float]]: - segment_data = [] + def _segments_to_data(self, segments: list[tuple[AudioSegment, float, float]]) -> list[tuple[bytes, float, float]]: + segments_data = [] for segment in segments: output_buffer = io.BytesIO() - if isinstance(segment, tuple): - segment[0].export(output_buffer, format='mp3') - segment_data.append((output_buffer.getvalue(), segment[1], segment[2])) - else: - segment.export(output_buffer, format='mp3') - segment_data.append((output_buffer.getvalue(), segment.meta.start, segment.meta.end)) + segment[0].export(output_buffer, format='mp3') + segments_data.append((output_buffer.getvalue(), segment[1], segment[2])) - return segment_data + return segments_data diff --git a/src/cli.py b/src/cli.py index dd177b2..f7b9261 100644 --- a/src/cli.py +++ b/src/cli.py @@ -22,7 +22,6 @@ import requests from .recognizers.wit_recognizer import WitRecognizer - from .utils.wit import file_utils as wit_file_utils except ModuleNotFoundError: pass @@ -150,8 +149,7 @@ def process_local( file_path = str(file['file_path'].absolute()) if config.use_wit(): - mp3_file_path = str(wit_file_utils.convert_to_mp3(file['file_path']).absolute()) - recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(mp3_file_path, config.wit) + recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(file_path, config.wit) else: recognize_generator = WhisperRecognizer(verbose=config.input.verbose).recognize( file_path, @@ -167,9 +165,6 @@ def process_local( segments: list[SegmentType] = exception.value break - if config.use_wit() and file['file_path'].suffix != '.mp3': - Path(mp3_file_path).unlink(missing_ok=True) - writer.write_all(Path(file['file_name']).stem, segments, config.output) for segment in segments: @@ -212,13 +207,13 @@ def process_url( yield new_progress_info, [] writer = Writer() - if config.input.skip_if_output_exist and writer.is_output_exist(element['id'], config.output): + if config.input.skip_if_output_exist and writer.is_output_exist(element['id'], element['audio_ext']): new_progress_info['inner_status'] = 'completed' yield new_progress_info, [] continue - file_path = os.path.join(config.output.output_dir, f"{element['id']}.mp3") + file_path = os.path.join(config.output.output_dir, f"{element['id']}.{element['audio_ext']}") if config.use_wit(): recognize_generator = WitRecognizer(verbose=config.input.verbose).recognize(file_path, config.wit) diff --git a/src/downloader.py b/src/downloader.py index 1f4839c..31d45cf 100644 --- a/src/downloader.py +++ b/src/downloader.py @@ -23,12 +23,6 @@ def _config(self, download_archive: str | bool) -> dict[str, Any]: 'ignoreerrors': True, 'download_archive': download_archive, 'playlist_items': self.playlist_items, - 'postprocessors': [ - { - 'key': 'FFmpegExtractAudio', - 'preferredcodec': 'mp3', - }, - ], } def download(self, url: str, save_response: bool = False) -> dict[str, Any]: diff --git a/src/recognizers/wit_recognizer.py b/src/recognizers/wit_recognizer.py index 21f7f8d..d35965f 100644 --- a/src/recognizers/wit_recognizer.py +++ b/src/recognizers/wit_recognizer.py @@ -33,11 +33,7 @@ def recognize( file_path: str, wit_config: Config.Wit, ) -> Generator[dict[str, float], None, list[SegmentType]]: - segments = AudioSplitter().split( - file_path, - max_dur=wit_config.max_cutting_duration, - expand_segments_with_noise=True, - ) + segments = AudioSplitter().split(file_path, max_dur=wit_config.max_cutting_duration) retry_strategy = Retry( total=5, diff --git a/src/utils/wit/__init__.py b/src/utils/wit/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/src/utils/wit/file_utils.py b/src/utils/wit/file_utils.py deleted file mode 100644 index 88f9ae6..0000000 --- a/src/utils/wit/file_utils.py +++ /dev/null @@ -1,10 +0,0 @@ -from pathlib import Path - -from pydub import AudioSegment - - -def convert_to_mp3(file: Path) -> Path: - audio_file = AudioSegment.from_file(str(file)) - converted_file_path = file.with_suffix('.mp3') - audio_file.export(str(converted_file_path), format='mp3') - return converted_file_path