From f7b6feeccd5d1813d38283114a007c9224ebc665 Mon Sep 17 00:00:00 2001 From: diogosena Date: Tue, 26 Nov 2024 12:18:15 -0300 Subject: [PATCH] Extract all subtitles in a single pass of ffmpeg Extract all subtitles in a single pass of ffmpeg, and store the subtitles in Subs folder, next to videofile. --- ffsubsync/speech_transformers.py | 94 +++++++++++++++++--------------- 1 file changed, 50 insertions(+), 44 deletions(-) diff --git a/ffsubsync/speech_transformers.py b/ffsubsync/speech_transformers.py index 38883d1..d1f7e4a 100644 --- a/ffsubsync/speech_transformers.py +++ b/ffsubsync/speech_transformers.py @@ -253,51 +253,59 @@ def __init__( def try_fit_using_embedded_subs(self, fname: str) -> None: embedded_subs = [] embedded_subs_times = [] - if self.ref_stream is None: - # check first 5; should cover 99% of movies - streams_to_try: List[str] = list(map("0:s:{}".format, range(5))) + ffmpeg_base_args = [ + ffmpeg_bin_path("ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path), + "-loglevel", "fatal", "-nostdin", "-y", "-i", fname + ] + video_dir = os.path.dirname(fname) + subs_dir = os.path.join(video_dir, "Subs") + os.makedirs(subs_dir, exist_ok=True) + subtitles_files = [] + if self.ref_stream: + subtitle_filename = os.path.join(subs_dir, f"{os.path.splitext(os.path.basename(fname))[0]}.ref.srt") + subtitles_files.append(subtitle_filename) + ffmpeg_base_args.extend(["-map", str(self.ref_stream), "-f", "srt", subtitle_filename]) else: - streams_to_try = [self.ref_stream] - for stream in streams_to_try: - ffmpeg_args = [ - ffmpeg_bin_path( - "ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path - ) + ffprobe_args = [ + ffmpeg_bin_path("ffprobe", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path), + "-v", "error", "-select_streams", "s", "-show_entries", "stream=index:stream_tags=language,title", + "-of", "csv=p=0", fname ] - ffmpeg_args.extend( - [ - "-loglevel", - "fatal", - "-nostdin", - "-i", - fname, - "-map", - "{}".format(stream), - "-f", - "srt", - "-", - ] - ) - process = subprocess.Popen( - ffmpeg_args, **subprocess_args(include_stdout=True) - ) - output = io.BytesIO(process.communicate()[0]) - if process.returncode != 0: - break - pipe = cast( - Pipeline, - make_subtitle_speech_pipeline(start_seconds=self.start_seconds), - ).fit(output) - speech_step = pipe.steps[-1][1] - embedded_subs.append(speech_step) - embedded_subs_times.append(speech_step.max_time_) - if len(embedded_subs) == 0: - if self.ref_stream is None: - error_msg = "Video file appears to lack subtitle stream" - else: - error_msg = "Stream {} not found".format(self.ref_stream) + logger.info(f"Running ffprobe command: {' '.join(ffprobe_args)}") + process = subprocess.Popen(ffprobe_args, **subprocess_args(include_stdout=True)) + output = process.communicate()[0].decode("utf-8").strip().splitlines() + if process.returncode != 0 or not output: + raise ValueError("Failed to detect subtitle streams in the video file") + for line in output: + parts = line.split(",") + if len(parts) < 2: + logger.warning(f"Unexpected ffprobe output: {line}") + continue + stream_index, language_code = parts[:2] + language_title = parts[2] if len(parts) > 2 else None + subtitle_filename = os.path.join( + subs_dir, + f"{os.path.splitext(os.path.basename(fname))[0]}.{language_code}" + + (f".{language_title}" if language_title else "") + + ".srt" + ) + subtitles_files.append(subtitle_filename) + ffmpeg_base_args.extend(["-map", f"0:{stream_index}", "-f", "srt", subtitle_filename]) + logger.info(f"Running ffmpeg command: {' '.join(ffmpeg_base_args)}") + process = subprocess.Popen(ffmpeg_base_args, **subprocess_args(include_stdout=True)) + process.communicate() + if process.returncode != 0: + raise ValueError("Failed to extract subtitles from the video file") + for subtitle_file in subtitles_files: + with open(subtitle_file, 'rb') as file: + pipe = cast(Pipeline, make_subtitle_speech_pipeline(start_seconds=self.start_seconds)) + pipe_output = pipe.fit(file) + for speech_step in pipe_output.steps[-1:]: + embedded_subs.append(speech_step[1]) + embedded_subs_times.append(speech_step[1].max_time_) + if not embedded_subs: + error_msg = "Video file appears to lack subtitle stream" if self.ref_stream is None else f"Stream {self.ref_stream} not found" raise ValueError(error_msg) - # use longest set of embedded subs subs_to_use = embedded_subs[int(np.argmax(embedded_subs_times))] self.video_speech_results_ = subs_to_use.subtitle_speech_results_ @@ -367,8 +375,6 @@ def fit(self, fname: str, *_) -> "VideoSpeechTransformer": "1", "-acodec", "pcm_s16le", - "-af", - "aresample=async=1", "-ar", str(self.frame_rate), "-",