Skip to content

Commit

Permalink
Extract all subtitles in a single pass of ffmpeg
Browse files Browse the repository at this point in the history
Extract all subtitles in a single pass of ffmpeg, and store the subtitles in Subs folder, next to videofile.
  • Loading branch information
diogosena authored Nov 26, 2024
1 parent bafd232 commit f7b6fee
Showing 1 changed file with 50 additions and 44 deletions.
94 changes: 50 additions & 44 deletions ffsubsync/speech_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,51 +253,59 @@ def __init__(
def try_fit_using_embedded_subs(self, fname: str) -> None:
embedded_subs = []
embedded_subs_times = []
if self.ref_stream is None:
# check first 5; should cover 99% of movies
streams_to_try: List[str] = list(map("0:s:{}".format, range(5)))
ffmpeg_base_args = [
ffmpeg_bin_path("ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path),
"-loglevel", "fatal", "-nostdin", "-y", "-i", fname
]
video_dir = os.path.dirname(fname)
subs_dir = os.path.join(video_dir, "Subs")
os.makedirs(subs_dir, exist_ok=True)
subtitles_files = []
if self.ref_stream:
subtitle_filename = os.path.join(subs_dir, f"{os.path.splitext(os.path.basename(fname))[0]}.ref.srt")
subtitles_files.append(subtitle_filename)
ffmpeg_base_args.extend(["-map", str(self.ref_stream), "-f", "srt", subtitle_filename])
else:
streams_to_try = [self.ref_stream]
for stream in streams_to_try:
ffmpeg_args = [
ffmpeg_bin_path(
"ffmpeg", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path
)
ffprobe_args = [
ffmpeg_bin_path("ffprobe", self.gui_mode, ffmpeg_resources_path=self.ffmpeg_path),
"-v", "error", "-select_streams", "s", "-show_entries", "stream=index:stream_tags=language,title",
"-of", "csv=p=0", fname
]
ffmpeg_args.extend(
[
"-loglevel",
"fatal",
"-nostdin",
"-i",
fname,
"-map",
"{}".format(stream),
"-f",
"srt",
"-",
]
)
process = subprocess.Popen(
ffmpeg_args, **subprocess_args(include_stdout=True)
)
output = io.BytesIO(process.communicate()[0])
if process.returncode != 0:
break
pipe = cast(
Pipeline,
make_subtitle_speech_pipeline(start_seconds=self.start_seconds),
).fit(output)
speech_step = pipe.steps[-1][1]
embedded_subs.append(speech_step)
embedded_subs_times.append(speech_step.max_time_)
if len(embedded_subs) == 0:
if self.ref_stream is None:
error_msg = "Video file appears to lack subtitle stream"
else:
error_msg = "Stream {} not found".format(self.ref_stream)
logger.info(f"Running ffprobe command: {' '.join(ffprobe_args)}")
process = subprocess.Popen(ffprobe_args, **subprocess_args(include_stdout=True))
output = process.communicate()[0].decode("utf-8").strip().splitlines()
if process.returncode != 0 or not output:
raise ValueError("Failed to detect subtitle streams in the video file")
for line in output:
parts = line.split(",")
if len(parts) < 2:
logger.warning(f"Unexpected ffprobe output: {line}")
continue
stream_index, language_code = parts[:2]
language_title = parts[2] if len(parts) > 2 else None
subtitle_filename = os.path.join(
subs_dir,
f"{os.path.splitext(os.path.basename(fname))[0]}.{language_code}"
+ (f".{language_title}" if language_title else "")
+ ".srt"
)
subtitles_files.append(subtitle_filename)
ffmpeg_base_args.extend(["-map", f"0:{stream_index}", "-f", "srt", subtitle_filename])
logger.info(f"Running ffmpeg command: {' '.join(ffmpeg_base_args)}")
process = subprocess.Popen(ffmpeg_base_args, **subprocess_args(include_stdout=True))
process.communicate()
if process.returncode != 0:
raise ValueError("Failed to extract subtitles from the video file")
for subtitle_file in subtitles_files:
with open(subtitle_file, 'rb') as file:
pipe = cast(Pipeline, make_subtitle_speech_pipeline(start_seconds=self.start_seconds))
pipe_output = pipe.fit(file)
for speech_step in pipe_output.steps[-1:]:
embedded_subs.append(speech_step[1])
embedded_subs_times.append(speech_step[1].max_time_)
if not embedded_subs:
error_msg = "Video file appears to lack subtitle stream" if self.ref_stream is None else f"Stream {self.ref_stream} not found"
raise ValueError(error_msg)
# use longest set of embedded subs
subs_to_use = embedded_subs[int(np.argmax(embedded_subs_times))]
self.video_speech_results_ = subs_to_use.subtitle_speech_results_

Expand Down Expand Up @@ -367,8 +375,6 @@ def fit(self, fname: str, *_) -> "VideoSpeechTransformer":
"1",
"-acodec",
"pcm_s16le",
"-af",
"aresample=async=1",
"-ar",
str(self.frame_rate),
"-",
Expand Down

0 comments on commit f7b6fee

Please sign in to comment.