diff --git a/subsync/speech_transformers.py b/subsync/speech_transformers.py
index 0ce5ef1..b79b4be 100644
--- a/subsync/speech_transformers.py
+++ b/subsync/speech_transformers.py
@@ -1,4 +1,4 @@
-# -*- coding: utf-8 -*-
+# -*- coding: utf-8 -*-
 import logging
 from io import BytesIO
 import subprocess
@@ -131,21 +131,31 @@ def __init__(self, vad, sample_rate, frame_rate, start_seconds=0, vlc_mode=False
         self.video_speech_results_ = None
 
     def try_fit_using_embedded_subs(self, fname):
-        ffmpeg_args = ['ffmpeg']
-        ffmpeg_args.extend([
-            '-loglevel', 'fatal',
-            '-nostdin',
-            '-i', fname,
-            '-map', '0:s:0',
-            '-f', 'srt',
-            '-'
-        ])
-        process = subprocess.Popen(ffmpeg_args, stdin=None, stdout=subprocess.PIPE, stderr=None)
-        output = BytesIO(process.communicate()[0])
-        if process.returncode != 0:
+        embedded_subs = []
+        embedded_subs_times = []
+        # check first 5; should cover 99% of movies
+        for stream in range(5):
+            ffmpeg_args = ['ffmpeg']
+            ffmpeg_args.extend([
+                '-loglevel', 'fatal',
+                '-nostdin',
+                '-i', fname,
+                '-map', '0:s:{}'.format(stream),
+                '-f', 'srt',
+                '-'
+            ])
+            process = subprocess.Popen(ffmpeg_args, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+            output = BytesIO(process.communicate()[0])
+            if process.returncode != 0:
+                break
+            pipe = make_subtitle_speech_pipeline(start_seconds=self.start_seconds).fit(output)
+            speech_step = pipe.steps[-1][1]
+            embedded_subs.append(speech_step.subtitle_speech_results_)
+            embedded_subs_times.append(speech_step.max_time_)
+        if len(embedded_subs) == 0:
             raise ValueError('Video file appears to lack subtitle stream')
-        pipe = make_subtitle_speech_pipeline(start_seconds=self.start_seconds).fit(output)
-        self.video_speech_results_ = pipe.steps[-1][1].subtitle_speech_results_
+        # use longest set of embedded subs
+        self.video_speech_results_ = embedded_subs[int(np.argmax(embedded_subs_times))]
 
     def fit(self, fname, *_):
         try: