-
Notifications
You must be signed in to change notification settings - Fork 10
/
subslider.py
381 lines (328 loc) · 14.9 KB
/
subslider.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# SubSlider - a simple script to apply offsets to subtitles
#
# Copyright (C) 2014 - Michele Bonazza <http://michelebonazza.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>
import argparse
import collections
import os
import re
import shutil
import sys
from datetime import datetime, timedelta

import chardet
class MyParser(argparse.ArgumentParser):
    """An argument parser that prints the full help text on usage errors."""

    def error(self, message):
        """Print the help text, report the error, and exit with status 1.

        Overrides ``ArgumentParser.error``. The help text is printed first
        (as before), then ``message`` is written to stderr so the user can
        see which argument was rejected instead of only getting the help.

        Args:
            message: the description of the usage error.
        """
        self.print_help()
        sys.stderr.write('\n{}: error: {}\n'.format(self.prog, message))
        sys.exit(1)
class SubSlider:
    """A simple script to apply offsets to subtitles.

    Subtitles can be moved forward or back in time depending on the
    parameters passed. Timestamps are handled as ``datetime`` values anchored
    at ``DATE_ZERO`` so that they can be compared and shifted with
    ``timedelta`` arithmetic.
    """

    # how many entries are listed when the user has to pick a reference line
    LINES_TO_SHOW = 10
    # regex matching an .srt timing line; group 1 is start, group 2 is end
    SUB_TIME_FORMAT = (r"(\d{2}:\d{2}:\d{2},\d{3}) \-\-> "
                       r"(\d{2}:\d{2}:\d{2},\d{3})")
    # placeholder default shown in the help for --output
    DEFAULT_START_AT = "same as input; original .srt file will be copied to "\
                       "ORIGINAL_SRT_NAME_orig.srt"
    # the instant that represents timestamp 00:00:00,000
    DATE_ZERO = datetime.strptime('2000/1/1', '%Y/%m/%d')
    # container returned by check_args()
    _Params = collections.namedtuple('Params',
                                     ['input', 'output', 'output_tmp',
                                      'mins', 'secs', 'millis'])

    def __init__(self):
        """Build the command-line parser; no file is touched here."""
        self.parser = MyParser(
            formatter_class=argparse.ArgumentDefaultsHelpFormatter
        )
        group = self.parser.add_mutually_exclusive_group(required=True)
        group.add_argument("-ds", "--delay_subs",
                           help="""make subtitles appear later.
                           OFFSET format is
                           [mm:]SS[,sss].
                           Examples: "1:23,456" (subs delayed of 1 minute, 23
                           seconds, 456 milliseconds); "100" (subs delayed of 100
                           seconds, or 1 minute, 40 seconds); "12,43" (subs
                           delayed of 12 seconds, 430 milliseconds)""",
                           metavar='OFFSET')
        group.add_argument("-dv", "--delay_video",
                           help="""make subtitles appear sooner.
                           OFFSET format is
                           [mm:]SS[,sss].
                           Examples: "1:23,456" (subs displayed 1 minute,
                           23 seconds, 456 milliseconds sooner); "100" (subs
                           displayed 100 seconds, or 1 minute, 40 seconds sooner);
                           "12,43" (subs are displayed 12 seconds, 430
                           milliseconds sooner)""",
                           metavar='OFFSET')
        group.add_argument("-s", "--start_at",
                           help="""make the first subtitle appear at a specific
                           time. The script will show a list of lines taken
                           from the .srt file to choose what's the first line
                           to be displayed at TIME""",
                           metavar="TIME")
        self.parser.add_argument("-o", "--output",
                                 help="the output .srt subtitles file",
                                 default=self.DEFAULT_START_AT)
        self.parser.add_argument("input_file", type=str,
                                 help="the .srt subtitles file")

    def main(self):
        """Parse the command line, shift the subtitles and write the result.

        When the output file is the input file, the original is first copied
        to ``<name>_orig.srt``. The shifted subtitles go through a temp file
        (see parse_subs/fix_file) which is removed at the end.
        """
        args = self.parser.parse_args()
        parsed = self.check_args(args)
        if not parsed:
            print('')
            self.parser.error('Bad arguments.')
        (self.input_subs, self.output_subs, self.output_temp,
         minutes, seconds, millis) = parsed
        original = None
        if self.input_subs == self.output_subs:
            # if input is my_movie.srt copy to my_movie_orig.srt
            original = '%s_orig.srt' % os.path.splitext(self.input_subs)[0]
            shutil.copyfile(self.input_subs, original)
        self.first_valid = 0
        # if start was specified, we need to know what's the first line that
        # the offset needs to be applied to
        subtract_offset = False
        if args.start_at:
            first_starts_at = self.get_offset_from_start_at(args.start_at)
            starting_at = self.get_date(minutes, seconds, millis)
            if first_starts_at > starting_at:
                # subtitles begin later than they should, negative offset
                offset = first_starts_at - starting_at
                subtract_offset = True
            else:
                # subtitles begin sooner than they should, positive offset
                offset = starting_at - first_starts_at
            print('Applying {} as offset'.format(offset))
        else:
            offset = self.get_date(minutes, seconds, millis) - self.DATE_ZERO
        if subtract_offset or args.delay_video:
            def offset_func(start, end):
                return (start - offset, end - offset)
        else:
            def offset_func(start, end):
                return (start + offset, end + offset)
        self.parse_subs(offset_func)
        self.fix_file()
        # clean up the temp file
        os.remove(self.output_temp)
        print('Success! Offset subs have been written to {}'
              .format(os.path.abspath(self.output_subs)))
        if self.input_subs == self.output_subs:
            print('The original subs have been copied to {}'.format(original))

    def check_args(self, args):
        """
        Checks that command-line arguments are valid.

        The syntax for parameters is checked by argparse; this method checks
        that the values provided are valid (e.g., file paths point to actual
        files, offsets have been specified following our format, etc.).

        Returns a ``Params`` namedtuple ``(input, output, output_tmp, mins,
        secs, millis)`` where the last three are strings, or ``None`` (after
        printing what is wrong) when any value is invalid.
        """
        error = False
        input_file = args.input_file
        # an empty offset (e.g. -ds "") must fail validation, not crash
        input_offset = (args.start_at or args.delay_subs
                        or args.delay_video or '').strip()
        output_subs = output_temp = None
        if not os.path.isfile(input_file):
            print('{} does not exist'.format(input_file))
            error = True
        else:
            if not args.output or args.output == self.DEFAULT_START_AT:
                output_subs = input_file
            else:
                output_subs = args.output
            output_temp = '{}_temp.srt'.format(os.path.splitext(input_file)[0])
        # groups: 1 = minutes (optional), 2 = seconds, 3 = millis (optional)
        offset = re.match(r'(?:(\d{1,2}):)?(\d+)(?:,(\d{1,3}))?$',
                          input_offset)
        if not offset:
            print('{} is not a valid offset, format is [MM:]SS[,sss], see help '
                  'dialog for some examples'.format(input_offset))
            error = True
        if error:
            return None
        minutes, seconds, millis = offset.groups()
        # the ljust call is because we want e.g. '2,5' to be interpreted as
        # 2 seconds, 500 millis
        millis = (millis or '0').ljust(3, '0')
        if minutes is None:
            # format is seconds(,millis), convert to minutes
            secs = int(seconds)
            minutes = str(secs // 60)
            seconds = str(secs % 60)
        return self._Params(input_file, output_subs, output_temp,
                            minutes, seconds, millis)

    def get_offset_from_start_at(self, start_at):
        """
        Shows a prompt to the user for her to choose the reference line that
        should start at the specified time, and returns the time at which the
        chosen line was originally shown.

        Exits with status 1 when the file holds no subtitles or when the
        answer is not a number within the listed range.
        """
        lines, times = self.get_first_lines(self.LINES_TO_SHOW)
        if not times:
            print('No subtitles found in {}. Exiting'.format(self.input_subs))
            sys.exit(1)
        # python3 has no "raw_input()"
        try:
            _input = raw_input
        except NameError:
            _input = input
        # rstrip instead of [:-1]: entries that are not followed by a blank
        # line have no trailing newline to remove
        choices = ['%d: {%s}\n' % (idx + 1, val.rstrip('\n'))
                   for idx, val in enumerate(lines)]
        prompt = "These are the first {0} lines:\n\n{1}\n\nWhich one should "\
                 "start at {2}?\nYour choice 1-{0} [1]: "\
                 .format(len(choices), '\n'.join(choices), start_at)
        choice = _input(prompt)
        if not choice:
            # default choice is 1, which is at index #0 in the array
            choice = 0
        else:
            try:
                choice = int(choice)
            except ValueError:
                print('Expected a number between 1 and {}, but "{}" was '
                      'entered. Exiting'.format(len(choices), choice))
                sys.exit(1)
            if choice < 1 or choice > len(choices):
                print('Expected a number between 1 and {}, but {} was '
                      'entered. Exiting'.format(len(choices), choice))
                sys.exit(1)
            # list is 0-based, choices are 1-based
            choice -= 1
        # parse the string to get the start value
        parsed = re.search(self.SUB_TIME_FORMAT, times[choice])
        # group(1) is start, group(2) is end
        return self.parse_time(parsed.group(1))

    def get_first_lines(self, line_count):
        """
        Parses the input subs file and returns the first ``line_count``
        entries, together with the timing lines at which they're shown.

        Returns a ``(lines, times)`` tuple: ``lines`` holds the text of each
        entry, ``times`` the raw timing line of each entry found so far (it
        may hold one more item than ``lines`` when the file is longer than
        ``line_count`` entries).
        """
        found = 0
        lines = []
        times = []
        buf = []
        timing = re.compile(self.SUB_TIME_FORMAT)
        with self.file_open(self.input_subs, 'rt') as _input:
            for line in _input:
                if timing.search(line):
                    if found:
                        # the last buffered line is this entry's block
                        # number, it's not part of the previous text; the
                        # very first buffer (UTF header/block number) is
                        # never appended
                        lines.append('\n'.join(buf[:-1]))
                    found += 1
                    times.append(line)
                    buf = []
                else:
                    buf.append(line.strip())
                if found > line_count:
                    return lines, times
        if found:
            # end of file reached: the text of the last entry is still in
            # the buffer, since no timing line came after it to flush it
            while buf and not buf[-1]:
                buf.pop()
            lines.append('\n'.join(buf + ['']))
        return lines, times

    def parse_subs(self, offset_func):
        """
        Parses the original subs file and applies the offset using the argument
        function, writing the output to a temp file.

        The method sets self.first_valid to the first block in the temp file
        that has a timestamp greater than zero; this is done in case some lines
        in the output subs ended up being displayed at negative time.

        ``offset_func`` takes ``(start, end)`` datetimes and returns the
        shifted ``(start, end)`` pair.
        """
        timing = re.compile(self.SUB_TIME_FORMAT)
        with self.file_open(self.input_subs, 'r') as _input, \
                self.file_open(self.output_temp, 'w') as output:
            block = 0
            for line in _input:
                parsed = timing.search(line)
                if not parsed:
                    output.write(line)
                    continue
                block += 1
                start, end = offset_func(self.parse_time(parsed.group(1)),
                                         self.parse_time(parsed.group(2)))
                offset_start = self.format_time(start)
                offset_end = self.format_time(end)
                if not self.first_valid and end >= self.DATE_ZERO:
                    self.first_valid = block
                    if start < self.DATE_ZERO:
                        # this line will start at 0, and is going to be
                        # displayed until end
                        offset_start = '00:00:00,000'
                output.write('{} --> {}\n'.format(offset_start, offset_end))

    def fix_file(self):
        """
        Parses the temp file created by parse_subs and renumbers blocks in case
        lines were dropped because the offset pushed them to negative
        timestamps.

        A digit-only line is treated as a block number only when the line
        right after it is a timing line; otherwise it is subtitle text and is
        copied as is. If parse_subs found no valid block at all
        (self.first_valid is 0) nothing is written.
        """
        timing = re.compile(self.SUB_TIME_FORMAT)
        with self.file_open(self.output_temp, 'r') as _input, \
                self.file_open(self.output_subs, 'w') as output:
            # we can drop all lines found before the first valid block
            # (set by parse_subs())
            start_output = False
            # digit-only line whose role (block number or text) is decided
            # by the line that follows it
            candidate = None
            for line in _input:
                if candidate is not None:
                    if timing.search(line):
                        block_num = int(candidate.strip())
                        if self.first_valid and block_num >= self.first_valid:
                            start_output = True
                            # renumber blocks so that they start at 1, no
                            # matter what; '\n' like every other line
                            output.write(
                                '{}\n'
                                .format(block_num - self.first_valid + 1)
                            )
                    elif start_output:
                        # just a subtitle made of digits
                        output.write(candidate)
                    candidate = None
                if re.match(r'\d+$', line.strip()):
                    candidate = line
                elif start_output:
                    output.write(line)
            if candidate is not None and start_output:
                output.write(candidate)

    @staticmethod
    def format_time(value):
        """
        Formats a date using the format '%H:%M:%S,%f', truncated to
        milliseconds (e.g. '01:02:03,456').
        """
        # %f yields 6 digits (microseconds), .srt only wants 3
        return value.strftime('%H:%M:%S,%f')[:-3]

    @staticmethod
    def get_date(minutes, seconds, millis):
        """
        Returns a date that can be used for comparisons with timestamps in the
        .srt file.

        Arguments may be ints, numeric strings, or empty/None (counted as 0).
        Values are added to DATE_ZERO, so minutes or seconds above 59 roll
        over instead of raising ValueError.
        """
        def nsafe(s): return int(s) if s else 0
        return SubSlider.DATE_ZERO + timedelta(minutes=nsafe(minutes),
                                               seconds=nsafe(seconds),
                                               milliseconds=nsafe(millis))

    @staticmethod
    def parse_time(time):
        """
        Parses a date using the format '%H:%M:%S,%f' and sets the year to 2000
        to avoid trouble.
        """
        parsed = datetime.strptime(time, '%H:%M:%S,%f')
        return parsed.replace(year=2000)

    @staticmethod
    def get_python_version():
        """
        Returns the major version of the python executing the code
        """
        return sys.version_info[0]

    @staticmethod
    def file_open(file_path, file_mode):
        """
        Find a file's encoding, open it in a specific mode, and return that
        object.

        For an existing file the encoding is the one guessed by chardet from
        its current content (also when opening for writing); otherwise, or
        when chardet cannot tell, the default is used.
        """
        file_encoding = None
        if SubSlider.get_python_version() >= 3:
            file_encoding = "utf-8"  # Default encoding in Python 3
        if os.path.isfile(file_path):  # If file already exists
            with open(file_path, 'rb') as raw_file:
                raw_file_data = raw_file.read()
            # chardet reports None when it has nothing to go on (e.g. an
            # empty file): keep the default in that case
            detected = chardet.detect(raw_file_data)['encoding']
            file_encoding = detected or file_encoding
        return open(file=file_path, mode=file_mode, encoding=file_encoding)
if __name__ == '__main__':
    # script entry point: build the tool and let it process sys.argv
    slider = SubSlider()
    slider.main()