Skip to content

Commit 3bdc9f9

Browse files
committed
Add parsing of Chapter element.
Also update unit tests, and some other changes.
1 parent d022dd7 commit 3bdc9f9

15 files changed

+231
-177
lines changed

atomic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@
66
from os import SEEK_SET
77
from struct import pack, unpack
88
from datetime import datetime, timedelta
9-
from jdr_lib.container import SortedList
109

1110
from . import DecodeError, Inconsistent, MAX_DATA_SIZE
1211
from .utility import hex_bytes, numbytes_var_int, encode_var_int, decode_var_int
1312
from .element import Element, STATE_LOADED
1413
from .tags import MATROSKA_TAGS
14+
from .sortedlist import SortedList
1515

1616
__all__ = ['ElementAtomic', 'ElementRaw', 'ElementUnsigned', 'ElementSigned',
1717
'ElementBoolean', 'ElementEnum', 'ElementBitField', 'ElementFloat',

container.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,11 @@
77
from io import IOBase
88
from os import SEEK_SET, SEEK_CUR, SEEK_END
99
from datetime import datetime
10-
from jdr_lib.container import SortedList
1110

1211
from . import Inconsistent, DecodeError
1312
from .header import Header
1413
from .tags import MATROSKA_TAGS
14+
from .sortedlist import SortedList
1515

1616
__all__ = ['Container', 'File']
1717

@@ -87,6 +87,13 @@ def children_named(self, name):
8787
"Return an iterator over all children with a given name."
8888
return (child for child in self if child.name == name)
8989

90+
def child_named(self, name):
    "Return the first child whose name equals `name`, or None if none does."
    # next() with a default yields None once the iterator is exhausted.
    return next(self.children_named(name), None)
96+
9097
def children_with_id(self, ebml_id):
9198
"Return an iterator over all children with a given ebml_id."
9299
return (child for child in self if child.ebml_id == ebml_id)

data_elements.py

+135-10
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#pylint: disable=too-many-public-methods,too-many-ancestors
2+
#pylint: disable=logging-format-interpolation,too-many-lines
23
"""
34
Elements that exist as accessors for their data, using the Parsed property:
45
EBML, Segment, Seek, Info, TrackEntry, Video, Audio, AttachedFile, Tag, Targets,
@@ -9,21 +10,21 @@
910
from os import SEEK_SET
1011
from itertools import chain
1112
from operator import attrgetter, itemgetter
12-
from jdr_lib.container import SortedList
1313

1414
from . import Inconsistent
15-
from .utility import hex_bytes, encode_var_int
15+
from .utility import hex_bytes, encode_var_int, fmt_time
1616
from .tags import MATROSKA_TAGS
1717
from .element import ElementMaster, ElementPlaceholder, ElementVoid, \
1818
STATE_UNLOADED, STATE_SUMMARY
1919
from .parsed import Parsed, create_atomic
20+
from .sortedlist import SortedList
2021

2122
__all__ = ['ElementEBML', 'ElementSegment', 'ElementSeek', 'ElementInfo',
2223
'ElementTrackEntry', 'ElementVideo', 'ElementAudio',
2324
'ElementAttachedFile', 'ElementTag', 'ElementTargets',
24-
'ElementSimpleTag']
25+
'ElementSimpleTag', 'ElementEditionEntry']
2526

26-
import logging
27+
import logging #pylint: disable=wrong-import-order,wrong-import-position
2728
LOG = logging.getLogger(__name__)
2829
LOG.setLevel(logging.INFO)
2930

@@ -95,7 +96,10 @@ class ElementSegment(ElementMaster):
9596
+ attachments: Iterator over AttachedFile elements.
9697
+ attachments_byname: Dict of AttachedFile elements, stored by FileName.
9798
+ attachments_byuid: Dict of AttachedFile elements, stored by FileUID.
98-
+ chapters: The Chapters element, if any.
99+
+ editions: Iterator over EditionEntry elements from the Chapters element,
100+
if any.
101+
+ chapters: Iterator over ChapterAtom elements from the first EditionEntry
102+
in the Chapters element, if any.
99103
+ tags: Iterator over Tag elements, i.e. tag groups.
100104
101105
Extracted from Info elements:
@@ -202,8 +206,6 @@ def attachments_byuid(self):
202206
ret[attachment.file_uid] = attachment
203207
return ret
204208

205-
chapters = Parsed('Chapters', '')
206-
207209
def duration_getter(self, child):
208210
"Get child.duration, scaling to seconds."
209211
return child.duration * self.timecode_scale / 1e9
@@ -224,6 +226,20 @@ def delete_title(self, _):
224226
muxing_app = Parsed('Info', 'muxing_app', 'muxing_app', skip=None)
225227
writing_app = Parsed('Info', 'writing_app', 'writing_app', skip=None)
226228

229+
# From Chapters element
230+
@property
def editions(self):
    """Iterate over the EditionEntry children of the Chapters element.

    The Chapters element is looked up with self.child_named('Chapters').
    When there is no such element the iterator is simply empty.
    """
    elt = self.child_named('Chapters')
    if elt is None:
        # A bare return ends the generator.  Raising StopIteration inside a
        # generator body is converted to RuntimeError (PEP 479).
        return
    yield from elt.children_named('EditionEntry')
237+
@property
def chapters(self):
    """Iterate over the chapters of the first edition in self.editions.

    Yields the items of the first edition's `chapters` attribute.  When
    self.editions produces nothing, the iterator is empty.
    """
    # Use a default instead of letting StopIteration escape: inside a
    # generator an escaping StopIteration becomes RuntimeError (PEP 479).
    edition = next(self.editions, None)
    if edition is None:
        return
    yield from edition.chapters
242+
227243
# Manipulating children
228244

229245
def add_attachment(self, file_name, mime_type, description=None):
@@ -239,9 +255,8 @@ def add_attachment(self, file_name, mime_type, description=None):
239255
if description is not None:
240256
attachment.file_description = description
241257
return attachment
242-
try:
243-
attachments = next(self.children_named('Attachments'))
244-
except StopIteration:
258+
attachments = self.child_named('Attachments')
259+
if attachments is None:
245260
attachments = ElementMaster.new('Attachments', self, 0)
246261
attached_file = ElementAttachedFile.new('AttachedFile', attachments)
247262
attached_file.file_name = file_name
@@ -296,6 +311,9 @@ def summary(self, indent=0):
296311
ret += ind_str + "Tags:\n"
297312
for tags in self.tags:
298313
ret += tags.summary(indent+8) + "\n"
314+
ret += ind_str + "Chapters:\n"
315+
for chapter in self.chapters:
316+
ret += chapter.summary(indent+8) + "\n"
299317
return ret[:-1]
300318

301319
# Reading and writing
@@ -641,6 +659,8 @@ class ElementTrackEntry(ElementMaster):
641659
+ flag_lacing: The value of the FlagLacing element (bool).
642660
+ video: ElementVideo instance, for tracks of type 'video'.
643661
+ audio: ElementAudio instance, for tracks of type 'audio'.
662+
+ track_index: The index of this TrackEntry in the list of tracks in its
663+
segment.
644664
"""
645665

646666
track_type = Parsed('TrackType', 'string_val', 'value',
@@ -658,6 +678,16 @@ class ElementTrackEntry(ElementMaster):
658678
video = Parsed('Video', '')
659679
audio = Parsed('Audio', '')
660680

681+
@property
def track_index(self):
    """Return the position of this TrackEntry within its segment's tracks.

    The segment is taken to be the grandparent of this element
    (self.parent.parent).  Raises ValueError when that grandparent is not
    an ElementSegment.  Returns None when this element is not found among
    segment.tracks.
    """
    owner = self.parent.parent
    if not isinstance(owner, ElementSegment):
        raise ValueError("Track is not contained in a segment")
    # Identity comparison: we want this very object, not an equal one.
    positions = (pos for pos, entry in enumerate(owner.tracks)
                 if entry is self)
    return next(positions, None)
690+
661691
def __str__(self):
662692
ret = "{}: {} lang={} codec={} num={} uid={}" \
663693
.format(self.__class__.__name__, self.track_type,
@@ -965,3 +995,98 @@ def summary(self, indent=0):
965995
for tag in self.sub_tags:
966996
ret += tag.summary(indent+4) + "\n"
967997
return ret[:-1]
998+
999+
1000+
class ElementChapterAtom(ElementMaster):
    """Accessor class for the metadata stored in a ChapterAtom element.

    One instance represents a single chapter definition.  Among other
    things it exposes:
    + time_start: the start time of the chapter (nanoseconds, unscaled).
    + time_end: the end time of the chapter (nanoseconds, unscaled; optional).
    + identifier: the string ID for WebVTT cue identifier storage.
    + display names in different languages (see display_name()).
    """

    chapter_uid = Parsed('ChapterUID', 'value', 'value', create_atomic())
    identifier = Parsed('ChapterStringUID', 'value', 'value', create_atomic())
    time_start = Parsed('ChapterTimeStart', 'value', 'value', create_atomic())
    time_end = Parsed('ChapterTimeEnd', 'value', 'value', create_atomic())
    flag_hidden = Parsed('ChapterFlagHidden', 'value', 'value', create_atomic())
    flag_enabled = Parsed('ChapterFlagEnabled', 'value', 'value',
                          create_atomic())
    segment_uid = Parsed('ChapterSegmentUID', 'value', 'value', create_atomic())
    segment_edition_uid = Parsed('ChapterSegmentEditionUID', 'value', 'value',
                                 create_atomic())
    physical_equiv = Parsed('ChapterPhysicalEquiv', 'value', 'value',
                            create_atomic())

    @property
    def chapter_tracks(self):
        "Return the values of the ChapterTrack element's children as a list."
        track_elt = self.child_named('ChapterTrack')
        if track_elt is None:
            # No ChapterTrack child: report an empty list.
            return []
        return [sub.value for sub in track_elt]

    def display_name(self, lang='eng'):
        """Return the chapter's name in the requested language, or None.

        Each ChapterDisplay child is examined in turn.  Its languages are
        the values of its ChapLanguage children, or ['eng'] when it has
        none.  The ChapString value of the first ChapterDisplay whose
        languages include `lang` is returned; None is returned when no
        ChapterDisplay matches.

        The language is the ISO-639-2 alpha-3 form.
        """
        for disp in self.children_named('ChapterDisplay'):
            declared = [elt.value
                        for elt in disp.children_named('ChapLanguage')]
            if not declared:
                declared = ['eng']
            if lang not in declared:
                continue
            return disp.child_named('ChapString').value
        return None

    def __str__(self):
        # A '!' prefix marks a flag whose value is falsy.
        cls_name = self.__class__.__name__
        ident = self.identifier
        start_text = fmt_time(self.time_start, 3)
        if self.time_end is not None:
            end_text = fmt_time(self.time_end, 3)
        else:
            end_text = "[--]"
        hidden_mark = '' if self.flag_hidden else '!'
        enabled_mark = '' if self.flag_enabled else '!'
        return "{} id={!r} {} --> {} {}hid {}enab".format(
            cls_name, ident, start_text, end_text, hidden_mark, enabled_mark)

    def summary(self, indent=0):
        "Return the base summary plus one indented line per ChapterDisplay."
        pad = " " * (indent+4)
        lines = [super().summary(indent)]
        for disp in self.children_named('ChapterDisplay'):
            declared = [elt.value
                        for elt in disp.children_named('ChapLanguage')]
            if not declared:
                declared = ['eng']
            lines.append(pad + "{}: {!r}".format(
                ",".join(declared), disp.child_named('ChapString').value))
        return "\n".join(lines)
1066+
1067+
1068+
class ElementEditionEntry(ElementMaster):
    """Class to extract metadata from an EditionEntry element.

    An EditionEntry contains one set of chapter definitions.  The important
    attribute is 'chapters'.
    """

    edition_uid = Parsed('EditionUID', 'value', 'value', create_atomic())
    flag_hidden = Parsed('EditionFlagHidden', 'value', 'value', create_atomic())
    flag_default = Parsed('EditionFlagDefault', 'value', 'value',
                          create_atomic())
    flag_ordered = Parsed('EditionFlagOrdered', 'value', 'value',
                          create_atomic())

    @property
    def chapters(self):
        "Return an iterator over the ChapterAtom children of this element."
        yield from self.children_named('ChapterAtom')

    def __str__(self):
        """Return a one-line description of this edition.

        The hidden and default flags are shown as 'hid' / 'def', prefixed
        with '!' when the flag is falsy.  The ordered flag is shown by value
        (repr), followed by the number of chapters.
        """
        # The arguments must line up with the placeholders in order:
        # class name, hidden marker, default marker, ordered value, count.
        return "{} {}hid {}def ord={!r}: {} chapters" \
            .format(self.__class__.__name__,
                    '!' if not self.flag_hidden else '',
                    '!' if not self.flag_default else '',
                    self.flag_ordered,
                    # Count without building a throwaway list.
                    sum(1 for _ in self.chapters))

element.py

+4
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,10 @@ def summary(self, indent=0):
231231
"Return a pretty string summarizing this element."
232232
return (" " * indent) + str(self)
233233

234+
def summ(self):
    "Print the string returned by self.summary() to standard output."
    text = self.summary()
    print(text)
237+
234238
# Reimplement maybe
235239

236240
def check_consistency(self):

jdr_lib/__init__.py

-1
This file was deleted.

0 commit comments

Comments
 (0)