Skip to content

Commit

Permalink
Changes suggested by pylint
Browse files Browse the repository at this point in the history
  • Loading branch information
tjunttila committed Feb 23, 2021
1 parent 38d209b commit 33ac0ab
Show file tree
Hide file tree
Showing 6 changed files with 314 additions and 225 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ One can use `pip` to install `pdf2video` directly from GitHub:
```
python3 -m pip install git+https://github.com/tjunttila/pdf2video.git
```
See the [PyPA Installing Packages tutorial](https://packaging.python.org/tutorials/installing-packages/) if you are not familiar with installing Python packages.
See the [PyPA Installing Packages tutorial](https://packaging.python.org/tutorials/installing-packages/) for information on installing Python packages and on Python virtual environments.

# Usage

Expand Down
179 changes: 105 additions & 74 deletions pdf2video/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,39 @@
License: The MIT License
"""

from abc import ABC, abstractmethod
import re
import sys

class AST(ABC):
    """Base class for abstract syntax tree nodes.

    Every concrete node type must implement the three conversions below.
    Because they are declared abstract, instantiating ``AST`` itself (or a
    subclass that leaves one of them out) raises ``TypeError``.
    """

    @abstractmethod
    def to_ssml(self, neural):
        """Get the SSML representation of the sub-tree.

        ``neural`` is a flag that implementations may use to choose between
        two SSML renderings of the same node.
        """

    @abstractmethod
    def to_words(self):
        """Get the plain words representation of the sub-tree."""

    @abstractmethod
    def to_sub(self):
        """Get the sub-titles representation of the sub-tree."""

class ASTWord(AST):
    """An AST node for a word."""

    def __init__(self, text):
        """Store the word's text."""
        super().__init__()
        self.text = text

    def to_ssml(self, neural):
        """Return the word text unchanged; ``neural`` is not used."""
        return self.text

    def to_words(self):
        """Return a one-element list holding the word text."""
        return [self.text]

    def to_sub(self):
        """Return the word text as its sub-title representation."""
        return self.text

class ASTBreak(AST):
"""An AST node for a break."""
def __init__(self, time):
self.time = time
def to_ssml(self, neural):
Expand All @@ -33,7 +45,9 @@ def to_words(self):
return []
def to_sub(self):
return ''

class ASTDelim(AST):
"""An AST node for a delimiter."""
def __init__(self, text):
self.text = text
def to_ssml(self, neural):
Expand All @@ -42,7 +56,9 @@ def to_words(self):
return []
def to_sub(self):
return self.text

class ASTSpace(AST):
"""An AST node for a white space."""
def __init__(self):
pass
def to_ssml(self, neural):
Expand All @@ -51,32 +67,38 @@ def to_words(self):
return []
def to_sub(self):
return ' '

class ASTEmph(AST):
    """An AST node for emphasized text."""

    def __init__(self, children):
        """Store the child AST nodes that are to be emphasized."""
        self.children = children

    def to_ssml(self, neural):
        """Return the children's SSML wrapped in a loud ``<prosody>`` element.

        With a truthy ``neural`` the element sets a 90% speaking rate;
        otherwise it sets a high pitch.
        """
        children_ssml = "".join(child.to_ssml(neural) for child in self.children)
        if neural:
            return '<prosody rate="90%" volume="loud">'+children_ssml+'</prosody>'
        return '<prosody pitch="high" volume="loud">'+children_ssml+'</prosody>'

    def to_words(self):
        """Return the words of all children, in order, as one flat list."""
        return [word for child in self.children for word in child.to_words()]

    def to_sub(self):
        """Return the concatenated sub-title text of all children."""
        return "".join(child.to_sub() for child in self.children)

class ASTPhoneme(AST):
    """An AST node for text read with phonemes."""

    def __init__(self, text, xsampa):
        """Store the displayed text and its X-SAMPA pronunciation string."""
        self.text = text
        self.xsampa = xsampa

    def to_ssml(self, neural):
        """Return a ``<phoneme>`` element for the text; ``neural`` is not used."""
        # NOTE(review): xsampa is interpolated into the attribute unescaped.
        # X-SAMPA marks primary stress with a double quote, which would end
        # the attribute early -- confirm whether callers escape it.
        return f'<phoneme alphabet="x-sampa" ph="{self.xsampa}">{self.text}</phoneme>'

    def to_words(self):
        """Return the stripped text split on runs of white space."""
        return re.split(r'\s+', self.text.strip())

    def to_sub(self):
        """Return the text as its sub-title representation."""
        return self.text

class ASTSub(AST):
"""An AST node for text with different sub-title representation."""
def __init__(self, children, subtitles):
self.children = children
self.subtitles = subtitles
Expand All @@ -85,159 +107,168 @@ def to_ssml(self, neural):
return "".join(children_ssml)
def to_words(self):
result = []
for child in self.children: result += child.to_words()
for child in self.children:
result += child.to_words()
return result
def to_sub(self):
return self.subtitles

class ASTSlow(AST):
    """An AST node for text read slowly."""

    def __init__(self, children):
        """Store the child AST nodes that are to be read slowly."""
        self.children = children

    def to_ssml(self, neural):
        """Return the children's SSML wrapped in an 80%-rate ``<prosody>`` element.

        ``neural`` is only passed on to the children; the wrapper is the same
        either way.
        """
        children_ssml = "".join(child.to_ssml(neural) for child in self.children)
        return '<prosody rate="80%">'+children_ssml+'</prosody>'

    def to_words(self):
        """Return the words of all children, in order, as one flat list."""
        return [word for child in self.children for word in child.to_words()]

    def to_sub(self):
        """Return the concatenated sub-title text of all children."""
        return "".join(child.to_sub() for child in self.children)

class ASTLow(AST):
    """An AST node for text read in low pitch."""

    def __init__(self, children):
        """Store the child AST nodes that are to be read in low pitch."""
        self.children = children

    def to_ssml(self, neural):
        """Return the children's SSML wrapped in a ``<prosody>`` element.

        With a truthy ``neural`` the element lowers the speaking rate to 80%
        instead of lowering the pitch.
        """
        children_ssml = "".join(child.to_ssml(neural) for child in self.children)
        if neural:
            # prosody pitch not yet in neural TTS, make it slightly slower
            return '<prosody rate="80%">'+children_ssml+'</prosody>'
        return '<prosody pitch="low">'+children_ssml+'</prosody>'

    def to_words(self):
        """Return the words of all children, in order, as one flat list."""
        return [word for child in self.children for word in child.to_words()]

    def to_sub(self):
        """Return the concatenated sub-title text of all children."""
        return "".join(child.to_sub() for child in self.children)

class ASTHigh(AST):
    """An AST node for text read in high pitch."""

    def __init__(self, children):
        """Store the child AST nodes that are to be read in high pitch."""
        self.children = children

    def to_ssml(self, neural):
        """Return the children's SSML wrapped in a ``<prosody>`` element.

        With a truthy ``neural`` the element raises the speaking rate to 120%
        instead of raising the pitch.
        """
        children_ssml = "".join(child.to_ssml(neural) for child in self.children)
        if neural:
            # prosody pitch not yet in neural TTS, make it slightly faster
            return '<prosody rate="120%">'+children_ssml+'</prosody>'
        return '<prosody pitch="high">'+children_ssml+'</prosody>'

    def to_words(self):
        """Return the words of all children, in order, as one flat list."""
        return [word for child in self.children for word in child.to_words()]

    def to_sub(self):
        """Return the concatenated sub-title text of all children."""
        return "".join(child.to_sub() for child in self.children)

class ASTSayAs(AST):
    """An AST node for text read as letters."""

    def __init__(self, letters):
        """Store the text that is to be spelled out character by character."""
        self.letters = letters

    def to_ssml(self, neural):
        """Return a ``<say-as>`` characters element; ``neural`` is not used."""
        return '<say-as interpret-as="characters">'+self.letters+'</say-as>'

    def to_words(self):
        """Return the stripped text split on runs of white space."""
        return re.split(r'\s+', self.letters.strip())

    def to_sub(self):
        """Return the text as its sub-title representation."""
        return self.letters


def parse_to_ast(string, err_linenum = None):
"""Parse the script text string into a sequence of AST nodes."""
i = 0
n = len(string)
string_length = len(string)
def read_until(chars):
nonlocal i
tmp = i
while i < n and string[i] not in chars:
while i < string_length and string[i] not in chars:
i += 1
return string[tmp:i]
def err(msg):
ln = f'On line {err_linenum}: ' if err_linenum != None else ''
print(ln+msg)
exit(1)
linenum_text = '' if err_linenum is None else f'On line {err_linenum}: '
print(linenum_text+msg)
sys.exit(1)
#assert False, msg
result = []
while i < n:
while i < string_length:
if string[i] == '#':
if string[i:i+4] == '#sub':
m = re.match('^#sub(.)(?P<text>((?!\1).)*?)\\1(?P<sub>((?!\1).)+?)\\1', string[i:])
if m == None:
match = re.match(
'^#sub(.)(?P<text>((?!\1).)*?)\\1(?P<sub>((?!\1).)+?)\\1',
string[i:])
if match is None:
err(f'Malformed #sub "{string[i:]}"')
t = parse_to_ast(m['text'])
result.append(ASTSub(t, m['sub']))
i += len(m.group(0))
result.append(ASTSub(parse_to_ast(match['text']), match['sub']))
i += len(match.group(0))
continue
if string[i:i+5] == '#slow':
m = re.match('^#slow(.)(?P<text>((?!\1).)+?)\\1', string[i:])
if m == None:
match = re.match('^#slow(.)(?P<text>((?!\1).)+?)\\1', string[i:])
if match is None:
err(f'Malformed #slow "{string[i:]}"')
t = parse_to_ast(m['text'])
result.append(ASTSlow(t))
i += len(m.group(0))
result.append(ASTSlow(parse_to_ast(match['text'])))
i += len(match.group(0))
continue
if string[i:i+4] == '#low':
m = re.match('^#low(.)(?P<text>((?!\1).)+?)\\1', string[i:])
if m == None:
match = re.match('^#low(.)(?P<text>((?!\1).)+?)\\1', string[i:])
if match is None:
err(f'Malformed #low "{string[i:]}"')
t = parse_to_ast(m['text'])
result.append(ASTLow(t))
i += len(m.group(0))
result.append(ASTLow(parse_to_ast(match['text'])))
i += len(match.group(0))
continue
if string[i:i+5] == '#high':
m = re.match('^#high(.)(?P<text>((?!\1).)+?)\\1', string[i:])
if m == None:
match = re.match('^#high(.)(?P<text>((?!\1).)+?)\\1', string[i:])
if match is None:
err(f'Malformed #high "{string[i:]}"')
t = parse_to_ast(m['text'])
result.append(ASTHigh(t))
i += len(m.group(0))
result.append(ASTHigh(parse_to_ast(match['text'])))
i += len(match.group(0))
continue
if string[i:i+3] == '#ph':
m = re.match('^#ph(.)(?P<text>((?!\1).)+?)\\1(?P<ph>((?!\1).)+?)\\1', string[i:])
if m == None:
match = re.match(
'^#ph(.)(?P<text>((?!\1).)+?)\\1(?P<ph>((?!\1).)+?)\\1',
string[i:])
if match is None:
err(f'Malformed #ph "{string[i:]}"')
result.append(ASTPhoneme(m['text'], m['ph']))
i += len(m.group(0))
result.append(ASTPhoneme(match['text'], match['ph']))
i += len(match.group(0))
continue
# Break #10
m = re.match('^#(?P<time>\d+)', string[i:])
if m != None:
result.append(ASTBreak(int(m['time'])))
i += len(m.group(0))
match = re.match(r'^#(?P<time>\d+)', string[i:])
if match:
result.append(ASTBreak(int(match['time'])))
i += len(match.group(0))
continue
err(f'Unrecognized script command "{string[i:]}"')
elif string[i] == '*':
m = re.match('^\*(?P<text>[^\*]+)\*', string[i:])
if m == None:
match = re.match(r'^\*(?P<text>[^\*]+)\*', string[i:])
if match is None:
err(f'Malformed emphasis "{string[i:]}"')
t = parse_to_ast(m['text'])
result.append(ASTEmph(t))
i += len(m.group(0))
result.append(ASTEmph(parse_to_ast(match['text'])))
i += len(match.group(0))
elif string[i] == '@':
m = re.match('^@(?P<text>[^@]+)@', string[i:])
if m == None:
err(f'Malformed say-ass "{string[i:]}"')
result.append(ASTSayAs(m['text']))
i += len(m.group(0))
match = re.match(r'^@(?P<text>[^@]+)@', string[i:])
if match is None:
err(f'Malformed say-as "{string[i:]}"')
result.append(ASTSayAs(match['text']))
i += len(match.group(0))
else:
m = re.match('^\s+', string[i:])
if m != None:
match = re.match(r'^\s+', string[i:])
if match:
result.append(ASTSpace())
i += len(m.group(0))
i += len(match.group(0))
continue
# Negative numbers are words
m = re.match('^-\d+', string[i:])
if m != None:
result.append(ASTWord(m.group(0)))
i += len(m.group(0))
match = re.match(r'^-\d+', string[i:])
if match:
result.append(ASTWord(match.group(0)))
i += len(match.group(0))
continue
# Delimiters
m = re.match('^[-.,:;!?"]', string[i:])
if m != None:
result.append(ASTDelim(m.group(0)))
i += len(m.group(0))
match = re.match('^[-.,:;!?"]', string[i:])
if match:
result.append(ASTDelim(match.group(0)))
i += len(match.group(0))
continue
word = read_until([' ','\t','#','*','@','"','.',',',':',';','!','?'])
result.append(ASTWord(word))
Expand Down
Loading

0 comments on commit 33ac0ab

Please sign in to comment.