Skip to content

Allow htmldocck.py to run using Python 3 #44086

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 26, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 20 additions & 13 deletions src/etc/htmldocck.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@

In order to avoid one-off dependencies for this task, this script uses
a reasonably working HTML parser and the existing XPath implementation
-from Python 2's standard library. Hopefully we won't render
+from Python's standard library. Hopefully we won't render
non-well-formed HTML.

# Commands
Expand Down Expand Up @@ -110,11 +110,17 @@
import re
import shlex
from collections import namedtuple
-from HTMLParser import HTMLParser
+try:
+from html.parser import HTMLParser
+except ImportError:
+from HTMLParser import HTMLParser
from xml.etree import cElementTree as ET

# ⇤/⇥ are not in HTML 4 but are in HTML 5
-from htmlentitydefs import entitydefs
+try:
+from html.entities import entitydefs
+except ImportError:
+from htmlentitydefs import entitydefs
entitydefs['larrb'] = u'\u21e4'
entitydefs['rarrb'] = u'\u21e5'
entitydefs['nbsp'] = ' '
Expand All @@ -123,6 +129,11 @@
VOID_ELEMENTS = set(['area', 'base', 'br', 'col', 'embed', 'hr', 'img', 'input', 'keygen',
'link', 'menuitem', 'meta', 'param', 'source', 'track', 'wbr'])

+# Python 2 -> 3 compatibility
+try:
+unichr
+except NameError:
+unichr = chr

class CustomHTMLParser(HTMLParser):
"""simplified HTML parser.
Expand Down Expand Up @@ -184,12 +195,8 @@ def concat_multi_lines(f):

# strip the common prefix from the current line if needed
if lastline is not None:
-maxprefix = 0
-for i in xrange(min(len(line), len(lastline))):
-if line[i] != lastline[i]:
-break
-maxprefix += 1
-line = line[maxprefix:].lstrip()
+common_prefix = os.path.commonprefix([line, lastline])
+line = line[len(common_prefix):].lstrip()

firstlineno = firstlineno or lineno
if line.endswith('\\'):
Expand All @@ -213,7 +220,7 @@ def concat_multi_lines(f):


def get_commands(template):
-with open(template, 'rUb') as f:
+with open(template, 'rU') as f:
for lineno, line in concat_multi_lines(f):
m = LINE_PATTERN.search(line)
if not m:
Expand Down Expand Up @@ -372,7 +379,7 @@ def check_command(c, cache):
cache.get_file(c.args[0])
ret = True
except FailedCheck as err:
-cerr = err.message
+cerr = str(err)
ret = False
elif len(c.args) == 2: # @has/matches <path> <pat> = string test
cerr = "`PATTERN` did not match"
Expand Down Expand Up @@ -413,9 +420,9 @@ def check_command(c, cache):

except FailedCheck as err:
message = '@{}{} check failed'.format('!' if c.negated else '', c.cmd)
-print_err(c.lineno, c.context, err.message, message)
+print_err(c.lineno, c.context, str(err), message)
except InvalidCheck as err:
-print_err(c.lineno, c.context, err.message)
+print_err(c.lineno, c.context, str(err))

def check(target, commands):
cache = CachedFiles(target)
Expand Down