-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathhtmlformat.py
35 lines (27 loc) · 873 Bytes
/
htmlformat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# -*- coding: utf-8 -*-
# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals, division, absolute_import, print_function
import textwrap
import regex
import sigil_gumbo_bs4_adapter as gumbo_bs4
# XHTML 1.1 document skeleton used as a str.format() template.
# Positional placeholders:
#   {0} - markup inserted inside <head>, ahead of the empty <title>
#   {1} - markup inserted inside <body>
# build_html() below passes the optional stylesheet link as {0} and the
# caller's fragment as {1}.
HTML = textwrap.dedent('''<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
{0}
<title></title>
</head>
<body>
{1}
</body>
</html>''')
# <link> element pointing at "stylesheet.css"; build_html() places it in the
# {0} slot of HTML when called with a truthy css argument.
LINK_TEXT = '<link href="stylesheet.css" type="text/css" rel="stylesheet"/>'
def build_html(fragment, css=False):
    """Wrap a markup fragment in the module's XHTML template and serialize it.

    Args:
        fragment: Markup string placed inside the template's ``<body>``.
        css: When truthy, ``LINK_TEXT`` (a link to ``stylesheet.css``) is
            placed in the template's ``<head>``; otherwise that slot is left
            empty.

    Returns:
        Whatever ``serialize_xhtml()`` yields for the document parsed by
        ``gumbo_bs4.parse`` (presumably the XHTML source as a string --
        confirm against the adapter).
    """
    # Content-less paragraphs (<p ...></p>) get a single space as content,
    # keeping their attributes. NOTE(review): presumably this stops empty
    # paragraphs from being collapsed by the parse/serialize round trip.
    padded = regex.sub(r'<p([^>]*)></p>', r'<p\1> </p>', fragment)

    head_markup = LINK_TEXT if css else ''
    document = HTML.format(head_markup, padded)

    return gumbo_bs4.parse(document).serialize_xhtml()