-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmonkey-patch.py
94 lines (77 loc) · 2.81 KB
/
monkey-patch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
As the name suggests, this file contains fixes that I need but couldn't make within the Pelican platform itself.
Run this after `make publish`.
"""
import os, re
from shutil import copyfile
from lxml import html
from publishconf import *
from subprocess import call
def modify_url():
    """Add `rel=nofollow` and `target=_blank` to external links.

    Every HTML file returned by `create_list()` is parsed with lxml. An
    anchor is treated as external when its `href` starts with `http` but
    does not start with `SITEURL`. Each file is then rewritten in place
    (prefixed with `<!DOCTYPE html>`) and passed to `optimize()`.
    """
    for path in create_list():
        with open(path, 'r') as f:
            root = html.fromstring(f.read())

        for el in root.iter('a'):
            # Anchors without an href (e.g. named anchors) are skipped
            # explicitly instead of relying on a catch-all `except`.
            href = el.get('href')
            if href is None:
                continue
            # Plain prefix comparison: SITEURL is a literal URL, not a regex,
            # so characters such as `.` must not act as wildcards.
            if href.startswith('http') and not href.startswith(SITEURL):
                el.attrib['rel'] = 'nofollow'
                el.attrib['target'] = '_blank'

        # lxml serialises to bytes, hence the binary write mode.
        with open(path, 'wb') as f:
            f.write(b'<!DOCTYPE html>')
            f.write(html.tostring(root))

        dirpath, name = os.path.split(path)
        optimize(dirpath, name)
def _shift_img_paragraph(text):
    """Return `text` with every `<p><img.+>` match rewritten.

    Each match loses its leading `<p>` and gains a trailing `<p>`. The
    `.+` is greedy and does not cross newlines, so a match runs up to the
    last `>` on the same line.
    """
    return re.sub('<p><img.+>', lambda m: m.group(0)[3:] + '<p>', text)


def modify_img():
    """Move `<img>` tags out of their opening `<p>` in every HTML file.

    For each file returned by `create_list()`, every match of the pattern
    `<p><img.+>` has its leading `<p>` removed and a `<p>` appended, the
    file is rewritten in place and then passed to `optimize()`.

    Substitution is done per match, so pages that contain the same image
    tag more than once are rebuilt correctly.

    NOTE(review): the emitted text is `<img ...>...<p>` (whatever followed
    the image on that line is kept), not a bare `<img>` -- confirm this is
    the intended markup.
    """
    for path in create_list():
        with open(path, 'r') as f:
            original = f.read()

        result = _shift_img_paragraph(original)

        with open(path, 'w') as f:
            f.write(result)

        dirpath, name = os.path.split(path)
        optimize(dirpath, name)
def create_list():
    """Collect the absolute paths of the html files below `public`.

    The `public` directory is resolved against the current working
    directory and walked recursively. A file is included when the text
    after the last `.` in its name is exactly `html` (a file named just
    `html` therefore also qualifies).
    """
    public_root = os.path.abspath('public')
    return [
        os.path.join(folder, entry)
        for folder, _subdirs, entries in os.walk(public_root)
        for entry in entries
        if entry.rpartition('.')[2] == 'html'
    ]
def optimize(dirpath, filename):
    """Minify an html file in place with the external `minify` command.

    Runs `minify <path> -o <path>` where `<path>` is `dirpath` joined with
    `filename`. The command's exit status is ignored.

    Args:
        dirpath: Directory that contains the file.
        filename: Name of the file inside `dirpath`.
    """
    import sys  # local import: only needed for the warning below

    filepath = os.path.join(dirpath, filename)
    # Argument list instead of a shell string: the path is passed to minify
    # verbatim, so quotes or `$(...)` in a file name cannot run shell code.
    # No `--quiet` flag is passed; the previous command template never
    # included it either.
    try:
        call(['minify', filepath, '-o', filepath])
    except FileNotFoundError:
        # Without a shell a missing executable raises; keep the step
        # non-fatal, as it was when the shell merely reported the error.
        print('minify: command not found; skipped ' + filepath, file=sys.stderr)
if __name__ == '__main__':
    # Script entry point: rewrite external links first, then the images.
    for fix in (modify_url, modify_img):
        fix()