# traverse_html_regex_replace_and_replace.py
"""
# Python 3
# 2021-09-22
Author: MasterHowToLearn: https://github.com/MasterHowToLearn
What this script does:
- traversing a list of HTML
- using regex for find and replace
For more please read its accompanying article: https://www.masterhowtolearn.com/2021-09-22-use-this-one-trick-to-find-supermemo-element-location
Credit: the base traverse code is from Xah Lee's http://xahlee.info/python/findreplace_regex.html
"""
import os, datetime, traceback, shutil, re, ntpath
# Set to False to skip copying each file before it is overwritten.
do_backup = True
# Directory in which backup copies are stored. End it with a path separator.
# An empty string makes the backup path just "<file name><BACKUP_SUFFIX>",
# which is resolved relative to the current working directory.
BACKUP_LOCATION = ""
# Text appended to the original file name to form the backup file name.
BACKUP_SUFFIX = "~~"

# Each entry below is a (compiled pattern, replacement) pair.

# Smart single quotes: the HTML numeric entities &#8216; and &#8217;
# (left/right single quotation mark) become a plain apostrophe.
smart_single_quote = (
    re.compile(r"&#821[67];", re.U | re.I),
    r"'",
)
# Smart double quotes: the HTML numeric entities &#8220; and &#8221;
# (left/right double quotation mark) become a plain double quote.
smart_double_quote = (
    re.compile(r"&#822[01];", re.U | re.I),
    r'"',
)
# Remove the SuperMemo-generated blue colon that appears after splitting.
remove_blue_colon_after_splitting = (
    re.compile(r'<strong><font color="blue">\s*:\s*</font></strong>', re.U | re.I),
    r"",
)

# Files to process. Keep the trailing comma on every entry: without it,
# adjacent string literals are silently concatenated into a single path.
file_list = [
    r"s:\supermemo\systems\zenmode\elements\2\27.HTM",
    # r"s:\supermemo\systems\zenmode\elements\2\27.HTM",
    # r"s:\supermemo\systems\zenmode\elements\2\27.HTM",
    # r"s:\supermemo\systems\zenmode\elements\2\27.HTM",
]

# Pairs applied to every file, in this order.
find_replace_list = [
    smart_single_quote,
    smart_double_quote,
    # remove_blue_colon_after_splitting,
]
##################################################
# MODIFY BELOW AT YOUR OWN RISK
def _replace_string_in_file(
    fpath, find_replace_list, do_backup, BACKUP_LOCATION, BACKUP_SUFFIX
):
    """Apply every (pattern, replacement) pair to the file at fpath, in place.

    The file is read as UTF-8 and the pairs are applied in order with
    re.subn, each one operating on the output of the previous one. The file
    is rewritten (and optionally backed up first) only when at least one
    substitution was made.

    Args:
        fpath: Path of the file to process.
        find_replace_list: Sequence of pairs whose first item is a pattern
            and whose second item is the replacement, as accepted by re.subn.
        do_backup: When true, copy the unmodified file with shutil.copy2
            before it is overwritten.
        BACKUP_LOCATION: Directory that receives the backup copy. An empty
            string leaves the backup path relative to the working directory.
        BACKUP_SUFFIX: Text appended to the file's base name to form the
            backup file name.

    Returns:
        None. A file that cannot be decoded as UTF-8 is reported and skipped.
    """
    try:
        with open(fpath, "r", encoding="utf-8") as input_file:
            file_content = input_file.read()
    except UnicodeDecodeError:
        # Report the path. Formatting the file object itself with "{:s}"
        # raises TypeError, which would turn a skip into a crash.
        print("UnicodeDecodeError:{}".format(fpath))
        return None

    num_replaced = 0
    for pair in find_replace_list:
        file_content, count = re.subn(pair[0], pair[1], file_content)
        num_replaced += count

    if num_replaced == 0:
        # Nothing matched: leave the file (and its timestamps) untouched.
        return None

    print(("◆ changed %d %s" % (num_replaced, fpath)))

    if do_backup:
        # ntpath.basename splits on both "\\" and "/", so Windows-style
        # paths yield the bare file name on any platform.
        backup_name = ntpath.basename(fpath) + BACKUP_SUFFIX
        # os.path.join inserts the separator when BACKUP_LOCATION lacks one
        # and returns backup_name unchanged when BACKUP_LOCATION is empty.
        shutil.copy2(fpath, os.path.join(BACKUP_LOCATION, backup_name))

    # "r+" overwrites the existing file in place instead of recreating it;
    # the original code notes this is meant to preserve the creation date.
    with open(fpath, "r+", encoding="utf-8") as output_file:
        output_file.write(file_content)
        # Drop leftover bytes when the new content is shorter than the old.
        output_file.truncate()
    return None
##################################################
def main():
    """Run the configured find/replace pairs over every path in file_list.

    Prints a start timestamp, processes each file with
    _replace_string_in_file using the module-level settings, and prints
    "Done." at the end. When one file fails, its traceback is printed and
    processing continues with the next file.

    Returns:
        None.
    """
    print(datetime.datetime.now())
    for ff in file_list:
        try:
            _replace_string_in_file(
                os.path.normpath(ff),
                find_replace_list,
                do_backup,
                BACKUP_LOCATION,
                BACKUP_SUFFIX,
            )
        except Exception:
            # Deliberately not a bare "except:": KeyboardInterrupt and
            # SystemExit must still be able to stop the batch.
            traceback.print_exc()
    print("Done.")
    return None
# Run the batch only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()