forked from jbzdarkid/TFWiki-scripts
-
Notifications
You must be signed in to change notification settings - Fork 0
/
displaytitles.py
66 lines (56 loc) · 2.08 KB
/
displaytitles.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from re import findall, search
from utils import pagescraper_queue, time_and_date
from wikitools import wiki
# Module-level verbosity flag; set to True when the file is run as a script.
verbose = False

# Language codes recognised as a trailing "/<code>" title suffix when grouping
# pages for the report. Titles with any other suffix are grouped under 'en'.
LANGS = [
    'ar', 'cs', 'da', 'de', 'en', 'es', 'fi', 'fr',
    'hu', 'it', 'ja', 'ko', 'nl', 'no', 'pl', 'pt',
    'pt-br', 'ro', 'ru', 'sv', 'tr', 'zh-hans', 'zh-hant',
]
def pagescraper(page, errors, overflow):
    """Classify one page by the error spans found in its rendered HTML.

    Args:
        page: Object providing ``get_wiki_text()`` and ``get_raw_html()``.
        errors: List that receives ``page`` when any error span mentions
            'Display title'.
        overflow: Dict that receives ``{first error text: page}`` when the
            page has error spans but none of them mentions 'Display title'.

    Returns:
        None. Results are reported only through ``errors`` and ``overflow``;
        a page without error spans is added to neither.
    """
    if __name__ != '__main__':
        # When running as part of automation, wiki text will be cached, so it is faster to query the wikitext
        # before making another network call to get the page source.
        # ... but this prevents finding other errors
        if 'DISPLAYTITLE' not in page.get_wiki_text():
            return

    messages = findall(r'<span class="error">(.*?)</span>', page.get_raw_html())
    if not messages:
        return

    # Look at every error span, not just the first one: a display-title
    # warning that follows an unrelated error must still be counted.
    if any('Display title' in message for message in messages):
        errors.append(page)
    else:
        # Each page lands in exactly one bucket; key it by its first error.
        overflow[messages[0]] = page
def main(w):
    """Build the wikitext report of pages with duplicate display titles.

    Every page yielded by ``w.get_all_pages()`` is put on the queue created
    by ``pagescraper_queue(pagescraper, errors, overflow)``; the two
    collections are read only after that ``with`` block has exited.

    Args:
        w: Wiki object providing ``get_all_pages()``.

    Returns:
        The report as one string: a header with the total count and date, an
        optional "Other errors" section, then one section per language that
        has at least one flagged page.
    """
    errors = []
    overflow = {}
    with pagescraper_queue(pagescraper, errors, overflow) as pages:
        for page in w.get_all_pages():
            pages.put(page)

    # Group flagged pages by the text after the last '/' in the title; any
    # suffix that is not a known language code is grouped under 'en'.
    by_language = {code: [] for code in LANGS}
    for flagged in errors:
        suffix = flagged.title.rpartition('/')[2]
        by_language[suffix if suffix in LANGS else 'en'].append(flagged)

    header = """\
{{{{DISPLAYTITLE: {count} pages with duplicate DISPLAYTITLEs}}}}
<onlyinclude>{count}</onlyinclude> pages with two (or more) display titles. Data as of {date}.
{{{{TOC limit|2}}}}
""".format(
        count=len(errors),
        date=time_and_date())
    chunks = [header]

    if overflow:
        chunks.append('== Other errors ==\n')
        for message, culprit in overflow.items():
            chunks.append(f'=== [[{culprit.title}]] ===\n{message}\n')

    # Languages are emitted in LANGS order; empty ones get no heading.
    for code in LANGS:
        flagged_pages = by_language[code]
        if not flagged_pages:
            continue
        chunks.append('== {{lang name|name|%s}} ==\n' % code)
        for flagged in flagged_pages:
            chunks.append(f'* [{flagged.get_edit_url()} {flagged.title}]\n')

    return ''.join(chunks)
# Script entry point: crawl the Team Fortress wiki and write the report to
# wiki_displaytitles.txt in the current directory.
if __name__ == '__main__':
    verbose = True
    w = wiki.Wiki('https://wiki.teamfortress.com/w/api.php')
    # Build the whole article before opening the output file. Mode 'w'
    # truncates on open, so opening first would wipe the previous report
    # if main() raised partway through.
    article = main(w)
    with open('wiki_displaytitles.txt', 'w', encoding='utf-8') as f:
        f.write(article)
    # Report success only after the file has been closed.
    print(f'Article written to {f.name}')