Skip to content

Commit f90582e

Browse files
committed
Add: check if new stats are published
1 parent 51ceaf6 commit f90582e

File tree

2 files changed

+135
-33
lines changed

2 files changed

+135
-33
lines changed

wikipedia_contributors.py

+68-10
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
Extract users of active wikipedia languages. Save them to csv file.
88
"""
99

10-
import urllib2, csv
10+
import urllib2, csv, filecmp, os
11+
from datetime import datetime
1112
from sys import argv
1213
from bs4 import BeautifulSoup
1314
import lang_not_wikiq_codes
@@ -70,8 +71,13 @@ def get_contributors(wiki_links):
7071
Using links, find all users from each language page. Save them to csv file.
7172
"""
7273
#wiki_links = [['http://stats.wikimedia.org/EN/TablesWikipediaEO.htm', 'en', '0']]
73-
74-
out = open('wiki_contributors.csv', 'wb')
74+
print 'DAAAAATEEEEEEE', datetime.now()
75+
date = str(datetime.now())[0:10]
76+
filename = 'wikipedia_contributors_%s.csv' % (date)
77+
if os.path.exists(filename):
78+
filename = filename[:-4] + 'D' + '.csv'
79+
80+
out = open(filename, 'wb')
7581
writer = csv.DictWriter(out, fieldnames = ['username', 'edits, articles, 30 dy', 'edits, other, 30 dy', 'creates, articles, 30 dy', 'creates, other, 30 dy', 'link', 'lang'], dialect='excel')
7682
writer.writeheader()
7783

@@ -128,17 +134,69 @@ def get_contributors(wiki_links):
128134
for e in errors:
129135
print e[6]
130136

131-
return 'Done.'
137+
return 1
138+
139+
def find_csv():
    """Return the Wikipedia contributor CSV files found in the current directory.

    A file qualifies when its name starts with ``wikipedia_contributors`` and
    ends with ``.csv``.

    Returns:
        list of str: bare file names (no directory component), in whatever
        order ``os.listdir`` yields them; an empty list when nothing matches.
    """
    # The result is built without a local named ``csv`` so the ``csv`` module
    # imported at the top of the file is not hidden inside this function.
    return [
        name for name in os.listdir(os.getcwd())
        if name.startswith("wikipedia_contributors") and name.endswith(".csv")
    ]
132145

133146
def _ask_yes_no(question):
    """Show *question* repeatedly until the user types exactly 'y' or 'n'; return that answer."""
    while True:
        answer = raw_input(question)
        if answer in ('y', 'n'):
            return answer


if __name__ == "__main__":
    # Interactive entry point.
    # 1. Report when the newest existing contributors list was generated.
    # 2. Ask whether to fetch a new list; if so, ask whether to restrict it to
    #    the language codes returned by lang_not_wikiq_codes.find_diff()
    #    instead of everything get_wiki_languages() returns.
    # 3. After fetching, compare the new file byte-for-byte with the previous
    #    newest one to tell whether new stats were published.
    files = find_csv()

    # ``latest`` stays None when no earlier list exists, so the comparison
    # step below can skip cleanly instead of failing on an unbound name.
    latest = None
    if not files:
        print("No wikipedia contributors list was found in the directory.")
    else:
        latest = max(files, key=os.path.getctime)
        date = datetime.fromtimestamp(os.path.getctime(latest))
        # Single-argument print with %-formatting produces the same output as
        # the two-argument print statement.
        print('Most recent list of Wikipedia contributors was generated on: %s' % date)

    if _ask_yes_no('Do you want to get a new list (y/n)? ') == 'y':
        if _ask_yes_no('Do you want get only languages not on Wiktionary (y/n)? ') == 'y':
            codes = lang_not_wikiq_codes.find_diff()
        else:
            codes = get_wiki_languages()

        links = create_links(codes)
        if get_contributors(links) == 1:
            updated_files = find_csv()

            if not updated_files or latest is None:
                # Either nothing was written, or there is no earlier list to
                # compare the fresh one against.
                print("No files to compare.")
            else:
                latest2 = max(updated_files, key=os.path.getctime)
                # shallow=False forces a content comparison rather than
                # trusting os.stat() signatures.
                if filecmp.cmp(latest, latest2, shallow=False):
                    print('No new data.')
                else:
                    print('New contributors info acquired.')
    else:
        print("Thank you, have a nice and productive day!")
197+
198+
199+
142200

143201

144202

wiktionary_contributors.py

+67-23
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
Wiktionary contributors
88
"""
99

10-
import urllib2, csv
10+
import urllib2, csv, os, filecmp
11+
from datetime import datetime
1112
from bs4 import BeautifulSoup
1213

1314
def get_active_languages():
@@ -64,13 +65,19 @@ def get_contributors(wiktio_links):
6465

6566
#wiktio_links = [['http://stats.wikimedia.org/wiktionary/EN/TablesWikipediaJA.htm', 'en', '0']]
6667

67-
out = open('wiktionary_contributors.csv', 'wb')
68+
print 'DAAAAATEEEEEEE', datetime.now()
69+
date = str(datetime.now())[0:10]
70+
filename = 'wiktionary_contributors_%s.csv' % (date)
71+
if os.path.exists(filename):
72+
filename = filename[:-4] + 'D' + '.csv'
73+
out = open(filename, 'wb')
6874
writer = csv.DictWriter(out, fieldnames = ['username', 'edits, articles, 30 dy', 'edits, other, 30 dy', 'creates, articles, 30 dy', 'creates, other, 30 dy', 'link', 'lang'], dialect='excel')
6975
writer.writeheader()
7076

7177
errors = []
7278

7379
for l in wiktio_links:
80+
print l
7481
lang_link = l[0]
7582
page = urllib2.urlopen(lang_link).read()
7683
soup = BeautifulSoup(page, "html.parser")
@@ -95,27 +102,64 @@ def get_contributors(wiktio_links):
95102
except IndexError:
96103
print "Index Error!"
97104
print "user_data:", user_data, "user:", name, "language:", l, "\n"
98-
99-
#for e in errors:
100-
# print e[0]
101-
#print '*******************************'
102-
#for e in errors:
103-
# print e[1]
104-
#print '*******************************'
105-
#for e in errors:
106-
# print e[2]
107-
#print '*******************************'
108-
#for e in errors:
109-
# print e[3]
110-
#print '*******************************'
111-
#for e in errors:
112-
# print e[4]
113-
for e in errors:
114-
print e[5]
115-
#for e in errors:
116-
# print e[6]
105+
return 1
106+
# for e in errors:
107+
# print e[0]
108+
# print '*******************************'
109+
# for e in errors:
110+
# print e[1]
111+
# print '*******************************'
112+
# for e in errors:
113+
# print e[2]
114+
# print '*******************************'
115+
# for e in errors:
116+
# print e[3]
117+
# print '*******************************'
118+
# for e in errors:
119+
# print e[4]
120+
# for e in errors:
121+
# print e[5]
122+
# for e in errors:
123+
# print e[6]
124+
125+
def find_csv():
    """Return the Wiktionary contributor CSV files found in the current directory.

    A file qualifies when its name starts with ``wiktionary_contributors`` and
    ends with ``.csv``.

    Returns:
        list of str: bare file names (no directory component), in whatever
        order ``os.listdir`` yields them; an empty list when nothing matches.
    """
    # The result is built without a local named ``csv`` so the ``csv`` module
    # imported at the top of the file is not hidden inside this function.
    return [
        name for name in os.listdir(os.getcwd())
        if name.startswith("wiktionary_contributors") and name.endswith(".csv")
    ]
117131

118132
def _ask_yes_no(question):
    """Show *question* repeatedly until the user types exactly 'y' or 'n'; return that answer."""
    while True:
        answer = raw_input(question)
        if answer in ('y', 'n'):
            return answer


if __name__ == "__main__":
    # Interactive entry point.
    # 1. Report when the newest existing contributors list was generated.
    # 2. Ask whether to fetch a new list for the languages returned by
    #    get_active_languages().
    # 3. After fetching, compare the new file byte-for-byte with the previous
    #    newest one to tell whether new stats were published.
    files = find_csv()

    # ``latest`` stays None when no earlier list exists, so the comparison
    # step below can skip cleanly instead of failing on an unbound name.
    latest = None
    if files:
        latest = max(files, key=os.path.getctime)
        date = datetime.fromtimestamp(os.path.getctime(latest))
        # Single-argument print with %-formatting produces the same output as
        # the two-argument print statement.
        print('Most recent list of Wiktionary contributors was generated on: %s' % date)
    else:
        print("no list of wiktionary contributors was found in the directory")

    if _ask_yes_no('Do you want to get a new list (y/n)? ') == 'y':
        langs = get_active_languages()
        if get_contributors(langs) == 1:
            updated_files = find_csv()
            if updated_files and latest is not None:
                latest2 = max(updated_files, key=os.path.getctime)
                # shallow=False forces a content comparison rather than
                # trusting os.stat() signatures.
                if filecmp.cmp(latest, latest2, shallow=False):
                    print('No new data.')
                else:
                    print('New contributors info acquired.')
            else:
                # Either nothing was written, or there is no earlier list to
                # compare the fresh one against.
                print("no files to compare.")
    else:
        print("Thank you, have a nice and productive day!")
160+
# for c in cvs:
161+
# t = os.path.getctime(c)
162+
# print datetime.fromtimestamp(t)
163+
# print datetime.now() - datetime.fromtimestamp(t)
164+
121165

0 commit comments

Comments
 (0)