7
7
Wiktionary contributors
8
8
"""
9
9
10
- import urllib2 , csv
10
+ import urllib2 , csv , os , filecmp
11
+ from datetime import datetime
11
12
from bs4 import BeautifulSoup
12
13
13
14
def get_active_languages ():
@@ -64,13 +65,19 @@ def get_contributors(wiktio_links):
64
65
65
66
#wiktio_links = [['http://stats.wikimedia.org/wiktionary/EN/TablesWikipediaJA.htm', 'en', '0']]
66
67
67
- out = open ('wiktionary_contributors.csv' , 'wb' )
68
+ print 'DAAAAATEEEEEEE' , datetime .now ()
69
+ date = str (datetime .now ())[0 :10 ]
70
+ filename = 'wiktionary_contributors_%s.csv' % (date )
71
+ if os .path .exists (filename ):
72
+ filename = filename [:- 4 ] + 'D' + '.csv'
73
+ out = open (filename , 'wb' )
68
74
writer = csv .DictWriter (out , fieldnames = ['username' , 'edits, articles, 30 dy' , 'edits, other, 30 dy' , 'creates, articles, 30 dy' , 'creates, other, 30 dy' , 'link' , 'lang' ], dialect = 'excel' )
69
75
writer .writeheader ()
70
76
71
77
errors = []
72
78
73
79
for l in wiktio_links :
80
+ print l
74
81
lang_link = l [0 ]
75
82
page = urllib2 .urlopen (lang_link ).read ()
76
83
soup = BeautifulSoup (page , "html.parser" )
@@ -95,27 +102,64 @@ def get_contributors(wiktio_links):
95
102
except IndexError :
96
103
print "Index Error!"
97
104
print "user_data:" , user_data , "user:" , name , "language:" , l , "\n "
98
-
99
- #for e in errors:
100
- # print e[0]
101
- #print '*******************************'
102
- #for e in errors:
103
- # print e[1]
104
- #print '*******************************'
105
- #for e in errors:
106
- # print e[2]
107
- #print '*******************************'
108
- #for e in errors:
109
- # print e[3]
110
- #print '*******************************'
111
- #for e in errors:
112
- # print e[4]
113
- for e in errors :
114
- print e [5 ]
115
- #for e in errors:
116
- # print e[6]
105
+ return 1
106
+ # for e in errors:
107
+ # print e[0]
108
+ # print '*******************************'
109
+ # for e in errors:
110
+ # print e[1]
111
+ # print '*******************************'
112
+ # for e in errors:
113
+ # print e[2]
114
+ # print '*******************************'
115
+ # for e in errors:
116
+ # print e[3]
117
+ # print '*******************************'
118
+ # for e in errors:
119
+ # print e[4]
120
+ # for e in errors:
121
+ # print e[5]
122
+ # for e in errors:
123
+ # print e[6]
124
+
125
def find_csv():
    """Return the contributor CSV file names found in the current directory.

    A file qualifies when its name starts with "wiktionary_contributors" and
    ends with ".csv". Names are returned exactly as os.listdir() yields them
    (bare names, in directory-listing order), so they are only valid as
    paths relative to the current working directory.

    Returns:
        list of str: matching file names; empty when none are present.
    """
    # The result is deliberately not called `csv`: that name would shadow the
    # `csv` module imported at the top of the file.
    return [
        name
        for name in os.listdir(os.getcwd())
        if name.startswith("wiktionary_contributors") and name.endswith(".csv")
    ]
117
131
118
132
if __name__ == "__main__":
    # Script entry point: report the newest existing contributors CSV, ask
    # whether to download a fresh list, and say whether the fresh download
    # differs from the previous newest file.
    #
    # print() is called with a single pre-formatted string so each statement
    # produces the same output under the Python 2 print statement.
    files = find_csv()

    # `latest` stays None when no CSV exists yet; the comparison step below
    # relies on that to avoid referencing an unbound name on a first run.
    latest = None
    if files:
        latest = max(files, key=os.path.getctime)
        date = datetime.fromtimestamp(os.path.getctime(latest))
        print('Most recent list of Wiktionary contributors was generated on: %s' % date)
    else:
        print("no list of wiktionary contributors was found in the directory")

    # Keep prompting until the answer is exactly 'y' or 'n'.
    do = ''
    while do not in ('y', 'n'):
        do = raw_input('Do you want to get a new list (y/n)? ')

    if do == 'y':
        langs = get_active_languages()
        # get_contributors() returns 1 when it runs to completion.
        if get_contributors(langs) == 1:
            updated_files = find_csv()
            if not updated_files:
                print("no files to compare.")
            elif latest is None:
                # Nothing existed before this run, so whatever was just
                # written is new by definition.
                print('New contributors info acquired.')
            else:
                latest2 = max(updated_files, key=os.path.getctime)
                # NOTE(review): if the download overwrote the file that was
                # already `latest`, both names refer to the same file and
                # this reports 'No new data.' -- confirm intended behaviour.
                # shallow=False forces a byte-by-byte content comparison.
                if filecmp.cmp(latest, latest2, shallow=False):
                    print('No new data.')
                else:
                    print('New contributors info acquired.')
    else:
        print("Thank you, have a nice and productive day!")
160
+ # for c in cvs:
161
+ # t = os.path.getctime(c)
162
+ # print datetime.fromtimestamp(t)
163
+ # print datetime.now() - datetime.fromtimestamp(t)
164
+
121
165
0 commit comments