Add: check if new stats are published

MaliRobot · MaliRobot · commit f90582e18cd8 · 2015-06-02T03:24:45.000+02:00
diff --git a/wikipedia_contributors.py b/wikipedia_contributors.py
@@ -7,7 +7,8 @@
 Extract users of active wikipedia languages. Save them to csv file.
 """
 
-import urllib2, csv
+import urllib2, csv, filecmp, os
+from datetime import datetime
 from sys import argv
 from bs4 import BeautifulSoup
 import lang_not_wikiq_codes
@@ -70,8 +71,13 @@ def get_contributors(wiki_links):
     Using links, find all users from each language page. Save them to csv file.
     """
     #wiki_links = [['http://stats.wikimedia.org/EN/TablesWikipediaEO.htm', 'en', '0']]        
-    
-    out = open('wiki_contributors.csv', 'wb')
+    print 'DAAAAATEEEEEEE', datetime.now()
+    date = str(datetime.now())[0:10]
+    filename = 'wikipedia_contributors_%s.csv' % (date)
+    if os.path.exists(filename):
+        filename = filename[:-4] + 'D' + '.csv'
+       
+    out = open(filename, 'wb')
     writer = csv.DictWriter(out, fieldnames = ['username', 'edits, articles, 30 dy', 'edits, other, 30 dy', 'creates, articles, 30 dy', 'creates, other, 30 dy', 'link', 'lang'], dialect='excel')
     writer.writeheader()
     
@@ -128,17 +134,69 @@ def get_contributors(wiki_links):
     for e in errors:
         print e[6]
         
-    return 'Done.'
+    return 1
+    
+def find_csv():
+    """Return the names of wikipedia_contributors*.csv files in the working directory."""
+    # Bare file names are returned; the caller resolves them relative to the cwd.
+    names = os.listdir(os.getcwd())
+    return [f for f in names
+            if f.startswith("wikipedia_contributors") and f.endswith(".csv")]
     
 if __name__ == "__main__":  
-    if len(argv) > 1:
-        if argv[1] == 'diff':
+    """
+    Report the most recent contributors list found in the working directory and
+    ask whether to fetch a new one, using either the codes from
+    get_wiki_languages() or only the codes of Wikipedia languages not present on
+    Wiktionary (lang_not_wikiq_codes.find_diff()); then compare new and old lists.
+    """
+    files = find_csv()
+    latest = None  # stays None on a first run, when no earlier list exists
+    if len(files) == 0:
+        print "No wikipedia contributors list was found in the directory."
+    else:
+        latest = max(files, key=os.path.getctime)
+        date = datetime.fromtimestamp(os.path.getctime(latest))
+        print 'Most recent list of Wikipedia contributors was generated on:', date
+
+    # Keep asking until the answer is exactly 'y' or 'n'.
+    while True:
+        do = raw_input('Do you want to get a new list (y/n)? ')
+        if do == 'y' or do == 'n':
+            break
+
+    if do == 'y':
+        # Second question: use only the codes from find_diff() (languages not on
+        # Wiktionary) or everything returned by get_wiki_languages()?
+        while True:
+            do = raw_input('Do you want get only languages not on Wiktionary (y/n)? ')
+            if do == 'y' or do == 'n':
+                break
+
+        if do == 'y':
             codes = lang_not_wikiq_codes.find_diff()
-            print "DIFFERENCE:", codes
+        else:
+            codes = get_wiki_languages()
+
+        links = create_links(codes)
+        if get_contributors(links) == 1:
+            updated_files = find_csv()
+            # Without an earlier list (first run) there is nothing to compare against.
+            if latest is None or len(updated_files) == 0:
+                print "No files to compare."
+            else:
+                latest2 = max(updated_files, key=os.path.getctime)
+                # shallow=False makes filecmp compare the file contents.
+                res = filecmp.cmp(latest, latest2, shallow = False)
+                if res:
+                    print 'No new data.'
+                else:
+                    print 'New contributors info acquired.'
     else:
-        codes = get_wiki_languages()
-    links = create_links(codes)
-    get_contributors(links)
+        print "Thank you, have a nice and productive day!"
+    
+    
+    
     
 
     
diff --git a/wiktionary_contributors.py b/wiktionary_contributors.py
@@ -7,7 +7,8 @@
 Wiktionary contributors
 """
 
-import urllib2, csv
+import urllib2, csv, os, filecmp
+from datetime import datetime
 from bs4 import BeautifulSoup
 
 def get_active_languages():
@@ -64,13 +65,19 @@ def get_contributors(wiktio_links):
     
     #wiktio_links = [['http://stats.wikimedia.org/wiktionary/EN/TablesWikipediaJA.htm', 'en', '0']]        
     
-    out = open('wiktionary_contributors.csv', 'wb')
+    print 'DAAAAATEEEEEEE', datetime.now()
+    date = str(datetime.now())[0:10]
+    filename = 'wiktionary_contributors_%s.csv' % (date)
+    if os.path.exists(filename):
+        filename = filename[:-4] + 'D' + '.csv'
+    out = open(filename, 'wb')
     writer = csv.DictWriter(out, fieldnames = ['username', 'edits, articles, 30 dy', 'edits, other, 30 dy', 'creates, articles, 30 dy', 'creates, other, 30 dy', 'link', 'lang'], dialect='excel')
     writer.writeheader()
     
     errors = []
     
     for l in wiktio_links:
+        print l
         lang_link = l[0]
         page = urllib2.urlopen(lang_link).read()
         soup = BeautifulSoup(page, "html.parser")
@@ -95,27 +102,64 @@ def get_contributors(wiktio_links):
             except IndexError:
                 print "Index Error!"
                 print "user_data:", user_data, "user:", name, "language:", l, "\n"
-    
-    #for e in errors:
-    #    print e[0]
-    #print '*******************************'
-    #for e in errors:
-    #    print e[1]
-    #print '*******************************'
-    #for e in errors:
-    #    print e[2]
-    #print '*******************************'
-    #for e in errors:
-    #    print e[3]
-    #print '*******************************'
-    #for e in errors:
-    #    print e[4]
-    for e in errors:
-        print e[5]
-    #for e in errors:
-    #    print e[6]
+    return 1
+#    for e in errors:
+#        print e[0]
+#    print '*******************************'
+#    for e in errors:
+#        print e[1]
+#    print '*******************************'
+#    for e in errors:
+#        print e[2]
+#    print '*******************************'
+#    for e in errors:
+#        print e[3]
+#    print '*******************************'
+#    for e in errors:
+#        print e[4]
+#    for e in errors:
+#        print e[5]
+#    for e in errors:
+#        print e[6]
+
+def find_csv():
+    """Return the names of wiktionary_contributors*.csv files in the working directory."""
+    # Bare file names are returned; the caller resolves them relative to the cwd.
+    names = os.listdir(os.getcwd())
+    return [f for f in names
+            if f.startswith("wiktionary_contributors") and f.endswith(".csv")]
 
 if __name__ == "__main__":
-    langs = get_active_languages()
-    get_contributors(langs)
+    files = find_csv()
+    latest = None  # stays None on a first run, when no earlier list exists
+    if len(files) > 0:
+        latest = max(files, key=os.path.getctime)
+        date = datetime.fromtimestamp(os.path.getctime(latest))
+        print 'Most recent list of Wiktionary contributors was generated on:', date
+    else:
+        print "no list of wiktionary contributors was found in the directory"
+    # Keep asking until the answer is exactly 'y' or 'n'.
+    while True:
+        do = raw_input('Do you want to get a new list (y/n)? ')
+        if do == 'y' or do == 'n':
+            break
+    if do == 'y':
+        langs = get_active_languages()
+        if get_contributors(langs) == 1:
+            updated_files = find_csv()
+            if latest is not None and len(updated_files) > 0:
+                latest2 = max(updated_files, key=os.path.getctime)
+                if filecmp.cmp(latest, latest2, shallow = False):
+                    print 'No new data.'
+                else:
+                    print 'New contributors info acquired.'
+            else:
+                print "no files to compare."
+    else:
+        print "Thank you, have a nice and productive day!"
+#    for c in cvs:
+#        t = os.path.getctime(c) 
+#        print datetime.fromtimestamp(t)
+#        print datetime.now() - datetime.fromtimestamp(t)
+