From 69ed7b48c80231d50f16945710aba6bb1dcbbb8c Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Thu, 6 Feb 2014 19:42:44 -0500 Subject: [PATCH 01/10] Here's a one-off script for #1936 --- scripts/update_user_info.py | 57 +++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 scripts/update_user_info.py diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py new file mode 100644 index 0000000000..11f9620e8f --- /dev/null +++ b/scripts/update_user_info.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +"""This is a one-off script to update user_info for #1936. + +This could be generalized for #900. + +""" +import os +import time + +import requests +from gittip import wireup +from requests_oauthlib import OAuth1 + + +db = wireup.db() +oauth = OAuth1( os.environ['TWITTER_CONSUMER_KEY'] + , os.environ['TWITTER_CONSUMER_SECRET'] + , os.environ['TWITTER_ACCESS_TOKEN'] + , os.environ['TWITTER_ACCESS_TOKEN_SECRET'] + ) +elsewhere = db.all("SELECT user_id FROM ELSEWHERE WHERE platform='twitter';") +url = "https://api.twitter.com/1.1/users/show.json?user_id=%s" + +for user_id in elsewhere: + response = requests.get(url % user_id, auth=oauth) + + if response.status_code != 200: + # Who knows what happened? Bail. + # (Supposedly we shouldn't hit 429, at least). + raise SystemExit + + + # Update! + # ======= + + user_info = response.json() + + # flatten per upsert method in gittip/elsewhere/__init__.py + for k, v in user_info.items(): + user_info[k] = unicode(v) + + db.run("UPDATE elsewhere SET user_info=%s WHERE user_id=%s", (user_info, user_id)) + + + # Stay under our rate limit. + # ========================= + # We get 180 per 15 minutes for the users/show endpoint, per: + # + # https://dev.twitter.com/docs/rate-limiting/1.1/limits + + print response.headers['X-RATE-LIMIT-REMAINING'] + nremaining = int(response.headers['X-RATE-LIMIT-REMAINING']) + sleep_for = 5 + if nremaining < 180: + reset = int(response.headers['X-RATE-LIMIT-RESET']) + sleep_for = reset - time.time() + time.sleep(sleep_for) From 71e8f1a86d4325192c5cebc521fa28a855989c28 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Thu, 6 Feb 2014 19:51:13 -0500 Subject: [PATCH 02/10] Account for clock skew --- scripts/update_user_info.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index 11f9620e8f..501d77bf00 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -54,4 +54,5 @@ if nremaining < 180: reset = int(response.headers['X-RATE-LIMIT-RESET']) sleep_for = reset - time.time() + sleep_for += 10 # Account for potential clock skew between us and Twitter. time.sleep(sleep_for) From 97b7f09d59774f638d3fc364403dc3c1edb7ac91 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Thu, 6 Feb 2014 19:54:41 -0500 Subject: [PATCH 03/10] Output *something* for HTTP failures --- scripts/update_user_info.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index 501d77bf00..2d32c068da 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -27,6 +27,8 @@ if response.status_code != 200: # Who knows what happened? Bail. # (Supposedly we shouldn't hit 429, at least). + print response.status_code + print response.text raise SystemExit From 8769cbfa7ac87a6108d01bf7559d6952371ca6e8 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Thu, 6 Feb 2014 20:01:15 -0500 Subject: [PATCH 04/10] Oops. Left a debugging conditional in there. --- scripts/update_user_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index 2d32c068da..b4339e306b 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -53,7 +53,7 @@ print response.headers['X-RATE-LIMIT-REMAINING'] nremaining = int(response.headers['X-RATE-LIMIT-REMAINING']) sleep_for = 5 - if nremaining < 180: + if nremaining == 0: reset = int(response.headers['X-RATE-LIMIT-RESET']) sleep_for = reset - time.time() sleep_for += 10 # Account for potential clock skew between us and Twitter. From 908cf2fdffa71a25d1968430d5dde968a2938f04 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Thu, 6 Feb 2014 20:04:15 -0500 Subject: [PATCH 05/10] Emit more helpful log --- scripts/update_user_info.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index b4339e306b..49e04bedb8 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -18,7 +18,7 @@ , os.environ['TWITTER_ACCESS_TOKEN'] , os.environ['TWITTER_ACCESS_TOKEN_SECRET'] ) -elsewhere = db.all("SELECT user_id FROM ELSEWHERE WHERE platform='twitter';") +elsewhere = db.all("SELECT user_id FROM ELSEWHERE WHERE platform='twitter' ORDER BY id;") url = "https://api.twitter.com/1.1/users/show.json?user_id=%s" for user_id in elsewhere: @@ -44,13 +44,18 @@ db.run("UPDATE elsewhere SET user_info=%s WHERE user_id=%s", (user_info, user_id)) + # Emit a log line. + # ================ + + print response.headers['X-RATE-LIMIT-REMAINING'], user_id, user_info['screen_name'] + + # Stay under our rate limit. # ========================= # We get 180 per 15 minutes for the users/show endpoint, per: # # https://dev.twitter.com/docs/rate-limiting/1.1/limits - print response.headers['X-RATE-LIMIT-REMAINING'] nremaining = int(response.headers['X-RATE-LIMIT-REMAINING']) sleep_for = 5 if nremaining == 0: From 1d658889b46c6c18c367f3574e5bc6b9d10ffaf5 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Thu, 6 Feb 2014 20:09:54 -0500 Subject: [PATCH 06/10] Log errors instead of bailing --- scripts/update_user_info.py | 31 ++++++++++++++++++------------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index 49e04bedb8..ebcb78941b 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -24,30 +24,35 @@ for user_id in elsewhere: response = requests.get(url % user_id, auth=oauth) + if response.status_code != 200: - # Who knows what happened? Bail. - # (Supposedly we shouldn't hit 429, at least). - print response.status_code - print response.text - raise SystemExit + # Who knows what happened? + # ======================== + # Supposedly we shouldn't hit 429, at least. + + msg = "{} {}".format(response.status_code, response.text) + + else: + + # Update! + # ======= - # Update! - # ======= + user_info = response.json() - user_info = response.json() + # flatten per upsert method in gittip/elsewhere/__init__.py + for k, v in user_info.items(): + user_info[k] = unicode(v) - # flatten per upsert method in gittip/elsewhere/__init__.py - for k, v in user_info.items(): - user_info[k] = unicode(v) + db.run("UPDATE elsewhere SET user_info=%s WHERE user_id=%s", (user_info, user_id)) - db.run("UPDATE elsewhere SET user_info=%s WHERE user_id=%s", (user_info, user_id)) + msg = user_info['screen_name'] # Emit a log line. # ================ - print response.headers['X-RATE-LIMIT-REMAINING'], user_id, user_info['screen_name'] + print response.headers['X-RATE-LIMIT-REMAINING'], user_id, msg # Stay under our rate limit. From caef92a49b6dc1f19c18fe82c5b88aec29e6d951 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Thu, 6 Feb 2014 20:12:26 -0500 Subject: [PATCH 07/10] Emit rate limit timing info --- scripts/update_user_info.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index ebcb78941b..148112cd39 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -52,7 +52,10 @@ # Emit a log line. # ================ - print response.headers['X-RATE-LIMIT-REMAINING'], user_id, msg + nremaining = int(response.headers['X-RATE-LIMIT-REMAINING']) + reset = int(response.headers['X-RATE-LIMIT-RESET']) + + print nremaining, reset, time.time(), user_id, msg # Stay under our rate limit. @@ -61,10 +64,8 @@ # # https://dev.twitter.com/docs/rate-limiting/1.1/limits - nremaining = int(response.headers['X-RATE-LIMIT-REMAINING']) sleep_for = 5 if nremaining == 0: - reset = int(response.headers['X-RATE-LIMIT-RESET']) sleep_for = reset - time.time() sleep_for += 10 # Account for potential clock skew between us and Twitter. time.sleep(sleep_for) From 1dc47560dc64325a919b22b0a0a52557a4dc02af Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Fri, 7 Feb 2014 13:01:40 -0500 Subject: [PATCH 08/10] Modify script to use lookup instead of show; #1989 This gives us 100 at a time instead of 1! --- scripts/update_user_info.py | 46 ++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index 148112cd39..35a5bf09a3 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -5,6 +5,7 @@ """ import os +import sys import time import requests @@ -18,11 +19,23 @@ , os.environ['TWITTER_ACCESS_TOKEN'] , os.environ['TWITTER_ACCESS_TOKEN_SECRET'] ) -elsewhere = db.all("SELECT user_id FROM ELSEWHERE WHERE platform='twitter' ORDER BY id;") -url = "https://api.twitter.com/1.1/users/show.json?user_id=%s" +elsewhere = db.all("SELECT user_id FROM ELSEWHERE WHERE platform='twitter' ORDER BY id LIMIT 120;") +url = "https://api.twitter.com/1.1/users/lookup.json" -for user_id in elsewhere: - response = requests.get(url % user_id, auth=oauth) +while elsewhere: + batch = elsewhere[:100] + elsewhere = elsewhere[100:] + user_ids = ','.join([str(user_id) for user_id in batch]) + + response = requests.post(url, data={'user_id': user_ids}, auth=oauth) + + + # Log the rate-limit. + # =================== + + nremaining = int(response.headers['X-RATE-LIMIT-REMAINING']) + reset = int(response.headers['X-RATE-LIMIT-RESET']) + print nremaining, reset, time.time() if response.status_code != 200: @@ -31,36 +44,31 @@ # ======================== # Supposedly we shouldn't hit 429, at least. - msg = "{} {}".format(response.status_code, response.text) + print response.status_code, response.text else: # Update! # ======= - user_info = response.json() - - # flatten per upsert method in gittip/elsewhere/__init__.py - for k, v in user_info.items(): - user_info[k] = unicode(v) - - db.run("UPDATE elsewhere SET user_info=%s WHERE user_id=%s", (user_info, user_id)) + users = response.json() - msg = user_info['screen_name'] + for user_info in users: + # flatten per upsert method in gittip/elsewhere/__init__.py + for k, v in user_info.items(): + user_info[k] = unicode(v) - # Emit a log line. - # ================ + user_id = user_info['id'] - nremaining = int(response.headers['X-RATE-LIMIT-REMAINING']) - reset = int(response.headers['X-RATE-LIMIT-RESET']) + db.run("UPDATE elsewhere SET user_info=%s WHERE user_id=%s", (user_info, user_id)) - print nremaining, reset, time.time(), user_id, msg + print "updated {} ({})".format(user_info['screen_name'], user_id) # Stay under our rate limit. # ========================= - # We get 180 per 15 minutes for the users/show endpoint, per: + # We get 180 per 15 minutes for the users/lookup endpoint, per: # # https://dev.twitter.com/docs/rate-limiting/1.1/limits From 50cb540b5c2e7bc66c3148fdb226fd9768c6929d Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Fri, 7 Feb 2014 13:04:12 -0500 Subject: [PATCH 09/10] Take out debugging limit. :-) --- scripts/update_user_info.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index 35a5bf09a3..0862d3952d 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -19,7 +19,7 @@ , os.environ['TWITTER_ACCESS_TOKEN'] , os.environ['TWITTER_ACCESS_TOKEN_SECRET'] ) -elsewhere = db.all("SELECT user_id FROM ELSEWHERE WHERE platform='twitter' ORDER BY id LIMIT 120;") +elsewhere = db.all("SELECT user_id FROM ELSEWHERE WHERE platform='twitter' ORDER BY id;") url = "https://api.twitter.com/1.1/users/lookup.json" while elsewhere: From 6784ed8eb7afc026323f24d580cd3ebe03d56246 Mon Sep 17 00:00:00 2001 From: Chad Whitacre Date: Fri, 7 Feb 2014 13:11:53 -0500 Subject: [PATCH 10/10] Remove errant import --- scripts/update_user_info.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/update_user_info.py b/scripts/update_user_info.py index 0862d3952d..3bdd88e560 100644 --- a/scripts/update_user_info.py +++ b/scripts/update_user_info.py @@ -5,7 +5,6 @@ """ import os -import sys import time import requests