forked from rrenaud/dominionstats
-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathupdate.py
160 lines (126 loc) · 5.25 KB
/
update.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/python
"""Standard update script to keep data up-to-date."""
import celery
import collections
import datetime
import logging
import time
import analyze
import background.tasks
import count_buys
import dominionstats.utils.log
import goal_stats
import load_leaderboard
import optimal_card_ratios
import run_trueskill
import scrape_leaderboard
import utils
# Module-level logger, named after this module. A NullHandler is attached so
# the logger always has at least one handler, even if nothing else has
# configured logging (e.g. when this file is imported rather than run).
log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())
def summarize_task_status(c):
    """Return a string summarizing the state of a task and its children.

    c: mapping of task state -> number of tasks seen in that state
       (a collections.Counter in this module)

    The result has the form "<total> subtasks: <str(c)>", where the
    total is the sum of all the per-state counts.
    """
    total = sum(c.values())
    breakdown = str(c)
    return "{tot_tasks} subtasks: {detail}".format(tot_tasks=total,
                                                   detail=breakdown)
def watch_and_log(signature, log_interval=15, timeout=600):
    """Invoke the celery task via the passed signature, wait for it and
    all its children to complete, and log progress along the way.

    signature: celery signature of the task to invoke; it is started
               with signature.apply_async()
    log_interval: number of seconds between checking and logging the
                  status
    timeout: number of seconds after which to return, when there have
             been no subtask status updates

    Returns the result object produced by apply_async(). When the
    function returns because of the timeout, the task tree was not
    observed to be complete; a warning is logged in that case.
    """
    task_name = signature.task
    log.info("Calling background task %s", task_name)
    async_result = signature.apply_async()

    all_done = False
    last_status_summary = None
    last_status_update = time.time()
    while not all_done:
        # Wait for the log_interval, then check the status
        time.sleep(log_interval)

        # Fresh tally of subtask states for this polling round
        c = collections.Counter()
        try:
            # Setting intermediate to False should cause the
            # IncompleteStream exception to be thrown if the task and
            # its children aren't all complete.
            for _parent, child in async_result.iterdeps(intermediate=False):
                c[child.state] += 1
            all_done = True
        except celery.exceptions.IncompleteStream:
            status_summary = summarize_task_status(c)
            log.info("Waiting for %s: %s", task_name, status_summary)

            if status_summary != last_status_summary:
                # The status changed (or this is the first observation):
                # record it and restart the no-progress clock. The clock
                # must NOT be restarted when the summary is unchanged,
                # otherwise the elapsed time could never exceed roughly
                # log_interval and the timeout would never trigger.
                last_status_summary = status_summary
                last_status_update = time.time()
            elif (time.time() - last_status_update) > timeout:
                # No change in status for longer than the timeout
                break

    final_summary = summarize_task_status(c)
    if all_done:
        log.info("Done with background task %s: %s", task_name, final_summary)
    else:
        log.warning("Returning due to timeout during background task %s: %s", task_name, final_summary)
    return async_result
def main(parsed_args):
    """Run the complete update cycle, one stage after another.

    parsed_args: parsed command-line arguments; passed unchanged to the
                 main() of analyze, count_buys, run_trueskill,
                 optimal_card_ratios and goal_stats

    The day-by-day stages each walk utils.daterange() between
    2010-10-15 and today with reverse=True, run one background task per
    day through watch_and_log(), and stop at the first day whose task
    result is 0.
    """
    first_day = datetime.date(2010, 10, 15)

    # Stage 1: scrape and load the raw games from isotropic, proceeding
    # from the current day backwards until a day inserts no games.
    log.info("Starting scrape for raw games")
    for day in utils.daterange(first_day, datetime.date.today(), reverse=True):
        log.info("Invoking scrape_raw_games async task for %s", day)
        scrape_sig = background.tasks.scrape_raw_games.s(day)
        inserted_count = watch_and_log(scrape_sig).get()

        if inserted_count is None:
            # A None result is logged and the scan simply moves on
            log.info("Nothing processed for %s", day)
            continue
        if inserted_count == 0:
            log.info("No games inserted for %s", day)
            break

    # Stage 2: the analyze script
    log.info("Starting analyze")
    analyze.main(parsed_args)

    # Stage 3: goals, one day per task invocation (passed as a
    # single-element list)
    log.info("Starting search for goals acheived")
    for day in utils.daterange(first_day, datetime.date.today(), reverse=True):
        log.info("Invoking calc_goals_for_days async task for %s", day)
        goals_sig = background.tasks.calc_goals_for_days.s([day])
        if watch_and_log(goals_sig).get() == 0:
            log.info("No games parsed for goals on %s", day)
            break

    # Stage 4: game_stats summaries, again one day per task invocation
    log.info("Starting game_stats summarization")
    for day in utils.daterange(first_day, datetime.date.today(), reverse=True):
        log.info("Invoking summarize_game_stats_for_days async task for %s", day)
        stats_sig = background.tasks.summarize_game_stats_for_days.s([day])
        if watch_and_log(stats_sig).get() == 0:
            log.info("No new games summarized on %s", day)
            break

    # Stage 5: the scripts that take the parsed arguments, in order
    for announcement, script in (
            ("Counting buys", count_buys),
            ("Calculating trueskill", run_trueskill),
            ("Calculating optimal card ratios", optimal_card_ratios),
            ("Calculating goal stats", goal_stats)):
        log.info(announcement)
        script.main(parsed_args)

    # Stage 6: the leaderboard scripts, which are called without arguments
    log.info("Scraping the leaderboard")
    scrape_leaderboard.main()

    log.info("Loading the leaderboard")
    load_leaderboard.main()

    log.info("Done with the update.py process")
if __name__ == '__main__':
    # Script entry point: parse the command line with the parser from
    # utils.incremental_max_parser(), initialize logging from the parsed
    # debug setting, then run the update cycle.
    args = utils.incremental_max_parser().parse_args()
    dominionstats.utils.log.initialize_logging(args.debug)
    main(args)