-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathrefresh.py
64 lines (58 loc) · 2.44 KB
/
refresh.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import orm
from orm import Logfile
import sources
import os
import logging
import time
import modelutils
from typing import Optional
from model import (
get_logfile_progress,
save_logfile_progress,
add_event
)
def _refresh_from_file(file, src, sess):
    """Feed the not-yet-processed lines of one log file into the database.

    The progress record for ``file`` is looked up with
    ``get_logfile_progress``; the path stored in its ``source_url`` is opened
    in binary mode and read from the byte offset stored in ``current_key``.
    Each line is parsed with ``modelutils.logline_to_dict``, tagged with the
    source name and handed to ``add_event`` unless its ``type`` is
    ``'crash'``. The stored offset advances by the byte length of every line
    read, and the session is committed every 1000 lines and once at the end.

    Args:
        file: Path of the log file, used to look up its progress record.
        src: Object whose ``name`` is written to each event as ``src_abbr``
            (``refresh`` passes an ``os.DirEntry``).
        sess: Database session passed to the model helpers and committed
            by this function.
    """
    logging.debug(file)
    logfile = get_logfile_progress(sess, file)
    logging.info("Refreshing from: {}".format(file))
    with open(logfile.source_url, 'rb') as f:
        logging.debug('offset: {}'.format(logfile.current_key))
        f.seek(logfile.current_key)
        for line_count, line in enumerate(f, start=1):
            try:
                data = modelutils.logline_to_dict(line.decode())
                data["src_abbr"] = src.name
                if not ('type' in data and data['type'] == 'crash'):
                    add_event(sess, data)
            except KeyError as e:
                logging.error('key {} not found'.format(e))
            except Exception:
                # Deliberately broad: one broken line must not abort the
                # whole refresh, so log the traceback and move on.
                logging.exception('Something unexpected happened, skipping this event')
            # Offsets are counted in bytes, hence the binary-mode read.
            logfile.current_key += len(line)
            if line_count % 1000 == 0:  # don't spam commits
                sess.commit()
        logfile.current_key = f.tell()
    sess.commit()
# fetch newest data into the DB
def refresh(sources_file: str, sources_dir: str, fetch: Optional[bool]=True):
    """Fetch the newest data into the DB.

    Source definitions are read with ``sources.source_data``. When ``fetch``
    is truthy the sources are downloaded first. Every entry of
    ``sources_dir`` that is not a regular file and whose name is a key of the
    source definitions then has its milestones file and its logfile
    refreshed, in that order. The elapsed time is logged at the end.

    Args:
        sources_file: Passed to ``sources.source_data`` and
            ``sources.download_sources``.
        sources_dir: Directory scanned for one sub-entry per source.
        fetch: Whether to call ``sources.download_sources`` before scanning.
    """
    t_i = time.time()
    source_data = sources.source_data(sources_file)
    if fetch:
        sources.download_sources(sources_file, sources_dir)

    # The scandir iterator is used as a context manager so its directory
    # handle is released even when a refresh raises part-way through.
    with orm.get_session() as sess, os.scandir(sources_dir) as entries:
        for src in entries:
            if src.is_file() or src.name not in source_data:
                continue
            source_urls = source_data[src.name]
            expected_files = [sources.url_to_filename(url)
                              for url in source_urls.values()]
            logging.debug('scanning {} files, expect [{}]'.format(src.name, ','.join(expected_files)))
            # it is important that this refresh first so we get begins
            # before ends!
            milestones = os.path.join(
                src.path, sources.url_to_filename(source_urls["milestones"]))
            _refresh_from_file(milestones, src, sess)
            logfile = os.path.join(
                src.path, sources.url_to_filename(source_urls["logfile"]))
            _refresh_from_file(logfile, src, sess)
    logging.info('Refreshed in {} seconds'.format(time.time() - t_i))