Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add live texts parsing to the CRON task #86

Closed
wants to merge 9 commits into from
8 changes: 5 additions & 3 deletions tlfp/format_data_for_frontend.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ def dump_success_log(output_dir, log):
f.write(log)
textid = output_dir.split('/')[-1]
api_dir = output_dir.replace('/' + textid, '')
err_log = os.path.join(api_dir, 'logs', textid)
if os.path.exists(err_log):
os.remove(err_log)

for err_dir in ('logs', 'logs-encours'):
err_log = os.path.join(api_dir, err_dir, textid)
if os.path.exists(err_log):
os.remove(err_log)


def process(dos, OUTPUT_DIR, log=io.StringIO(), skip_already_done=False):
Expand Down
16 changes: 11 additions & 5 deletions tlfp/generate_dossiers_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,23 @@
if dos.get('url_jo'):
total_promulgues += 1

total_encours = total_doslegs - total_promulgues

erreurs = len(glob.glob(os.path.join(API_DIRECTORY, 'logs/*')))
erreurs_encours = len(glob.glob(os.path.join(API_DIRECTORY, 'logs-encours/*')))

max_promulgues = total_promulgues + erreurs
max_encours = total_encours + erreurs_encours
maximum = max_promulgues + max_encours

print(total_doslegs, 'doslegs in csv')
print(total_promulgues, 'promulgués')
print(erreurs, 'parsings échoués')
print('%.1f%s OK' % (100*total_promulgues/(total_promulgues + erreurs), '%'))
print('%.1f%s (%d/%d)' % (100*total_promulgues/max_promulgues, '%', total_promulgues, max_promulgues), 'de promulgués qui passent')
print('%.1f%s (%d/%d)' % (100*total_encours/max_encours, '%', total_encours, max_encours), 'de textes en cours qui passent')

home_json_final = {
"total": total_promulgues,
"encours": total_doslegs - total_promulgues,
"maximum": total_promulgues + erreurs
"encours": total_encours,
"maximum": max_promulgues,
}
home_json_data.sort(key=lambda x: -x['total_amendements'])
home_json_final["focus"] = {
Expand Down
12 changes: 8 additions & 4 deletions tlfp/parse_one.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def __getattr__(self, attr):
sys.stderr = _stderr


def dump_error_log(url, exception, api_dir, log):
def dump_error_log(url, exception, logdir, log):
log = log.getvalue() + '\n' + ''.join(traceback.format_tb(exception.__traceback__))

url_id = url.replace('/', '')
Expand All @@ -134,8 +134,8 @@ def dump_error_log(url, exception, api_dir, log):
elif 'senat.fr' in url:
url_id = url.split('/')[-1].replace('.html', '')

mkdirs(os.path.join(api_dir, 'logs'))
logfile = os.path.join(api_dir, 'logs', url_id)
mkdirs(logdir)
logfile = os.path.join(logdir, url_id)

print('[error] parsing', url, 'failed. Details in', logfile)
open(logfile, 'w').write(log)
Expand All @@ -147,6 +147,7 @@ def process(API_DIRECTORY, url):
verbose = '--quiet' not in sys.argv
if not disable_cache:
enable_requests_cache()
dos = None
with log_print(io.StringIO()) as log:
try:
if verbose:
Expand Down Expand Up @@ -193,7 +194,10 @@ def process(API_DIRECTORY, url):
raise e
except Exception as e:
# dump log for each failed doslegs in logs/
dump_error_log(url, e, API_DIRECTORY, log)
logdir = os.path.join(API_DIRECTORY, 'logs')
if dos and not dos.get('url_jo'):
logdir = os.path.join(API_DIRECTORY, 'logs-encours')
dump_error_log(url, e, logdir, log)
raise e


Expand Down
9 changes: 9 additions & 0 deletions update_promulgues.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,17 @@ pyenv activate lafabrique

DATADIR=data

echo "Parsing new promulgated texts..."
senapy-cli doslegs_urls --min-year=$((`date +%Y`)) | tlfp-parse-many $DATADIR --only-promulgated --quiet

echo
echo "Parsing texts in discussion in the Senate..."
senapy-cli doslegs_urls --in-discussion | tlfp-parse-many $DATADIR --quiet

echo
echo "Parsing texts in discussion in the National Assembly..."
anpy-cli doslegs_urls --in-discussion | tlfp-parse-many $DATADIR --quiet

echo
python tlfp/generate_dossiers_csv.py $DATADIR
python tlfp/tools/assemble_procedures.py $DATADIR
Expand Down