From 50bdabd4f3da073ca9e26d094e088b96faca4ee7 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 4 Dec 2024 16:29:47 +0100 Subject: [PATCH 01/19] Add sentry cron job monitoring for release color script --- listenbrainz/db/similar_users.py | 32 ++-------------------- mbid_mapping/docker/consul_config.py.ctmpl | 11 ++++++++ mbid_mapping/mapping/release_colors.py | 6 ++++ 3 files changed, 20 insertions(+), 29 deletions(-) diff --git a/listenbrainz/db/similar_users.py b/listenbrainz/db/similar_users.py index 3e3451ce97..39f16b1a78 100644 --- a/listenbrainz/db/similar_users.py +++ b/listenbrainz/db/similar_users.py @@ -44,16 +44,8 @@ def import_user_similarities(data): target_user_count += len(similar.keys()) execute_values(curs, query, values, page_size=ROWS_PER_BATCH, template=None) - conn.commit() - except psycopg2.errors.OperationalError as err: - conn.rollback() - current_app.logger.error("Error: Cannot import user similarites: %s" % str(err)) - return 0, 0.0, "Error: Cannot import user similarites: %s" % str(err) - - # Next lookup user names and insert them into the new similar_users table - try: - with conn.cursor() as curs: + # Next lookup user names and insert them into the new similar_users table curs.execute( """DROP TABLE IF EXISTS recommendation.tmp_similar_user""") curs.execute("""CREATE TABLE recommendation.tmp_similar_user @@ -77,36 +69,18 @@ def import_user_similarities(data): FOREIGN KEY (user_id) REFERENCES "user" (id) ON DELETE CASCADE""" % int(time.time())) - conn.commit() - - except psycopg2.errors.OperationalError as err: - conn.rollback() - current_app.logger.error("Error: Cannot correlate user similarity user name: %s" % str(err)) - return 0, 0.0, "Error: Cannot correlate user similarity user name: %s" % str(err) - # Finally rotate the table into place - try: - with conn.cursor() as curs: curs.execute("""ALTER TABLE recommendation.similar_user RENAME TO delete_similar_user""") curs.execute("""ALTER TABLE recommendation.tmp_similar_user RENAME TO similar_user""") - conn.commit() - except psycopg2.errors.OperationalError as err: - conn.rollback() - current_app.logger.error("Error: Failed to rotate similar_users table into place: %s" % str(err)) - return 0, 0.0, "Error: Failed to rotate similar_users table into place: %s" % str(err) - - # Last, delete the old table - try: - with conn.cursor() as curs: curs.execute("""DROP TABLE recommendation.delete_similar_user CASCADE""") conn.commit() except psycopg2.errors.OperationalError as err: conn.rollback() - current_app.logger.error("Error: Failed to clean up old similar user table: %s" % str(err)) - return 0, 0.0, "Error: Failed to clean up old similar user table: %s" % str(err) + current_app.logger.error("Error: Cannot rotate similar users data: %s" % str(err)) + return 0, 0.0, "Error: Cannot rotate similar users data: %s" % str(err) return user_count, target_user_count / user_count, "" diff --git a/mbid_mapping/docker/consul_config.py.ctmpl b/mbid_mapping/docker/consul_config.py.ctmpl index 9cfd0a3d7d..3409c912e3 100644 --- a/mbid_mapping/docker/consul_config.py.ctmpl +++ b/mbid_mapping/docker/consul_config.py.ctmpl @@ -1,3 +1,5 @@ +import os + {{- define "KEY" -}} {{ key (printf "docker-server-configs/LB/config.%s.json/%s" (env "DEPLOY_ENV") .) 
}} {{- end -}} @@ -82,3 +84,12 @@ TYPESENSE_API_KEY = '''{{template "KEY" "typesense_api_key"}}''' # For debugging, only fetches a tiny portion of the data if True USE_MINIMAL_DATASET = False + +# Sentry config +LOG_SENTRY = { + 'dsn': '''{{template "KEY" "sentry/dsn"}}''', + 'environment': '''{{template "KEY" "sentry/environment"}}''', + 'release': os.getenv('GIT_SHA', None), + 'traces_sample_rate': {{template "KEY" "sentry/traces_sample_rate"}}, +} +DATASETS_SENTRY_DSN = '''{{template "KEY" "sentry/datasets_dsn"}}''' diff --git a/mbid_mapping/mapping/release_colors.py b/mbid_mapping/mapping/release_colors.py index 72c11d61ff..0e41953a7e 100755 --- a/mbid_mapping/mapping/release_colors.py +++ b/mbid_mapping/mapping/release_colors.py @@ -6,6 +6,9 @@ import psycopg2 from psycopg2.extensions import register_adapter import requests +import sentry_sdk +from sentry_sdk.crons import capture_checkin +from sentry_sdk.crons.consts import MonitorStatus from brainzutils import metrics, cache import config @@ -284,6 +287,8 @@ def compare_coverart(mb_query, lb_query, mb_caa_index, lb_caa_index, mb_compare_ the corresponding compare key. The starting indexes (the current comparison index into the data) must be provided and match the type of the comparison keys. """ + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug='caa-color-sync', status=MonitorStatus.IN_PROGRESS) with psycopg2.connect(config.MB_DATABASE_STANDBY_URI) as mb_conn, \ mb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as mb_curs, \ psycopg2.connect(config.SQLALCHEMY_DATABASE_URI) as lb_conn, \ @@ -373,3 +378,4 @@ def compare_coverart(mb_query, lb_query, mb_caa_index, lb_caa_index, mb_compare_ lb_caa_count=lb_count ) + capture_checkin(monitor_slug='caa-color-sync', check_in_id=check_in_id, status=MonitorStatus.OK) From e657ca96d8958af2c91d1731d45d059f12b1cbbf Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 4 Dec 2024 16:43:47 +0100 Subject: [PATCH 02/19] Undo accidental commit --- listenbrainz/db/similar_users.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/listenbrainz/db/similar_users.py b/listenbrainz/db/similar_users.py index 39f16b1a78..3e3451ce97 100644 --- a/listenbrainz/db/similar_users.py +++ b/listenbrainz/db/similar_users.py @@ -44,8 +44,16 @@ def import_user_similarities(data): target_user_count += len(similar.keys()) execute_values(curs, query, values, page_size=ROWS_PER_BATCH, template=None) + conn.commit() - # Next lookup user names and insert them into the new similar_users table + except psycopg2.errors.OperationalError as err: + conn.rollback() + current_app.logger.error("Error: Cannot import user similarites: %s" % str(err)) + return 0, 0.0, "Error: Cannot import user similarites: %s" % str(err) + + # Next lookup user names and insert them into the new similar_users table + try: + with conn.cursor() as curs: curs.execute( """DROP TABLE IF EXISTS recommendation.tmp_similar_user""") curs.execute("""CREATE TABLE recommendation.tmp_similar_user @@ -69,18 +77,36 @@ def import_user_similarities(data): FOREIGN KEY (user_id) REFERENCES "user" (id) ON DELETE CASCADE""" % int(time.time())) + conn.commit() + + except psycopg2.errors.OperationalError as err: + conn.rollback() + current_app.logger.error("Error: Cannot correlate user similarity user name: %s" % str(err)) + return 0, 0.0, "Error: Cannot correlate user similarity user name: %s" % str(err) + # Finally rotate the table into place + try: + with conn.cursor() as curs: 
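[Review note on the check-in calls added in PATCH 01/19 above] A Sentry cron monitor is bracketed by two capture_checkin() calls that share a check-in id. A minimal sketch of the full lifecycle, assuming a DSN is configured and the monitor slug already exists in Sentry — the slug and run_job() body below are placeholders, not code from this series:

    import sentry_sdk
    from sentry_sdk.crons import capture_checkin
    from sentry_sdk.crons.consts import MonitorStatus

    sentry_sdk.init(config.LOG_SENTRY["dsn"])  # DSN comes from the consul-templated config above
    # Open the check-in before the job body runs...
    check_in_id = capture_checkin(monitor_slug="caa-color-sync",
                                  status=MonitorStatus.IN_PROGRESS)
    try:
        run_job()  # placeholder for the monitored work
    except Exception:
        # ...close it as ERROR on failure so the monitor alerts...
        capture_checkin(monitor_slug="caa-color-sync", check_in_id=check_in_id,
                        status=MonitorStatus.ERROR)
        raise
    # ...or as OK so Sentry records a successful run inside the expected window.
    capture_checkin(monitor_slug="caa-color-sync", check_in_id=check_in_id,
                    status=MonitorStatus.OK)

Patch 01 itself only reports IN_PROGRESS and OK; the ERROR leg shown here is the shape the series converges on in PATCH 03/19.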
curs.execute("""ALTER TABLE recommendation.similar_user RENAME TO delete_similar_user""") curs.execute("""ALTER TABLE recommendation.tmp_similar_user RENAME TO similar_user""") + conn.commit() + except psycopg2.errors.OperationalError as err: + conn.rollback() + current_app.logger.error("Error: Failed to rotate similar_users table into place: %s" % str(err)) + return 0, 0.0, "Error: Failed to rotate similar_users table into place: %s" % str(err) + + # Last, delete the old table + try: + with conn.cursor() as curs: curs.execute("""DROP TABLE recommendation.delete_similar_user CASCADE""") conn.commit() except psycopg2.errors.OperationalError as err: conn.rollback() - current_app.logger.error("Error: Cannot rotate similar users data: %s" % str(err)) - return 0, 0.0, "Error: Cannot rotate similar users data: %s" % str(err) + current_app.logger.error("Error: Failed to clean up old similar user table: %s" % str(err)) + return 0, 0.0, "Error: Failed to clean up old similar user table: %s" % str(err) return user_count, target_user_count / user_count, "" From 80b2e92b5f17fad48b415d4c167e6f290a7f52ab Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 20 Dec 2024 11:25:54 +0100 Subject: [PATCH 03/19] Interim check-in --- mbid_mapping/docker/crontab | 4 +-- mbid_mapping/manage.py | 14 +++++++-- mbid_mapping/mapping/cron_wrappers.py | 41 ++++++++++++++++++++++++++ mbid_mapping/mapping/release_colors.py | 7 ----- 4 files changed, 54 insertions(+), 12 deletions(-) create mode 100644 mbid_mapping/mapping/cron_wrappers.py diff --git a/mbid_mapping/docker/crontab b/mbid_mapping/docker/crontab index 4b13280129..e822ea6b84 100644 --- a/mbid_mapping/docker/crontab +++ b/mbid_mapping/docker/crontab @@ -1,8 +1,8 @@ # Create the mapping indexes (typesense, canonical data tables) each day at 4am -0 4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py create-all >> /code/mapper/lb-cron.log 2>&1 +0 4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-create-all >> /code/mapper/lb-cron.log 2>&1 # Run the huesound color sync hourly -10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py update-coverart >> /code/mapper/lb-cron.log 2>&1 +10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-update-coverart >> /code/mapper/lb-cron.log 2>&1 # Rebuild the spotify metadata index every friday at 1 A.M. 
0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1 diff --git a/mbid_mapping/manage.py b/mbid_mapping/manage.py index 83267e6f64..cf43cb74c5 100755 --- a/mbid_mapping/manage.py +++ b/mbid_mapping/manage.py @@ -13,7 +13,7 @@ from mapping.typesense_index import build_all as action_build_index from mapping.mapping_test.mapping_test import test_mapping as action_test_mapping from mapping.utils import log, CRON_LOG_FILE -from mapping.release_colors import sync_release_color_table, incremental_update_release_color_table +from mapping.release_colors import sync_release_color_table from reports.tracks_of_the_year import calculate_tracks_of_the_year from reports.top_discoveries import calculate_top_discoveries from mapping.mb_metadata_cache import create_mb_metadata_cache, incremental_update_mb_metadata_cache, \ @@ -22,6 +22,7 @@ incremental_update_mb_release_group_cache from mapping.spotify_metadata_index import create_spotify_metadata_index from mapping.apple_metadata_index import create_apple_metadata_index +from mapping.cron_wrapper import cron_wrapper_create_all, cron_incremental_update_release_color_table from similar.tag_similarity import create_tag_similarity @@ -39,6 +40,13 @@ def create_all(): create_canonical_musicbrainz_data(True) action_build_index() +@cli.command() +def cron_create_all(): + """ + Create all canonical data in one go as a monitored cron job. First mb canonical data, then its typesense index. + """ + cron_wrapper_create_all() + @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") @@ -82,11 +90,11 @@ def sync_coverart(): @cli.command() -def update_coverart(): +def cron_update_coverart(): """ Update the release_color table incrementally. Designed to be called hourly by cron. """ - incremental_update_release_color_table() + cron_incremental_update_release_color_table() @cli.command() diff --git a/mbid_mapping/mapping/cron_wrappers.py b/mbid_mapping/mapping/cron_wrappers.py new file mode 100644 index 0000000000..84aadfb1a5 --- /dev/null +++ b/mbid_mapping/mapping/cron_wrappers.py @@ -0,0 +1,41 @@ +from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data +from mapping.release_colors import incremental_update_release_color_table +from mapping.typesense_index import build_all as action_build_index +import sentry_sdk +from sentry_sdk.crons import capture_checkin +from sentry_sdk.crons.consts import MonitorStatus + + +def cron_wrapper_create_all(): + """ Run a monitored cron job to create the canonical data and the typesense index """ + + slug='canonical-data-typesense-index' + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + + try: + create_canonical_musicbrainz_data(True) + action_build_index() + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return + + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + + +def cron_incremental_update_release_color_table(): + """ Update the release_colors table. 
""" + + slug = 'caa-color-sync' + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + + try: + incremental_update_release_color_table() + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return + + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) diff --git a/mbid_mapping/mapping/release_colors.py b/mbid_mapping/mapping/release_colors.py index 0e41953a7e..eff05dd85f 100755 --- a/mbid_mapping/mapping/release_colors.py +++ b/mbid_mapping/mapping/release_colors.py @@ -6,9 +6,6 @@ import psycopg2 from psycopg2.extensions import register_adapter import requests -import sentry_sdk -from sentry_sdk.crons import capture_checkin -from sentry_sdk.crons.consts import MonitorStatus from brainzutils import metrics, cache import config @@ -287,8 +284,6 @@ def compare_coverart(mb_query, lb_query, mb_caa_index, lb_caa_index, mb_compare_ the corresponding compare key. The starting indexes (the current comparison index into the data) must be provided and match the type of the comparison keys. """ - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug='caa-color-sync', status=MonitorStatus.IN_PROGRESS) with psycopg2.connect(config.MB_DATABASE_STANDBY_URI) as mb_conn, \ mb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as mb_curs, \ psycopg2.connect(config.SQLALCHEMY_DATABASE_URI) as lb_conn, \ @@ -377,5 +372,3 @@ def compare_coverart(mb_query, lb_query, mb_caa_index, lb_caa_index, mb_compare_ caa_front_count=mb_count, lb_caa_count=lb_count ) - - capture_checkin(monitor_slug='caa-color-sync', check_in_id=check_in_id, status=MonitorStatus.OK) From d4bb724dd2209fcf024f47dff9cfa4095ee56424 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 20 Dec 2024 12:55:59 +0100 Subject: [PATCH 04/19] Interim checkin --- mbid_mapping/docker/crontab | 8 +- mbid_mapping/manage.py | 62 +++++- mbid_mapping/manage_cron.py | 301 ++++++++++++++++++++++++++ mbid_mapping/mapping/cron_wrappers.py | 119 +++++++++- 4 files changed, 475 insertions(+), 15 deletions(-) create mode 100755 mbid_mapping/manage_cron.py diff --git a/mbid_mapping/docker/crontab b/mbid_mapping/docker/crontab index e822ea6b84..37bd014312 100644 --- a/mbid_mapping/docker/crontab +++ b/mbid_mapping/docker/crontab @@ -5,16 +5,16 @@ 10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-update-coverart >> /code/mapper/lb-cron.log 2>&1 # Rebuild the spotify metadata index every friday at 1 A.M. -0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1 +0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1 # Rebuild the apple music metadata index every friday at 2 A.M. -0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1 +0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1 # Rebuild the soundcloud music metadata index every friday at 3 A.M. 
-0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1 +0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1 # Rebuild similar tag data at 2am sundays -0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 +0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 # Build the mb entity caches from scratch on first thursday of every month, MB dumps run on Wed and Sat so avoid those days 0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/manage.py cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 diff --git a/mbid_mapping/manage.py b/mbid_mapping/manage.py index cf43cb74c5..dc2dfd8dcb 100755 --- a/mbid_mapping/manage.py +++ b/mbid_mapping/manage.py @@ -22,7 +22,15 @@ incremental_update_mb_release_group_cache from mapping.spotify_metadata_index import create_spotify_metadata_index from mapping.apple_metadata_index import create_apple_metadata_index -from mapping.cron_wrapper import cron_wrapper_create_all, cron_incremental_update_release_color_table +from mapping.cron_wrappers import cron_wrapper_create_all, \ + cron_incremental_update_release_color_table, \ + cron_create_spotify_metadata_index, \ + cron_create_apple_metadata_index, \ + cron_create_soundcloud_metadata_index, \ + cron_build_all_mb_caches as cron_wrapper_cron_build_all_mb_caches, \ + cron_update_all_mb_caches as cron_wrapper_cron_update_all_mb_caches, \ + cron_wrapper_create_all, \ + cron_create_tag_similarity from similar.tag_similarity import create_tag_similarity @@ -31,6 +39,10 @@ def cli(): pass +# Add the "cron" submenu +cron_cli.short_help = "cron jobs -- do not invoke manually." +cli.add_command(cron_cli, name="cron") + @cli.command() def create_all(): @@ -195,24 +207,18 @@ def cron_build_mb_metadata_cache(): @cli.command() @click.pass_context def cron_build_all_mb_caches(ctx): + """ Build all mb entity metadata cache and tables it depends on in production in appropriate databases. After building the cache, cleanup mbid_mapping table. """ - ctx.invoke(cron_build_mb_metadata_cache) - ctx.invoke(build_mb_artist_metadata_cache) - ctx.invoke(build_mb_release_group_cache) + cron_wrapper_cron_build_all_mb_caches() @cli.command() @click.pass_context def cron_update_all_mb_caches(ctx): """ Update all mb entity metadata cache in ListenBrainz. 
""" - - # In this context we want to use mb_conn, not lb_conn, like the functions that follow - update_canonical_release_data(False) - ctx.invoke(update_mb_metadata_cache) - ctx.invoke(update_mb_artist_metadata_cache) - ctx.invoke(update_mb_release_group_cache) + cron_wrapper_cron_update_all_mb_caches() @cli.command() @@ -224,6 +230,15 @@ def build_spotify_metadata_index(use_lb_conn): create_spotify_metadata_index(use_lb_conn) +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def build_cron_spotify_metadata_index(use_lb_conn): + """ + Build the spotify metadata index that LB uses invoked via cron + """ + cron_create_spotify_metadata_index(use_lb_conn) + + @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def build_apple_metadata_index(use_lb_conn): @@ -233,6 +248,15 @@ def build_apple_metadata_index(use_lb_conn): create_apple_metadata_index(use_lb_conn) +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def cron_build_apple_metadata_index(use_lb_conn): + """ + Build the Apple Music metadata index that LB uses invoked from cron + """ + cron_create_apple_metadata_index(use_lb_conn) + + @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def build_soundcloud_metadata_index(use_lb_conn): @@ -241,6 +265,16 @@ def build_soundcloud_metadata_index(use_lb_conn): """ create_soundcloud_metadata_index(use_lb_conn) + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def cron_build_soundcloud_metadata_index(use_lb_conn): + """ + Build the Soundcloud Music metadata index that LB usesa invoked from cron + """ + cron_create_soundcloud_metadata_index(use_lb_conn) + + @cli.command() def build_tag_similarity(): """ @@ -249,6 +283,14 @@ def build_tag_similarity(): create_tag_similarity() +@cli.command() +def cron_build_tag_similarity(): + """ + Build the tag similarity data invoked via cron + """ + cron_create_tag_similarity() + + def usage(command): with click.Context(command) as ctx: click.echo(command.get_help(ctx)) diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py new file mode 100755 index 0000000000..dc2dfd8dcb --- /dev/null +++ b/mbid_mapping/manage_cron.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python3 + +import sys +import os +import subprocess + +import click + +from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data, update_canonical_release_data +from mapping.mb_artist_metadata_cache import create_mb_artist_metadata_cache, \ + incremental_update_mb_artist_metadata_cache +from mapping.soundcloud_metadata_index import create_soundcloud_metadata_index +from mapping.typesense_index import build_all as action_build_index +from mapping.mapping_test.mapping_test import test_mapping as action_test_mapping +from mapping.utils import log, CRON_LOG_FILE +from mapping.release_colors import sync_release_color_table +from reports.tracks_of_the_year import calculate_tracks_of_the_year +from reports.top_discoveries import calculate_top_discoveries +from mapping.mb_metadata_cache import create_mb_metadata_cache, incremental_update_mb_metadata_cache, \ + cleanup_mbid_mapping_table +from mapping.mb_release_group_cache import create_mb_release_group_cache, \ + incremental_update_mb_release_group_cache +from 
mapping.spotify_metadata_index import create_spotify_metadata_index +from mapping.apple_metadata_index import create_apple_metadata_index +from mapping.cron_wrappers import cron_wrapper_create_all, \ + cron_incremental_update_release_color_table, \ + cron_create_spotify_metadata_index, \ + cron_create_apple_metadata_index, \ + cron_create_soundcloud_metadata_index, \ + cron_build_all_mb_caches as cron_wrapper_cron_build_all_mb_caches, \ + cron_update_all_mb_caches as cron_wrapper_cron_update_all_mb_caches, \ + cron_wrapper_create_all, \ + cron_create_tag_similarity +from similar.tag_similarity import create_tag_similarity + + + +@click.group() +def cli(): + pass + +# Add the "cron" submenu +cron_cli.short_help = "cron jobs -- do not invoke manually." +cli.add_command(cron_cli, name="cron") + + +@cli.command() +def create_all(): + """ + Create all canonical data in one go. First mb canonical data, then its typesense index. + """ + create_canonical_musicbrainz_data(True) + action_build_index() + +@cli.command() +def cron_create_all(): + """ + Create all canonical data in one go as a monitored cron job. First mb canonical data, then its typesense index. + """ + cron_wrapper_create_all() + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def canonical_data(use_lb_conn): + """ + Create the MBID Mapping tables. (mbid_mapping, mbid_mapping_release, canonical_recording, recording_canonical_release) + """ + create_canonical_musicbrainz_data(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def update_canonical_releases(use_lb_conn): + """ + Update only the canonical releases table + """ + update_canonical_release_data(use_lb_conn) + +@cli.command() +def test_mapping(): + """ + Test the created mbid mapping. The MBID mapping must have been created before running this. + """ + action_test_mapping() + + +@cli.command() +def build_index(): + """ + Build the typesense index of the mbid mapping. The mbid mapping must be run first in order to build this index. + """ + action_build_index() + + +@cli.command() +def sync_coverart(): + """ + Force a re-sync of the release_color table, in case it has gone out of sync. + """ + sync_release_color_table() + + +@cli.command() +def cron_update_coverart(): + """ + Update the release_color table incrementally. Designed to be called hourly by cron. + """ + cron_incremental_update_release_color_table() + + +@cli.command() +def cron_log(): + """ + Print the internal cron log file for debugging purposes. + """ + if os.path.exists(CRON_LOG_FILE): + log("Current cron job log file:") + subprocess.run(["cat", CRON_LOG_FILE]) + else: + log("Log file is empty") + + +@cli.command() +@click.argument('year', type=int) +def top_discoveries(year): + """ + Top discoveries for year -- this creates a table in the mapping schema of the provided mb-docker database + that lists all the tracks that a user listened to the first time in the given year. + """ + calculate_top_discoveries(year) + + +@cli.command() +@click.argument('year', type=int) +def top_tracks(year): + """ + Tracks for the year -- this also creates a table in the mapping schema, where this one creates a historgram + of which tracks and how many times a user played for a given year. 
+ """ + calculate_tracks_of_the_year(year) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def build_mb_metadata_cache(use_lb_conn): + """ + Build the MB metadata cache that LB uses + """ + create_mb_metadata_cache(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def build_mb_release_group_cache(use_lb_conn): + """ + Build the MB release group cache that LB uses + """ + create_mb_release_group_cache(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def build_mb_artist_metadata_cache(use_lb_conn): + """ + Build the MB release group cache that LB uses + """ + create_mb_artist_metadata_cache(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def update_mb_metadata_cache(use_lb_conn): + """ + Update the MB metadata cache that LB uses incrementally. + """ + incremental_update_mb_metadata_cache(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def update_mb_release_group_cache(use_lb_conn): + """ + Update the MB metadata cache that LB uses incrementally. + """ + incremental_update_mb_release_group_cache(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def update_mb_artist_metadata_cache(use_lb_conn): + """ + Update the MB metadata cache that LB uses incrementally. + """ + incremental_update_mb_artist_metadata_cache(use_lb_conn) + + +@cli.command() +def cron_build_mb_metadata_cache(): + """ Build the mb metadata cache and tables it depends on in production in appropriate databases. + After building the cache, cleanup mbid_mapping table. + """ + create_mb_metadata_cache(True) + cleanup_mbid_mapping_table() + + +@cli.command() +@click.pass_context +def cron_build_all_mb_caches(ctx): + + """ Build all mb entity metadata cache and tables it depends on in production in appropriate + databases. After building the cache, cleanup mbid_mapping table. + """ + cron_wrapper_cron_build_all_mb_caches() + + +@cli.command() +@click.pass_context +def cron_update_all_mb_caches(ctx): + """ Update all mb entity metadata cache in ListenBrainz. 
""" + cron_wrapper_cron_update_all_mb_caches() + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def build_spotify_metadata_index(use_lb_conn): + """ + Build the spotify metadata index that LB uses + """ + create_spotify_metadata_index(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def build_cron_spotify_metadata_index(use_lb_conn): + """ + Build the spotify metadata index that LB uses invoked via cron + """ + cron_create_spotify_metadata_index(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def build_apple_metadata_index(use_lb_conn): + """ + Build the Apple Music metadata index that LB uses + """ + create_apple_metadata_index(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def cron_build_apple_metadata_index(use_lb_conn): + """ + Build the Apple Music metadata index that LB uses invoked from cron + """ + cron_create_apple_metadata_index(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def build_soundcloud_metadata_index(use_lb_conn): + """ + Build the Soundcloud Music metadata index that LB uses + """ + create_soundcloud_metadata_index(use_lb_conn) + + +@cli.command() +@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") +def cron_build_soundcloud_metadata_index(use_lb_conn): + """ + Build the Soundcloud Music metadata index that LB usesa invoked from cron + """ + cron_create_soundcloud_metadata_index(use_lb_conn) + + +@cli.command() +def build_tag_similarity(): + """ + Build the tag similarity data + """ + create_tag_similarity() + + +@cli.command() +def cron_build_tag_similarity(): + """ + Build the tag similarity data invoked via cron + """ + cron_create_tag_similarity() + + +def usage(command): + with click.Context(command) as ctx: + click.echo(command.get_help(ctx)) + + +if __name__ == "__main__": + cli() + sys.exit(0) diff --git a/mbid_mapping/mapping/cron_wrappers.py b/mbid_mapping/mapping/cron_wrappers.py index 84aadfb1a5..44e1b30855 100644 --- a/mbid_mapping/mapping/cron_wrappers.py +++ b/mbid_mapping/mapping/cron_wrappers.py @@ -1,6 +1,16 @@ -from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data +from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data, update_canonical_release_data from mapping.release_colors import incremental_update_release_color_table from mapping.typesense_index import build_all as action_build_index +from mapping.spotify_metadata_index import create_spotify_metadata_index +from mapping.apple_metadata_index import create_apple_metadata_index +from mapping.soundcloud_metadata_index import create_soundcloud_metadata_index +from mapping.mb_metadata_cache import cleanup_mbid_mapping_table, create_mb_metadata_cache, incremental_update_mb_metadata_cache +from mapping.mb_artist_metadata_cache import create_mb_artist_metadata_cache, \ + incremental_update_mb_artist_metadata_cache +from mapping.mb_release_group_cache import create_mb_release_group_cache, incremental_update_mb_release_group_cache +from similar.tag_similarity import create_tag_similarity +import config + import sentry_sdk from sentry_sdk.crons 
import capture_checkin from sentry_sdk.crons.consts import MonitorStatus @@ -39,3 +49,110 @@ def cron_incremental_update_release_color_table(): return capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + + +def cron_create_spotify_metadata_index(use_lb_conn): + """ Update the spotify metadata index via cron """ + + slug = 'create-spotify-metadata-index' + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + + try: + create_spotify_metadata_index(use_lb_conn) + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return + + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + + +def cron_create_apple_metadata_index(use_lb_conn): + """ Update the apple metadata index via cron """ + + slug = 'create-apple-metadata-index' + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + + try: + create_apple_metadata_index(use_lb_conn) + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return + + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + + +def cron_create_soundcloud_metadata_index(use_lb_conn): + """ Update the soundcloud metadata index via cron """ + + slug = 'create-soundcloud-metadata-index' + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + + try: + create_soundcloud_metadata_index(use_lb_conn) + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return + + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + + +def cron_create_tag_similarity(): + """ Update the tag similarity data via cron """ + + slug = 'create-tag-similarity' + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + + try: + create_tag_similarity() + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return + + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + +def cron_build_all_mb_caches(): + """ build all mb caches via cron """ + + slug = 'build-all-mb-caches' + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + + try: + create_mb_metadata_cache(True) + cleanup_mbid_mapping_table() + create_mb_artist_metadata_cache(True) + create_mb_release_group_cache(True) + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return + + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + +def cron_update_all_mb_caches(): + """ Update all mb caches via cron """ + + slug = 'update-all-mb-caches' + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + + try: + update_canonical_release_data(False) + incremental_update_mb_metadata_cache(True) + incremental_update_mb_artist_metadata_cache(True) + 
incremental_update_mb_release_group_cache(True) + + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return + + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) From ded3db4d153d2922a68f2579c7cc09de558f7da7 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 20 Dec 2024 13:01:06 +0100 Subject: [PATCH 05/19] Move cron to a submenu to avoid accidental user invocation --- mbid_mapping/docker/crontab | 16 +-- mbid_mapping/manage.py | 101 +------------- mbid_mapping/manage_cron.py | 193 +------------------------- mbid_mapping/mapping/cron_wrappers.py | 4 +- 4 files changed, 15 insertions(+), 299 deletions(-) diff --git a/mbid_mapping/docker/crontab b/mbid_mapping/docker/crontab index 37bd014312..44dd9f9f2c 100644 --- a/mbid_mapping/docker/crontab +++ b/mbid_mapping/docker/crontab @@ -1,23 +1,23 @@ # Create the mapping indexes (typesense, canonical data tables) each day at 4am -0 4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-create-all >> /code/mapper/lb-cron.log 2>&1 +0 4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-create-all >> /code/mapper/lb-cron.log 2>&1 # Run the huesound color sync hourly -10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-update-coverart >> /code/mapper/lb-cron.log 2>&1 +10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-coverart >> /code/mapper/lb-cron.log 2>&1 # Rebuild the spotify metadata index every friday at 1 A.M. -0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1 +0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1 # Rebuild the apple music metadata index every friday at 2 A.M. -0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1 +0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1 # Rebuild the soundcloud music metadata index every friday at 3 A.M. 
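[Review note] The `manage.py cron ...` invocations in the crontab below rely on mounting manage_cron.py's click group as a sub-command of the main CLI. A self-contained sketch, condensed into a single file for illustration (the real code splits this across manage.py and manage_cron.py, and the command body is a placeholder):

    import click

    @click.group()
    def cli():          # top-level group, normally defined in manage.py
        pass

    @click.group()
    def cron_cli():     # cron-only group, normally manage_cron.py's `cli`
        pass

    @cron_cli.command()
    def cron_update_coverart():
        """Update the release_color table incrementally."""
        pass  # placeholder body

    # Hide the cron jobs behind a "cron" submenu so they are not run by accident:
    cron_cli.short_help = "cron jobs -- do not invoke manually."
    cli.add_command(cron_cli, name="cron")

    if __name__ == "__main__":
        cli()   # invoked as: python manage.py cron cron-update-coverart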
-0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1 +0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1 # Rebuild similar tag data at 2am sundays -0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 +0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 # Build the mb entity caches from scratch on first thursday of every month, MB dumps run on Wed and Sat so avoid those days -0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/manage.py cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 +0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/manage.py cron cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 # Update the mb entity caches incrementally every 4 hours -0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 +0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 diff --git a/mbid_mapping/manage.py b/mbid_mapping/manage.py index dc2dfd8dcb..38bb58b6f3 100755 --- a/mbid_mapping/manage.py +++ b/mbid_mapping/manage.py @@ -22,23 +22,15 @@ incremental_update_mb_release_group_cache from mapping.spotify_metadata_index import create_spotify_metadata_index from mapping.apple_metadata_index import create_apple_metadata_index -from mapping.cron_wrappers import cron_wrapper_create_all, \ - cron_incremental_update_release_color_table, \ - cron_create_spotify_metadata_index, \ - cron_create_apple_metadata_index, \ - cron_create_soundcloud_metadata_index, \ - cron_build_all_mb_caches as cron_wrapper_cron_build_all_mb_caches, \ - cron_update_all_mb_caches as cron_wrapper_cron_update_all_mb_caches, \ - cron_wrapper_create_all, \ - cron_create_tag_similarity from similar.tag_similarity import create_tag_similarity - +from manage_cron import cli as cron_cli @click.group() def cli(): pass + # Add the "cron" submenu cron_cli.short_help = "cron jobs -- do not invoke manually." cli.add_command(cron_cli, name="cron") @@ -52,13 +44,6 @@ def create_all(): create_canonical_musicbrainz_data(True) action_build_index() -@cli.command() -def cron_create_all(): - """ - Create all canonical data in one go as a monitored cron job. First mb canonical data, then its typesense index. - """ - cron_wrapper_create_all() - @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") @@ -77,6 +62,7 @@ def update_canonical_releases(use_lb_conn): """ update_canonical_release_data(use_lb_conn) + @cli.command() def test_mapping(): """ @@ -101,26 +87,6 @@ def sync_coverart(): sync_release_color_table() -@cli.command() -def cron_update_coverart(): - """ - Update the release_color table incrementally. Designed to be called hourly by cron. - """ - cron_incremental_update_release_color_table() - - -@cli.command() -def cron_log(): - """ - Print the internal cron log file for debugging purposes. 
- """ - if os.path.exists(CRON_LOG_FILE): - log("Current cron job log file:") - subprocess.run(["cat", CRON_LOG_FILE]) - else: - log("Log file is empty") - - @cli.command() @click.argument('year', type=int) def top_discoveries(year): @@ -195,32 +161,6 @@ def update_mb_artist_metadata_cache(use_lb_conn): incremental_update_mb_artist_metadata_cache(use_lb_conn) -@cli.command() -def cron_build_mb_metadata_cache(): - """ Build the mb metadata cache and tables it depends on in production in appropriate databases. - After building the cache, cleanup mbid_mapping table. - """ - create_mb_metadata_cache(True) - cleanup_mbid_mapping_table() - - -@cli.command() -@click.pass_context -def cron_build_all_mb_caches(ctx): - - """ Build all mb entity metadata cache and tables it depends on in production in appropriate - databases. After building the cache, cleanup mbid_mapping table. - """ - cron_wrapper_cron_build_all_mb_caches() - - -@cli.command() -@click.pass_context -def cron_update_all_mb_caches(ctx): - """ Update all mb entity metadata cache in ListenBrainz. """ - cron_wrapper_cron_update_all_mb_caches() - - @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def build_spotify_metadata_index(use_lb_conn): @@ -230,15 +170,6 @@ def build_spotify_metadata_index(use_lb_conn): create_spotify_metadata_index(use_lb_conn) -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def build_cron_spotify_metadata_index(use_lb_conn): - """ - Build the spotify metadata index that LB uses invoked via cron - """ - cron_create_spotify_metadata_index(use_lb_conn) - - @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def build_apple_metadata_index(use_lb_conn): @@ -248,15 +179,6 @@ def build_apple_metadata_index(use_lb_conn): create_apple_metadata_index(use_lb_conn) -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def cron_build_apple_metadata_index(use_lb_conn): - """ - Build the Apple Music metadata index that LB uses invoked from cron - """ - cron_create_apple_metadata_index(use_lb_conn) - - @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def build_soundcloud_metadata_index(use_lb_conn): @@ -266,15 +188,6 @@ def build_soundcloud_metadata_index(use_lb_conn): create_soundcloud_metadata_index(use_lb_conn) -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def cron_build_soundcloud_metadata_index(use_lb_conn): - """ - Build the Soundcloud Music metadata index that LB usesa invoked from cron - """ - cron_create_soundcloud_metadata_index(use_lb_conn) - - @cli.command() def build_tag_similarity(): """ @@ -283,14 +196,6 @@ def build_tag_similarity(): create_tag_similarity() -@cli.command() -def cron_build_tag_similarity(): - """ - Build the tag similarity data invoked via cron - """ - cron_create_tag_similarity() - - def usage(command): with click.Context(command) as ctx: click.echo(command.get_help(ctx)) diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py index dc2dfd8dcb..6da19bf46c 100755 --- a/mbid_mapping/manage_cron.py +++ b/mbid_mapping/manage_cron.py @@ -6,22 +6,6 @@ import click -from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data, 
update_canonical_release_data -from mapping.mb_artist_metadata_cache import create_mb_artist_metadata_cache, \ - incremental_update_mb_artist_metadata_cache -from mapping.soundcloud_metadata_index import create_soundcloud_metadata_index -from mapping.typesense_index import build_all as action_build_index -from mapping.mapping_test.mapping_test import test_mapping as action_test_mapping -from mapping.utils import log, CRON_LOG_FILE -from mapping.release_colors import sync_release_color_table -from reports.tracks_of_the_year import calculate_tracks_of_the_year -from reports.top_discoveries import calculate_top_discoveries -from mapping.mb_metadata_cache import create_mb_metadata_cache, incremental_update_mb_metadata_cache, \ - cleanup_mbid_mapping_table -from mapping.mb_release_group_cache import create_mb_release_group_cache, \ - incremental_update_mb_release_group_cache -from mapping.spotify_metadata_index import create_spotify_metadata_index -from mapping.apple_metadata_index import create_apple_metadata_index from mapping.cron_wrappers import cron_wrapper_create_all, \ cron_incremental_update_release_color_table, \ cron_create_spotify_metadata_index, \ @@ -31,26 +15,12 @@ cron_update_all_mb_caches as cron_wrapper_cron_update_all_mb_caches, \ cron_wrapper_create_all, \ cron_create_tag_similarity -from similar.tag_similarity import create_tag_similarity - @click.group() def cli(): pass -# Add the "cron" submenu -cron_cli.short_help = "cron jobs -- do not invoke manually." -cli.add_command(cron_cli, name="cron") - - -@cli.command() -def create_all(): - """ - Create all canonical data in one go. First mb canonical data, then its typesense index. - """ - create_canonical_musicbrainz_data(True) - action_build_index() @cli.command() def cron_create_all(): @@ -60,47 +30,6 @@ def cron_create_all(): cron_wrapper_create_all() -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def canonical_data(use_lb_conn): - """ - Create the MBID Mapping tables. (mbid_mapping, mbid_mapping_release, canonical_recording, recording_canonical_release) - """ - create_canonical_musicbrainz_data(use_lb_conn) - - -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def update_canonical_releases(use_lb_conn): - """ - Update only the canonical releases table - """ - update_canonical_release_data(use_lb_conn) - -@cli.command() -def test_mapping(): - """ - Test the created mbid mapping. The MBID mapping must have been created before running this. - """ - action_test_mapping() - - -@cli.command() -def build_index(): - """ - Build the typesense index of the mbid mapping. The mbid mapping must be run first in order to build this index. - """ - action_build_index() - - -@cli.command() -def sync_coverart(): - """ - Force a re-sync of the release_color table, in case it has gone out of sync. - """ - sync_release_color_table() - - @cli.command() def cron_update_coverart(): """ @@ -110,7 +39,7 @@ def cron_update_coverart(): @cli.command() -def cron_log(): +def log(): """ Print the internal cron log file for debugging purposes. """ @@ -121,80 +50,6 @@ def cron_log(): log("Log file is empty") -@cli.command() -@click.argument('year', type=int) -def top_discoveries(year): - """ - Top discoveries for year -- this creates a table in the mapping schema of the provided mb-docker database - that lists all the tracks that a user listened to the first time in the given year. 
- """ - calculate_top_discoveries(year) - - -@cli.command() -@click.argument('year', type=int) -def top_tracks(year): - """ - Tracks for the year -- this also creates a table in the mapping schema, where this one creates a historgram - of which tracks and how many times a user played for a given year. - """ - calculate_tracks_of_the_year(year) - - -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def build_mb_metadata_cache(use_lb_conn): - """ - Build the MB metadata cache that LB uses - """ - create_mb_metadata_cache(use_lb_conn) - - -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def build_mb_release_group_cache(use_lb_conn): - """ - Build the MB release group cache that LB uses - """ - create_mb_release_group_cache(use_lb_conn) - - -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def build_mb_artist_metadata_cache(use_lb_conn): - """ - Build the MB release group cache that LB uses - """ - create_mb_artist_metadata_cache(use_lb_conn) - - -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def update_mb_metadata_cache(use_lb_conn): - """ - Update the MB metadata cache that LB uses incrementally. - """ - incremental_update_mb_metadata_cache(use_lb_conn) - - -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def update_mb_release_group_cache(use_lb_conn): - """ - Update the MB metadata cache that LB uses incrementally. - """ - incremental_update_mb_release_group_cache(use_lb_conn) - - -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def update_mb_artist_metadata_cache(use_lb_conn): - """ - Update the MB metadata cache that LB uses incrementally. - """ - incremental_update_mb_artist_metadata_cache(use_lb_conn) - - @cli.command() def cron_build_mb_metadata_cache(): """ Build the mb metadata cache and tables it depends on in production in appropriate databases. @@ -207,7 +62,6 @@ def cron_build_mb_metadata_cache(): @cli.command() @click.pass_context def cron_build_all_mb_caches(ctx): - """ Build all mb entity metadata cache and tables it depends on in production in appropriate databases. After building the cache, cleanup mbid_mapping table. 
""" @@ -221,15 +75,6 @@ def cron_update_all_mb_caches(ctx): cron_wrapper_cron_update_all_mb_caches() -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def build_spotify_metadata_index(use_lb_conn): - """ - Build the spotify metadata index that LB uses - """ - create_spotify_metadata_index(use_lb_conn) - - @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def build_cron_spotify_metadata_index(use_lb_conn): @@ -239,15 +84,6 @@ def build_cron_spotify_metadata_index(use_lb_conn): cron_create_spotify_metadata_index(use_lb_conn) -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def build_apple_metadata_index(use_lb_conn): - """ - Build the Apple Music metadata index that LB uses - """ - create_apple_metadata_index(use_lb_conn) - - @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def cron_build_apple_metadata_index(use_lb_conn): @@ -257,15 +93,6 @@ def cron_build_apple_metadata_index(use_lb_conn): cron_create_apple_metadata_index(use_lb_conn) -@cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def build_soundcloud_metadata_index(use_lb_conn): - """ - Build the Soundcloud Music metadata index that LB uses - """ - create_soundcloud_metadata_index(use_lb_conn) - - @cli.command() @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def cron_build_soundcloud_metadata_index(use_lb_conn): @@ -275,27 +102,9 @@ def cron_build_soundcloud_metadata_index(use_lb_conn): cron_create_soundcloud_metadata_index(use_lb_conn) -@cli.command() -def build_tag_similarity(): - """ - Build the tag similarity data - """ - create_tag_similarity() - - @cli.command() def cron_build_tag_similarity(): """ Build the tag similarity data invoked via cron """ cron_create_tag_similarity() - - -def usage(command): - with click.Context(command) as ctx: - click.echo(command.get_help(ctx)) - - -if __name__ == "__main__": - cli() - sys.exit(0) diff --git a/mbid_mapping/mapping/cron_wrappers.py b/mbid_mapping/mapping/cron_wrappers.py index 44e1b30855..df2514d34d 100644 --- a/mbid_mapping/mapping/cron_wrappers.py +++ b/mbid_mapping/mapping/cron_wrappers.py @@ -19,7 +19,7 @@ def cron_wrapper_create_all(): """ Run a monitored cron job to create the canonical data and the typesense index """ - slug='canonical-data-typesense-index' + slug = 'canonical-data-typesense-index' sentry_sdk.init(config.LOG_SENTRY["dsn"]) check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) @@ -118,6 +118,7 @@ def cron_create_tag_similarity(): capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + def cron_build_all_mb_caches(): """ build all mb caches via cron """ @@ -137,6 +138,7 @@ def cron_build_all_mb_caches(): capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + def cron_update_all_mb_caches(): """ Update all mb caches via cron """ From 5d876d36edb21c0179a74fef03a8f047aad0e2f2 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 20 Dec 2024 16:26:46 +0100 Subject: [PATCH 06/19] Use decorators --- mbid_mapping/mapping/cron_wrappers.py | 152 +++++++------------------- 1 file changed, 42 insertions(+), 110 deletions(-) diff --git 
a/mbid_mapping/mapping/cron_wrappers.py b/mbid_mapping/mapping/cron_wrappers.py index df2514d34d..8a3e7aef38 100644 --- a/mbid_mapping/mapping/cron_wrappers.py +++ b/mbid_mapping/mapping/cron_wrappers.py @@ -16,145 +16,77 @@ from sentry_sdk.crons.consts import MonitorStatus -def cron_wrapper_create_all(): - """ Run a monitored cron job to create the canonical data and the typesense index """ +def cron(slug): - slug = 'canonical-data-typesense-index' - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + def wrapper(func): - try: - create_canonical_musicbrainz_data(True) - action_build_index() - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return + def wrapped_f(*args): + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + try: + func(*args) + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + return wrapped_f -def cron_incremental_update_release_color_table(): - """ Update the release_colors table. """ + return wrapper - slug = 'caa-color-sync' - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - try: - incremental_update_release_color_table() - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return +@cron("canonical-data-typesense-index") +def cron_wrapper_create_all(): + """ Run a monitored cron job to create the canonical data and the typesense index """ + create_canonical_musicbrainz_data(True) + action_build_index() + - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) +@cron("caa-color-sync") +def cron_incremental_update_release_color_table(): + """ Update the release_colors table. 
""" + incremental_update_release_color_table() +@cron("create-spotify-metadata-index") def cron_create_spotify_metadata_index(use_lb_conn): """ Update the spotify metadata index via cron """ - - slug = 'create-spotify-metadata-index' - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - - try: - create_spotify_metadata_index(use_lb_conn) - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return - - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + create_spotify_metadata_index(use_lb_conn) +@cron("create-apple-metadata-index") def cron_create_apple_metadata_index(use_lb_conn): """ Update the apple metadata index via cron """ - - slug = 'create-apple-metadata-index' - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - - try: - create_apple_metadata_index(use_lb_conn) - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return - - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + create_apple_metadata_index(use_lb_conn) +@cron("create-soundcloud-metadata-index") def cron_create_soundcloud_metadata_index(use_lb_conn): """ Update the soundcloud metadata index via cron """ - - slug = 'create-soundcloud-metadata-index' - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - - try: - create_soundcloud_metadata_index(use_lb_conn) - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return - - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + create_soundcloud_metadata_index(use_lb_conn) +@cron("create-tag-similarity") def cron_create_tag_similarity(): """ Update the tag similarity data via cron """ - - slug = 'create-tag-similarity' - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - - try: - create_tag_similarity() - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return - - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + create_tag_similarity() +@cron("build-all-mb-caches") def cron_build_all_mb_caches(): """ build all mb caches via cron """ - - slug = 'build-all-mb-caches' - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - - try: - create_mb_metadata_cache(True) - cleanup_mbid_mapping_table() - create_mb_artist_metadata_cache(True) - create_mb_release_group_cache(True) - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return - - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + create_mb_metadata_cache(True) + cleanup_mbid_mapping_table() + create_mb_artist_metadata_cache(True) + create_mb_release_group_cache(True) +@cron("update-all-mb-caches") def cron_update_all_mb_caches(): """ Update all mb caches via cron """ - - slug = 
'update-all-mb-caches' - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - - try: - update_canonical_release_data(False) - incremental_update_mb_metadata_cache(True) - incremental_update_mb_artist_metadata_cache(True) - incremental_update_mb_release_group_cache(True) - - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return - - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + update_canonical_release_data(False) + incremental_update_mb_metadata_cache(True) + incremental_update_mb_artist_metadata_cache(True) + incremental_update_mb_release_group_cache(True) From 20d914482e91032cc82793980fa6e4714aff1133 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 20 Dec 2024 16:47:09 +0100 Subject: [PATCH 07/19] Removed cron_wrappers.py --- mbid_mapping/manage_cron.py | 92 +++++++++++++++++++-------- mbid_mapping/mapping/cron_wrappers.py | 92 --------------------------- 2 files changed, 65 insertions(+), 119 deletions(-) delete mode 100644 mbid_mapping/mapping/cron_wrappers.py diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py index 6da19bf46c..d8c57c939b 100755 --- a/mbid_mapping/manage_cron.py +++ b/mbid_mapping/manage_cron.py @@ -1,20 +1,46 @@ #!/usr/bin/env python3 - import sys import os import subprocess import click +import sentry_sdk +from sentry_sdk.crons import capture_checkin +from sentry_sdk.crons.consts import MonitorStatus + +import config +from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data, update_canonical_release_data +from mapping.release_colors import incremental_update_release_color_table +from mapping.typesense_index import build_all as action_build_index +from mapping.spotify_metadata_index import create_spotify_metadata_index +from mapping.apple_metadata_index import create_apple_metadata_index +from mapping.soundcloud_metadata_index import create_soundcloud_metadata_index +from mapping.mb_metadata_cache import cleanup_mbid_mapping_table, create_mb_metadata_cache, incremental_update_mb_metadata_cache +from mapping.mb_artist_metadata_cache import create_mb_artist_metadata_cache, \ + incremental_update_mb_artist_metadata_cache +from mapping.mb_release_group_cache import create_mb_release_group_cache, incremental_update_mb_release_group_cache +from similar.tag_similarity import create_tag_similarity + + +def cron(slug): + + def wrapper(func): + + def wrapped_f(*args): + sentry_sdk.init(config.LOG_SENTRY["dsn"]) + check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) + try: + func(*args) + except Exception as err: + print("Exception: %s" % err) + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) + return -from mapping.cron_wrappers import cron_wrapper_create_all, \ - cron_incremental_update_release_color_table, \ - cron_create_spotify_metadata_index, \ - cron_create_apple_metadata_index, \ - cron_create_soundcloud_metadata_index, \ - cron_build_all_mb_caches as cron_wrapper_cron_build_all_mb_caches, \ - cron_update_all_mb_caches as cron_wrapper_cron_update_all_mb_caches, \ - cron_wrapper_create_all, \ - cron_create_tag_similarity + capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) + + return wrapped_f + + return wrapper @click.group() @@ -22,20 +48,23 @@ def cli(): pass +@cron("canonical-data-typesense-index") 
@cli.command() def cron_create_all(): """ Create all canonical data in one go as a monitored cron job. First mb canonical data, then its typesense index. """ - cron_wrapper_create_all() + create_canonical_musicbrainz_data(True) + action_build_index() +@cron("caa-color-sync") @cli.command() def cron_update_coverart(): """ Update the release_color table incrementally. Designed to be called hourly by cron. """ - cron_incremental_update_release_color_table() + incremental_update_release_color_table() @cli.command() @@ -50,6 +79,7 @@ def log(): log("Log file is empty") +@cron("build-mb-metadata-cache") @cli.command() def cron_build_mb_metadata_cache(): """ Build the mb metadata cache and tables it depends on in production in appropriate databases. @@ -59,52 +89,60 @@ def cron_build_mb_metadata_cache(): cleanup_mbid_mapping_table() +@cron("build-all-mb-caches") @cli.command() -@click.pass_context -def cron_build_all_mb_caches(ctx): +def cron_build_all_mb_caches(): """ Build all mb entity metadata cache and tables it depends on in production in appropriate databases. After building the cache, cleanup mbid_mapping table. """ - cron_wrapper_cron_build_all_mb_caches() + create_mb_metadata_cache(True) + cleanup_mbid_mapping_table() + create_mb_artist_metadata_cache(True) + create_mb_release_group_cache(True) + +@cron("update-all-mb-caches") @cli.command() -@click.pass_context -def cron_update_all_mb_caches(ctx): +def cron_update_all_mb_caches(): """ Update all mb entity metadata cache in ListenBrainz. """ - cron_wrapper_cron_update_all_mb_caches() + update_canonical_release_data(False) + incremental_update_mb_metadata_cache(True) + incremental_update_mb_artist_metadata_cache(True) + incremental_update_mb_release_group_cache(True) +@cron("create-spotify-metadata-index") @cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") def build_cron_spotify_metadata_index(use_lb_conn): """ Build the spotify metadata index that LB uses invoked via cron """ - cron_create_spotify_metadata_index(use_lb_conn) + create_spotify_metadata_index(True) +@cron("create-apple-metadata-index") @cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def cron_build_apple_metadata_index(use_lb_conn): +def cron_build_apple_metadata_index(): """ Build the Apple Music metadata index that LB uses invoked from cron """ - cron_create_apple_metadata_index(use_lb_conn) + create_apple_metadata_index(True) +@cron("create-soundcloud-metadata-index") @cli.command() -@click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB") -def cron_build_soundcloud_metadata_index(use_lb_conn): +def cron_build_soundcloud_metadata_index(): """ Build the Soundcloud Music metadata index that LB usesa invoked from cron """ - cron_create_soundcloud_metadata_index(use_lb_conn) + create_soundcloud_metadata_index(True) +@cron("create-tag-similarity") @cli.command() def cron_build_tag_similarity(): """ Build the tag similarity data invoked via cron """ - cron_create_tag_similarity() + create_tag_similarity() diff --git a/mbid_mapping/mapping/cron_wrappers.py b/mbid_mapping/mapping/cron_wrappers.py deleted file mode 100644 index 8a3e7aef38..0000000000 --- a/mbid_mapping/mapping/cron_wrappers.py +++ /dev/null @@ -1,92 +0,0 @@ -from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data, update_canonical_release_data -from mapping.release_colors import 
incremental_update_release_color_table -from mapping.typesense_index import build_all as action_build_index -from mapping.spotify_metadata_index import create_spotify_metadata_index -from mapping.apple_metadata_index import create_apple_metadata_index -from mapping.soundcloud_metadata_index import create_soundcloud_metadata_index -from mapping.mb_metadata_cache import cleanup_mbid_mapping_table, create_mb_metadata_cache, incremental_update_mb_metadata_cache -from mapping.mb_artist_metadata_cache import create_mb_artist_metadata_cache, \ - incremental_update_mb_artist_metadata_cache -from mapping.mb_release_group_cache import create_mb_release_group_cache, incremental_update_mb_release_group_cache -from similar.tag_similarity import create_tag_similarity -import config - -import sentry_sdk -from sentry_sdk.crons import capture_checkin -from sentry_sdk.crons.consts import MonitorStatus - - -def cron(slug): - - def wrapper(func): - - def wrapped_f(*args): - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - try: - func(*args) - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return - - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) - - return wrapped_f - - return wrapper - - -@cron("canonical-data-typesense-index") -def cron_wrapper_create_all(): - """ Run a monitored cron job to create the canonical data and the typesense index """ - create_canonical_musicbrainz_data(True) - action_build_index() - - -@cron("caa-color-sync") -def cron_incremental_update_release_color_table(): - """ Update the release_colors table. """ - incremental_update_release_color_table() - - -@cron("create-spotify-metadata-index") -def cron_create_spotify_metadata_index(use_lb_conn): - """ Update the spotify metadata index via cron """ - create_spotify_metadata_index(use_lb_conn) - - -@cron("create-apple-metadata-index") -def cron_create_apple_metadata_index(use_lb_conn): - """ Update the apple metadata index via cron """ - create_apple_metadata_index(use_lb_conn) - - -@cron("create-soundcloud-metadata-index") -def cron_create_soundcloud_metadata_index(use_lb_conn): - """ Update the soundcloud metadata index via cron """ - create_soundcloud_metadata_index(use_lb_conn) - - -@cron("create-tag-similarity") -def cron_create_tag_similarity(): - """ Update the tag similarity data via cron """ - create_tag_similarity() - - -@cron("build-all-mb-caches") -def cron_build_all_mb_caches(): - """ build all mb caches via cron """ - create_mb_metadata_cache(True) - cleanup_mbid_mapping_table() - create_mb_artist_metadata_cache(True) - create_mb_release_group_cache(True) - - -@cron("update-all-mb-caches") -def cron_update_all_mb_caches(): - """ Update all mb caches via cron """ - update_canonical_release_data(False) - incremental_update_mb_metadata_cache(True) - incremental_update_mb_artist_metadata_cache(True) - incremental_update_mb_release_group_cache(True) From ed9749c64b6f5b8433d9498cd9d9d266230474e4 Mon Sep 17 00:00:00 2001 From: Kartik Ohri Date: Mon, 23 Dec 2024 19:47:18 +0530 Subject: [PATCH 08/19] minor stylistic changes, improve decorator, update to latest sentry_sdk --- mbid_mapping/manage_cron.py | 54 +++++++++++------------------------ mbid_mapping/requirements.txt | 1 + 2 files changed, 17 insertions(+), 38 deletions(-) diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py index 
d8c57c939b..43def2486f 100755 --- a/mbid_mapping/manage_cron.py +++ b/mbid_mapping/manage_cron.py @@ -1,12 +1,10 @@ #!/usr/bin/env python3 -import sys -import os -import subprocess + +from functools import wraps import click import sentry_sdk -from sentry_sdk.crons import capture_checkin -from sentry_sdk.crons.consts import MonitorStatus +from sentry_sdk import monitor import config from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data, update_canonical_release_data @@ -25,19 +23,11 @@ def cron(slug): def wrapper(func): - - def wrapped_f(*args): - sentry_sdk.init(config.LOG_SENTRY["dsn"]) - check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) - try: - func(*args) - except Exception as err: - print("Exception: %s" % err) - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) - return - - capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) - + @wraps(func) + def wrapped_f(*args, **kwargs): + sentry_sdk.init(**config.LOG_SENTRY) + with monitor(slug): + func(*args, **kwargs) return wrapped_f return wrapper @@ -48,8 +38,8 @@ def cli(): pass -@cron("canonical-data-typesense-index") @cli.command() +@cron("canonical-data-typesense-index") def cron_create_all(): """ Create all canonical data in one go as a monitored cron job. First mb canonical data, then its typesense index. @@ -58,8 +48,8 @@ def cron_create_all(): action_build_index() -@cron("caa-color-sync") @cli.command() +@cron("caa-color-sync") def cron_update_coverart(): """ Update the release_color table incrementally. Designed to be called hourly by cron. @@ -68,19 +58,7 @@ def cron_update_coverart(): @cli.command() -def log(): - """ - Print the internal cron log file for debugging purposes. - """ - if os.path.exists(CRON_LOG_FILE): - log("Current cron job log file:") - subprocess.run(["cat", CRON_LOG_FILE]) - else: - log("Log file is empty") - - @cron("build-mb-metadata-cache") -@cli.command() def cron_build_mb_metadata_cache(): """ Build the mb metadata cache and tables it depends on in production in appropriate databases. After building the cache, cleanup mbid_mapping table. @@ -89,8 +67,8 @@ def cron_build_mb_metadata_cache(): cleanup_mbid_mapping_table() -@cron("build-all-mb-caches") @cli.command() +@cron("build-all-mb-caches") def cron_build_all_mb_caches(): """ Build all mb entity metadata cache and tables it depends on in production in appropriate databases. After building the cache, cleanup mbid_mapping table. @@ -102,8 +80,8 @@ def cron_build_all_mb_caches(): -@cron("update-all-mb-caches") @cli.command() +@cron("update-all-mb-caches") def cron_update_all_mb_caches(): """ Update all mb entity metadata cache in ListenBrainz. 
""" update_canonical_release_data(False) @@ -112,8 +90,8 @@ def cron_update_all_mb_caches(): incremental_update_mb_release_group_cache(True) -@cron("create-spotify-metadata-index") @cli.command() +@cron("create-spotify-metadata-index") def build_cron_spotify_metadata_index(use_lb_conn): """ Build the spotify metadata index that LB uses invoked via cron @@ -121,8 +99,8 @@ def build_cron_spotify_metadata_index(use_lb_conn): create_spotify_metadata_index(True) -@cron("create-apple-metadata-index") @cli.command() +@cron("create-apple-metadata-index") def cron_build_apple_metadata_index(): """ Build the Apple Music metadata index that LB uses invoked from cron @@ -130,8 +108,8 @@ def cron_build_apple_metadata_index(): create_apple_metadata_index(True) -@cron("create-soundcloud-metadata-index") @cli.command() +@cron("create-soundcloud-metadata-index") def cron_build_soundcloud_metadata_index(): """ Build the Soundcloud Music metadata index that LB usesa invoked from cron @@ -139,8 +117,8 @@ def cron_build_soundcloud_metadata_index(): create_soundcloud_metadata_index(True) -@cron("create-tag-similarity") @cli.command() +@cron("create-tag-similarity") def cron_build_tag_similarity(): """ Build the tag similarity data invoked via cron diff --git a/mbid_mapping/requirements.txt b/mbid_mapping/requirements.txt index 4a247d7a4f..cf729075ba 100644 --- a/mbid_mapping/requirements.txt +++ b/mbid_mapping/requirements.txt @@ -8,3 +8,4 @@ unidecode python-dateutil==2.8.2 git+https://github.com/metabrainz/brainzutils-python.git@v2.1.0 tqdm==4.66.3 +sentry_sdk==2.19.2 From 52b56fb37adf1d37d3254353948f4891db802621 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Mon, 6 Jan 2025 14:49:53 +0100 Subject: [PATCH 09/19] Cleanup tables --- mbid_mapping/mapping/canonical_musicbrainz_data.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mbid_mapping/mapping/canonical_musicbrainz_data.py b/mbid_mapping/mapping/canonical_musicbrainz_data.py index 7264e45041..f0405c3103 100755 --- a/mbid_mapping/mapping/canonical_musicbrainz_data.py +++ b/mbid_mapping/mapping/canonical_musicbrainz_data.py @@ -81,6 +81,14 @@ def create_canonical_musicbrainz_data(use_lb_conn: bool): else: unlogged = True + # For some reason the tables are not being cleaned up. + if lb_conn is not None: + with lb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as curs: + curs.execute("drop table mapping.canonical_release_redirect_tmp") + curs.execute("drop table mapping.canonical_musicbrainz_data_release_support_tmp") + curs.execute("drop table mapping.canonical_musicbrainz_data_tmp") + curs.execute("drop table mapping.canonical_recording_redirect_tmp") + # Setup all the needed objects releases = CanonicalRelease(mb_conn, unlogged=False) can = CanonicalRecordingRedirect(mb_conn, lb_conn, unlogged=unlogged) From 65a766a8c1393c6d7526a48c1d4332ae15c4ea19 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 9 Jan 2025 10:49:24 +0100 Subject: [PATCH 10/19] Add IF EXISTS --- mbid_mapping/mapping/canonical_musicbrainz_data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mbid_mapping/mapping/canonical_musicbrainz_data.py b/mbid_mapping/mapping/canonical_musicbrainz_data.py index f0405c3103..838b852488 100755 --- a/mbid_mapping/mapping/canonical_musicbrainz_data.py +++ b/mbid_mapping/mapping/canonical_musicbrainz_data.py @@ -84,10 +84,10 @@ def create_canonical_musicbrainz_data(use_lb_conn: bool): # For some reason the tables are not being cleaned up. 
if lb_conn is not None: with lb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as curs: - curs.execute("drop table mapping.canonical_release_redirect_tmp") - curs.execute("drop table mapping.canonical_musicbrainz_data_release_support_tmp") - curs.execute("drop table mapping.canonical_musicbrainz_data_tmp") - curs.execute("drop table mapping.canonical_recording_redirect_tmp") + curs.execute("DROP TABLE IF EXISTS mapping.canonical_release_redirect_tmp") + curs.execute("DROP TABLE IF EXISTS mapping.canonical_musicbrainz_data_release_support_tmp") + curs.execute("DROP TABLE IF EXISTS mapping.canonical_musicbrainz_data_tmp") + curs.execute("DROP TABLE IF EXISTS mapping.canonical_recording_redirect_tmp") # Setup all the needed objects releases = CanonicalRelease(mb_conn, unlogged=False) From 1cd5f883e5358fec7ed4a6831a4516caf5b635f4 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 10 Jan 2025 16:01:28 +0100 Subject: [PATCH 11/19] Hopefully schedule cron jobs to no longer conflict. Fix tag and spotify builds. Fix update all caches --- mbid_mapping/docker/crontab | 2 +- mbid_mapping/manage_cron.py | 4 ++-- mbid_mapping/mapping/canonical_musicbrainz_data.py | 8 -------- mbid_mapping/similar/tag_similarity.py | 4 ++-- 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/mbid_mapping/docker/crontab b/mbid_mapping/docker/crontab index 44dd9f9f2c..894e972fe3 100644 --- a/mbid_mapping/docker/crontab +++ b/mbid_mapping/docker/crontab @@ -1,5 +1,5 @@ # Create the mapping indexes (typesense, canonical data tables) each day at 4am -0 4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-create-all >> /code/mapper/lb-cron.log 2>&1 +0 5 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-create-all >> /code/mapper/lb-cron.log 2>&1 # Run the huesound color sync hourly 10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-coverart >> /code/mapper/lb-cron.log 2>&1 diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py index 43def2486f..85fa210ed5 100755 --- a/mbid_mapping/manage_cron.py +++ b/mbid_mapping/manage_cron.py @@ -68,7 +68,7 @@ def cron_build_mb_metadata_cache(): @cli.command() -@cron("build-all-mb-caches") +@cron("build-mb-metadata-caches") def cron_build_all_mb_caches(): """ Build all mb entity metadata cache and tables it depends on in production in appropriate databases. After building the cache, cleanup mbid_mapping table. @@ -92,7 +92,7 @@ def cron_update_all_mb_caches(): @cli.command() @cron("create-spotify-metadata-index") -def build_cron_spotify_metadata_index(use_lb_conn): +def cron_build_spotify_metadata_index(): """ Build the spotify metadata index that LB uses invoked via cron """ diff --git a/mbid_mapping/mapping/canonical_musicbrainz_data.py b/mbid_mapping/mapping/canonical_musicbrainz_data.py index 838b852488..7264e45041 100755 --- a/mbid_mapping/mapping/canonical_musicbrainz_data.py +++ b/mbid_mapping/mapping/canonical_musicbrainz_data.py @@ -81,14 +81,6 @@ def create_canonical_musicbrainz_data(use_lb_conn: bool): else: unlogged = True - # For some reason the tables are not being cleaned up. 
- if lb_conn is not None: - with lb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as curs: - curs.execute("DROP TABLE IF EXISTS mapping.canonical_release_redirect_tmp") - curs.execute("DROP TABLE IF EXISTS mapping.canonical_musicbrainz_data_release_support_tmp") - curs.execute("DROP TABLE IF EXISTS mapping.canonical_musicbrainz_data_tmp") - curs.execute("DROP TABLE IF EXISTS mapping.canonical_recording_redirect_tmp") - # Setup all the needed objects releases = CanonicalRelease(mb_conn, unlogged=False) can = CanonicalRecordingRedirect(mb_conn, lb_conn, unlogged=unlogged) diff --git a/mbid_mapping/similar/tag_similarity.py b/mbid_mapping/similar/tag_similarity.py index a16f830b7f..d5efa4c007 100755 --- a/mbid_mapping/similar/tag_similarity.py +++ b/mbid_mapping/similar/tag_similarity.py @@ -29,7 +29,7 @@ def get_create_table_columns(self): ("count", "INTEGER NOT NULL")] def get_insert_queries(self): - return [("MB", """SELECT array_agg(t.name) AS tag_name + return ["""SELECT array_agg(t.name) AS tag_name FROM artist a JOIN artist_tag at ON at.artist = a.id @@ -63,7 +63,7 @@ def get_insert_queries(self): JOIN tag t ON rect.tag = t.id GROUP BY rec.gid - HAVING t.count > 0""")] + HAVING t.count > 0"""] def get_index_names(self): From 20a7dfb673b63592e574896f33e595cb33d8095a Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 14 Jan 2025 14:31:01 +0100 Subject: [PATCH 12/19] Move the cron_job wrapper over --- listenbrainz/config.py.sample | 4 + listenbrainz/misc/cron_job.py | 136 ++++++++++++++++++++++++++++++++++ 2 files changed, 140 insertions(+) create mode 100755 listenbrainz/misc/cron_job.py diff --git a/listenbrainz/config.py.sample b/listenbrainz/config.py.sample index 13d06fd8e0..a8aa5ac5c0 100644 --- a/listenbrainz/config.py.sample +++ b/listenbrainz/config.py.sample @@ -212,3 +212,7 @@ REJECT_NEW_USERS_WITHOUT_EMAIL = False # base directory for user data exports USER_DATA_EXPORT_BASE_DIR = "/code/listenbrainz/exports/" + +# Service monitoring -- only needed for MetaBrainz production +SERVICE_MONITOR_TELEGRAM_BOT_TOKEN = "" +SERVICE_MONITOR_TELEGRAM_CHAT_ID = "" diff --git a/listenbrainz/misc/cron_job.py b/listenbrainz/misc/cron_job.py new file mode 100755 index 0000000000..f5a448b7d8 --- /dev/null +++ b/listenbrainz/misc/cron_job.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 + +from collections import deque +import contextlib +import requests +import subprocess +import sys +from time import sleep +import os + +import config + +LINES_IN_LOG_SNIPPET = 500 + +FAILURE_REPORT_RETRIES = 20 +FAILURE_REPORT_DELAY = 5 # in seconds + + +def post_telegram_message(msg): + """ Post a message to the LB services Telegram channel """ + + for retry in range(FAILURE_REPORT_RETRIES): + r = requests.post(url="https://api.telegram.org/bot%s/sendMessage" % config["SERVICE_MONITOR_TELEGRAM_BOT_TOKEN"], + data={ + 'chat_id': config["SERVICE_MONITOR_TELEGRAM_CHAT_ID"], + 'text': msg + }) + if r.status_code == 200: + return + + if r.status_code in (400, 401, 403, 404, 429, 500): + sleep(FAILURE_REPORT_DELAY) + + sys.stderr.write("Failed to send error notification to the Telegram chat.\n") + + +def send_notification(script, return_code, stdout, stderr): + """ Format the logs into a single text message and send it """ + + msg = "script %s failed with error code %d:\n" % (script, return_code) + msg += "STDOUT\n" + msg += "\n".join(stdout) + msg += "\n\n" + if stderr: + msg += "STDERR\n" + msg += "\n".join(stderr) + msg += "\n\n" + + post_telegram_message(msg) + + +def monitor(proc): + """ Monitor a 
process by putting its stdout/stderr pipes into non-blocking mode. Continually read
+    and save the stdout/stderr output, keeping only the last LINES_IN_LOG_SNIPPET lines
+    of output of both. Once the called process terminates, return both stdout and stderr
+    logs """
+
+    newlines = ['\n', '\r\n', '\r']
+    stdout = getattr(proc, "stdout")
+    os.set_blocking(stdout.fileno(), False)
+    stderr = getattr(proc, "stderr")
+    os.set_blocking(stderr.fileno(), False)
+
+    log_stdout = deque(maxlen=LINES_IN_LOG_SNIPPET)
+    log_stderr = deque(maxlen=LINES_IN_LOG_SNIPPET)
+
+    with contextlib.closing(stdout):
+        with contextlib.closing(stderr):
+            stdout_line = ""
+            stderr_line = ""
+            while True:
+                if proc.poll() is not None:
+                    return list(log_stdout), list(log_stderr)
+
+                # Process stdout
+                ch = stdout.read(1)
+                if ch == "":
+                    continue
+
+                if ch in newlines:
+                    sys.stdout.write(stdout_line + ch)
+                    log_stdout.append(stdout_line)
+                    stdout_line = ""
+                    continue
+
+                stdout_line += ch
+
+                # Process stderr
+                ch = stderr.read(1)
+                if ch == "":
+                    continue
+
+                if ch in newlines:
+                    sys.stdout.write(stderr_line + ch)
+                    log_stderr.append(stderr_line)
+                    stderr_line = ""
+                    continue
+
+                stderr_line += ch
+
+
+def monitor_process(cmd):
+    """ Call Popen to start monitoring a process, then monitor the process with the monitor method """
+
+    proc = subprocess.Popen(
+        cmd,
+        stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE,
+        universal_newlines=True,
+    )
+    stdout, stderr = monitor(proc)
+    return proc.returncode, stdout, stderr
+
+
+def main():
+    args = sys.argv[1:]
+    if not args:
+        sys.stderr.write("Error: Must provide one program to execute.")
+        sys.exit(-1)
+
+    try:
+        ret, stdout, stderr = monitor_process(args)
+    except KeyboardInterrupt:
+        sys.exit(-1)
+
+    if ret == 0:
+        # All went well, let's leave!
+        sys.exit(0)
+
+    # We did not exit successfully, so report an error
+    send_notification(sys.argv[0], ret, stdout, stderr)
+    sys.exit(ret)
+
+
+if __name__ == "__main__":
+    main()
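The monitor() function in the patch above combines two standard-library tools: os.set_blocking() to poll the child's pipes without stalling, and collections.deque(maxlen=...) to keep only the tail of the output. A minimal, self-contained sketch of the same idea follows; the `seq` command and the 5-line limit are illustrative assumptions, not taken from the patch:

```python
import os
import subprocess
from collections import deque

# Keep only the last 5 lines, mirroring LINES_IN_LOG_SNIPPET above.
tail = deque(maxlen=5)

proc = subprocess.Popen(["seq", "1", "100"], stdout=subprocess.PIPE)
# Non-blocking mode: read() returns None immediately when no data is waiting.
os.set_blocking(proc.stdout.fileno(), False)

data = b""
while proc.poll() is None:
    chunk = proc.stdout.read()
    if chunk:
        data += chunk
# Drain anything still buffered after the process exits.
chunk = proc.stdout.read()
if chunk:
    data += chunk

for line in data.decode().splitlines():
    tail.append(line)
print(list(tail))  # ['96', '97', '98', '99', '100']
```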
From 797c666a5ba1d07f1f8cd691d4ed4c42e098a266 Mon Sep 17 00:00:00 2001
From: Robert Kaye
Date: Wed, 15 Jan 2025 17:08:58 +0100
Subject: [PATCH 13/19] Various improvements, fix CAA sync
---
 mbid_mapping/config.py.sample             |  4 +++
 mbid_mapping/manage_cron.py               | 30 +++++++++++++++++++++--
 mbid_mapping/mapping/mb_cache_base.py     |  2 ++
 mbid_mapping/mapping/mb_metadata_cache.py |  2 ++
 mbid_mapping/mapping/release_colors.py    | 15 +++++++-----
 5 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/mbid_mapping/config.py.sample b/mbid_mapping/config.py.sample
index c521a28d58..d2c22a78d3 100644
--- a/mbid_mapping/config.py.sample
+++ b/mbid_mapping/config.py.sample
@@ -28,3 +28,7 @@ REDIS_NAMESPACE = "listenbrainz"
 
 # For debugging, only fetches a tiny portion of the data if True
 USE_MINIMAL_DATASET = True
+
+# Service monitoring -- only needed for MetaBrainz production
+SERVICE_MONITOR_TELEGRAM_BOT_TOKEN = ""
+SERVICE_MONITOR_TELEGRAM_CHAT_ID = ""
diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py
index 85fa210ed5..9f938882a2 100755
--- a/mbid_mapping/manage_cron.py
+++ b/mbid_mapping/manage_cron.py
@@ -1,10 +1,15 @@
 #!/usr/bin/env python3
 
 from functools import wraps
+from traceback import print_exception
+import sys
 
 import click
 import sentry_sdk
 from sentry_sdk import monitor
+from sentry_sdk.crons import capture_checkin
+from sentry_sdk.crons.consts import MonitorStatus
+
 
 import config
 from mapping.canonical_musicbrainz_data import create_canonical_musicbrainz_data, update_canonical_release_data
@@ -18,16 +23,32 @@
     incremental_update_mb_artist_metadata_cache
 from mapping.mb_release_group_cache import create_mb_release_group_cache, incremental_update_mb_release_group_cache
 from similar.tag_similarity import create_tag_similarity
+from mapping.utils import log
 
 
-def cron(slug):
 
+def cron(slug):
+    """ Cron decorator making it easy to monitor a cron job. The slug argument defines the sentry cron job identifier. """
     def wrapper(func):
         @wraps(func)
         def wrapped_f(*args, **kwargs):
             sentry_sdk.init(**config.LOG_SENTRY)
-            with monitor(slug):
+            check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS)
+            try:
+                log("cron decorator enter")
                 func(*args, **kwargs)
+                log("cron decorator exit")
+                log("report success to sentry")
+                capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK)
+                log("report success to sentry done")
+                sys.exit(0)
+            except Exception as exc:
+                print_exception(exc)
+                log("report failure to sentry")
+                capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR)
+                log("report failure to sentry done")
+                sys.exit(-1)
+
         return wrapped_f
 
     return wrapper
@@ -73,10 +94,15 @@ def cron_build_all_mb_caches():
     """ Build all mb entity metadata cache and tables it depends on in production in appropriate databases.
        After building the cache, cleanup mbid_mapping table. 
""" + log("cron_build_all_mb_caches 1") create_mb_metadata_cache(True) + log("cron_build_all_mb_caches 2") cleanup_mbid_mapping_table() + log("cron_build_all_mb_caches 3") create_mb_artist_metadata_cache(True) + log("cron_build_all_mb_caches 4") create_mb_release_group_cache(True) + log("cron_build_all_mb_caches 5") diff --git a/mbid_mapping/mapping/mb_cache_base.py b/mbid_mapping/mapping/mb_cache_base.py index b3e5887d60..0e55fa874a 100644 --- a/mbid_mapping/mapping/mb_cache_base.py +++ b/mbid_mapping/mapping/mb_cache_base.py @@ -194,6 +194,7 @@ def create_metadata_cache(cache_cls, cache_key, required_tables, use_lb_conn: bo Arguments: use_lb_conn: whether to use LB conn or not """ + log("Entering create_metadata_cache") psycopg2.extras.register_uuid() if use_lb_conn: @@ -219,6 +220,7 @@ def create_metadata_cache(cache_cls, cache_key, required_tables, use_lb_conn: bo cache = cache_cls(mb_conn, lb_conn, unlogged=unlogged) cache.run() update_metadata_cache_timestamp(lb_conn or mb_conn, new_timestamp, cache_key) + log("exiting create_metadata_cache") def incremental_update_metadata_cache(cache_cls, cache_key, use_lb_conn: bool): diff --git a/mbid_mapping/mapping/mb_metadata_cache.py b/mbid_mapping/mapping/mb_metadata_cache.py index 69364ea315..2574b79353 100755 --- a/mbid_mapping/mapping/mb_metadata_cache.py +++ b/mbid_mapping/mapping/mb_metadata_cache.py @@ -604,12 +604,14 @@ def create_mb_metadata_cache(use_lb_conn: bool): Arguments: use_lb_conn: whether to use LB conn or not """ + log("entering create_mb_metadata_cache") create_metadata_cache( MusicBrainzMetadataCache, MB_METADATA_CACHE_TIMESTAMP_KEY, [CanonicalRecordingReleaseRedirect], use_lb_conn ) + log("exiting create_mb_metadata_cache") def incremental_update_mb_metadata_cache(use_lb_conn: bool): diff --git a/mbid_mapping/mapping/release_colors.py b/mbid_mapping/mapping/release_colors.py index bdcdf1407e..f319ba00fa 100755 --- a/mbid_mapping/mapping/release_colors.py +++ b/mbid_mapping/mapping/release_colors.py @@ -207,7 +207,7 @@ def sync_release_color_table(): log("cover art sync starting...") mb_query = """SELECT caa.id AS caa_id - , release AS release_id + , r.release AS release_id , release.gid AS release_mbid , mime_type , year @@ -216,7 +216,7 @@ def sync_release_color_table(): ON cat.id = caa.id JOIN musicbrainz.release r ON caa.release = release.id - JOIN release_first_release_date rfrd + JOIN musicbrainz.release_first_release_date rfrd ON rfrd.release = r.id WHERE type_id = 1 AND caa.id > %s @@ -258,8 +258,8 @@ def incremental_update_release_color_table(): log("cover art incremental update starting...") mb_query = """SELECT caa.id AS caa_id - , release AS release_id - , release.gid AS release_mbid + , r.id AS release_id + , r.gid AS release_mbid , mime_type , date_uploaded , year @@ -267,8 +267,8 @@ def incremental_update_release_color_table(): JOIN cover_art_archive.cover_art_type cat ON cat.id = caa.id JOIN musicbrainz.release r - ON caa.release = release.id - LEFT JOIN release_first_release_date rfrd + ON caa.release = r.id + LEFT JOIN musicbrainz.release_first_release_date rfrd ON rfrd.release = r.id WHERE type_id = 1 AND caa.date_uploaded > %s @@ -295,6 +295,9 @@ def compare_coverart(mb_query, lb_query, mb_caa_index, lb_caa_index, mb_compare_ psycopg2.connect(config.SQLALCHEMY_DATABASE_URI) as lb_conn, \ lb_conn.cursor(cursor_factory=psycopg2.extras.DictCursor) as lb_curs: + log("MB: ", config.MB_DATABASE_STANDBY_URI) + log("LB: ", config.SQLALCHEMY_DATABASE_URI) + mb_count, lb_count = get_cover_art_counts(mb_curs, 
lb_curs) log("CAA count: %d" % (mb_count,)) log("LB count: %d" % (lb_count,)) From 4bcb898116395577a425ab9952e80687f807ac69 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Thu, 16 Jan 2025 14:31:17 +0100 Subject: [PATCH 14/19] Finish cron job wrapper with telegram report --- {listenbrainz/misc => mbid_mapping}/cron_job.py | 12 +++++++----- mbid_mapping/docker/consul_config.py.ctmpl | 4 ++++ mbid_mapping/docker/crontab | 16 ++++++++-------- mbid_mapping/manage_cron.py | 11 +++-------- mbid_mapping/mapping/release_colors.py | 6 +++--- 5 files changed, 25 insertions(+), 24 deletions(-) rename {listenbrainz/misc => mbid_mapping}/cron_job.py (89%) diff --git a/listenbrainz/misc/cron_job.py b/mbid_mapping/cron_job.py similarity index 89% rename from listenbrainz/misc/cron_job.py rename to mbid_mapping/cron_job.py index f5a448b7d8..a616a0046c 100755 --- a/listenbrainz/misc/cron_job.py +++ b/mbid_mapping/cron_job.py @@ -8,6 +8,7 @@ from time import sleep import os +from mapping.utils import log import config LINES_IN_LOG_SNIPPET = 500 @@ -20,9 +21,9 @@ def post_telegram_message(msg): """ Post a message to the LB services Telegram channel """ for retry in range(FAILURE_REPORT_RETRIES): - r = requests.post(url="https://api.telegram.org/bot%s/sendMessage" % config["SERVICE_MONITOR_TELEGRAM_BOT_TOKEN"], + r = requests.post(url="https://api.telegram.org/bot%s/sendMessage" % config.SERVICE_MONITOR_TELEGRAM_BOT_TOKEN, data={ - 'chat_id': config["SERVICE_MONITOR_TELEGRAM_CHAT_ID"], + 'chat_id': config.SERVICE_MONITOR_TELEGRAM_CHAT_ID, 'text': msg }) if r.status_code == 200: @@ -31,7 +32,7 @@ def post_telegram_message(msg): if r.status_code in (400, 401, 403, 404, 429, 500): sleep(FAILURE_REPORT_DELAY) - sys.stderr.write("Failed to send error notification to the Telegram chat.\n") + log("Failed to send error notification to the Telegram chat.\n") def send_notification(script, return_code, stdout, stderr): @@ -113,9 +114,10 @@ def monitor_process(cmd): def main(): + log("cron job starting") args = sys.argv[1:] if not args: - sys.stderr.write("Error: Must provide one program to execute.") + log("Error: Must provide one program to execute.") sys.exit(-1) try: @@ -128,7 +130,7 @@ def main(): sys.exit(0) # We did not exit successfully, so report an error - send_notification(sys.argv[0], ret, stdout, stderr) + send_notification(" ".join(sys.argv[1:]), ret, stdout, stderr) sys.exit(ret) diff --git a/mbid_mapping/docker/consul_config.py.ctmpl b/mbid_mapping/docker/consul_config.py.ctmpl index 3409c912e3..dc2d45f0b1 100644 --- a/mbid_mapping/docker/consul_config.py.ctmpl +++ b/mbid_mapping/docker/consul_config.py.ctmpl @@ -93,3 +93,7 @@ LOG_SENTRY = { 'traces_sample_rate': {{template "KEY" "sentry/traces_sample_rate"}}, } DATASETS_SENTRY_DSN = '''{{template "KEY" "sentry/datasets_dsn"}}''' + +# For monitoring cron jobs +SERVICE_MONITOR_TELEGRAM_BOT_TOKEN = '''{{template "KEY" "service_monitor_telegram_bot_token"}}''' +SERVICE_MONITOR_TELEGRAM_CHAT_ID = '''{{template "KEY" "service_monitor_telegram_chat_id"}}''' diff --git a/mbid_mapping/docker/crontab b/mbid_mapping/docker/crontab index 894e972fe3..0279e2cbd0 100644 --- a/mbid_mapping/docker/crontab +++ b/mbid_mapping/docker/crontab @@ -1,23 +1,23 @@ # Create the mapping indexes (typesense, canonical data tables) each day at 4am -0 5 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-create-all >> /code/mapper/lb-cron.log 2>&1 +0 5 * * * listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py 
cron cron-create-all >> /code/mapper/lb-cron.log 2>&1 # Run the huesound color sync hourly -10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-coverart >> /code/mapper/lb-cron.log 2>&1 +10 * * * * listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-update-coverart >> /code/mapper/lb-cron.log 2>&1 # Rebuild the spotify metadata index every friday at 1 A.M. -0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1 +0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1 # Rebuild the apple music metadata index every friday at 2 A.M. -0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1 +0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1 # Rebuild the soundcloud music metadata index every friday at 3 A.M. -0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1 +0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1 # Rebuild similar tag data at 2am sundays -0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 +0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 # Build the mb entity caches from scratch on first thursday of every month, MB dumps run on Wed and Sat so avoid those days -0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/manage.py cron cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 +0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 # Update the mb entity caches incrementally every 4 hours -0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 +0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py index 9f938882a2..478547ae4e 100755 --- a/mbid_mapping/manage_cron.py +++ b/mbid_mapping/manage_cron.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 from functools import wraps -from traceback import print_exception +from traceback import format_exc import sys import click @@ -38,15 +38,10 @@ def wrapped_f(*args, **kwargs): log("cron decorator enter") func(*args, **kwargs) log("cron decorator exit") - log("report success to sentry") capture_checkin(monitor_slug=slug, 
check_in_id=check_in_id, status=MonitorStatus.OK)
-                log("report success to sentry done")
-                sys.exit(0)
-            except Exception as exc:
-                print_exception(exc)
-                log("report failure to sentry")
+            except Exception:
+                log(format_exc())
                 capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR)
-                log("report failure to sentry done")
                 sys.exit(-1)
 
         return wrapped_f
diff --git a/mbid_mapping/mapping/release_colors.py b/mbid_mapping/mapping/release_colors.py
index f319ba00fa..f151f42f2c 100755
--- a/mbid_mapping/mapping/release_colors.py
+++ b/mbid_mapping/mapping/release_colors.py
@@ -207,15 +207,15 @@ def sync_release_color_table():
     log("cover art sync starting...")
 
     mb_query = """SELECT caa.id AS caa_id
-                        , r.release AS release_id
-                        , release.gid AS release_mbid
+                        , r.id AS release_id
+                        , r.gid AS release_mbid
                         , mime_type
                         , year
                     FROM cover_art_archive.cover_art caa
                     JOIN cover_art_archive.cover_art_type cat
                       ON cat.id = caa.id
                     JOIN musicbrainz.release r
-                      ON caa.release = release.id
+                      ON caa.release = r.id
                     JOIN musicbrainz.release_first_release_date rfrd
                       ON rfrd.release = r.id
                     WHERE type_id = 1

From 1e13673fdc17674151db207f8917f93036f49a71 Mon Sep 17 00:00:00 2001
From: Robert Kaye
Date: Tue, 21 Jan 2025 11:58:58 +0100
Subject: [PATCH 15/19] Tweak cron jobs
---
 mbid_mapping/docker/crontab           | 16 ++++++++--------
 mbid_mapping/mapping/mb_cache_base.py | 40 ++++++++++++++-----------
 2 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/mbid_mapping/docker/crontab b/mbid_mapping/docker/crontab
index 0279e2cbd0..894e972fe3 100644
--- a/mbid_mapping/docker/crontab
+++ b/mbid_mapping/docker/crontab
@@ -1,23 +1,23 @@
 # Create the mapping indexes (typesense, canonical data tables) each day at 4am
-0 5 * * * listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-create-all >> /code/mapper/lb-cron.log 2>&1
+0 5 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-create-all >> /code/mapper/lb-cron.log 2>&1
 
 # Run the huesound color sync hourly
-10 * * * * listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-update-coverart >> /code/mapper/lb-cron.log 2>&1
+10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-coverart >> /code/mapper/lb-cron.log 2>&1
 
 # Rebuild the spotify metadata index every friday at 1 A.M.
-0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1
+0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1
 
 # Rebuild the apple music metadata index every friday at 2 A.M.
-0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1
+0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1
 
 # Rebuild the soundcloud music metadata index every friday at 3 A.M. 
-0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1 +0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1 # Rebuild similar tag data at 2am sundays -0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 +0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 # Build the mb entity caches from scratch on first thursday of every month, MB dumps run on Wed and Sat so avoid those days -0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 +0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/manage.py cron cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 # Update the mb entity caches incrementally every 4 hours -0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 +0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 diff --git a/mbid_mapping/mapping/mb_cache_base.py b/mbid_mapping/mapping/mb_cache_base.py index 0e55fa874a..a07850759d 100644 --- a/mbid_mapping/mapping/mb_cache_base.py +++ b/mbid_mapping/mapping/mb_cache_base.py @@ -1,6 +1,6 @@ from abc import ABC from datetime import datetime - +from time import sleep from typing import List, Set import uuid @@ -204,22 +204,30 @@ def create_metadata_cache(cache_cls, cache_key, required_tables, use_lb_conn: bo mb_uri = config.MBID_MAPPING_DATABASE_URI unlogged = True - with psycopg2.connect(mb_uri) as mb_conn: - lb_conn = None - if use_lb_conn and config.SQLALCHEMY_TIMESCALE_URI: - lb_conn = psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI) + for attempt in range(3): + try: + with psycopg2.connect(mb_uri) as mb_conn: + lb_conn = None + if use_lb_conn and config.SQLALCHEMY_TIMESCALE_URI: + lb_conn = psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI) + + for table_cls in required_tables: + table = table_cls(mb_conn, lb_conn, unlogged=unlogged) + + if not table.table_exists(): + log(f"{table.table_name} table does not exist, first create the table normally") + return + + new_timestamp = datetime.now() + cache = cache_cls(mb_conn, lb_conn, unlogged=unlogged) + cache.run() + update_metadata_cache_timestamp(lb_conn or mb_conn, new_timestamp, cache_key) + break + except psycopg2.OperationalError: + log("DB connection failed. 
Retrying.") + sleep(5) + continue - for table_cls in required_tables: - table = table_cls(mb_conn, lb_conn, unlogged=unlogged) - - if not table.table_exists(): - log(f"{table.table_name} table does not exist, first create the table normally") - return - - new_timestamp = datetime.now() - cache = cache_cls(mb_conn, lb_conn, unlogged=unlogged) - cache.run() - update_metadata_cache_timestamp(lb_conn or mb_conn, new_timestamp, cache_key) log("exiting create_metadata_cache") From 1f26f5aee0fb1be2207a4d1980a43d0e6341bcfe Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Tue, 21 Jan 2025 16:59:05 +0100 Subject: [PATCH 16/19] Renew the DB connection, rather than attempting to use a stale connection --- mbid_mapping/mapping/mb_cache_base.py | 48 +++++++++++++-------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/mbid_mapping/mapping/mb_cache_base.py b/mbid_mapping/mapping/mb_cache_base.py index a07850759d..9ac6c5318e 100644 --- a/mbid_mapping/mapping/mb_cache_base.py +++ b/mbid_mapping/mapping/mb_cache_base.py @@ -1,6 +1,6 @@ from abc import ABC from datetime import datetime -from time import sleep + from typing import List, Set import uuid @@ -204,29 +204,29 @@ def create_metadata_cache(cache_cls, cache_key, required_tables, use_lb_conn: bo mb_uri = config.MBID_MAPPING_DATABASE_URI unlogged = True - for attempt in range(3): - try: - with psycopg2.connect(mb_uri) as mb_conn: - lb_conn = None - if use_lb_conn and config.SQLALCHEMY_TIMESCALE_URI: - lb_conn = psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI) - - for table_cls in required_tables: - table = table_cls(mb_conn, lb_conn, unlogged=unlogged) - - if not table.table_exists(): - log(f"{table.table_name} table does not exist, first create the table normally") - return - - new_timestamp = datetime.now() - cache = cache_cls(mb_conn, lb_conn, unlogged=unlogged) - cache.run() - update_metadata_cache_timestamp(lb_conn or mb_conn, new_timestamp, cache_key) - break - except psycopg2.OperationalError: - log("DB connection failed. 
Retrying.") - sleep(5) - continue + with psycopg2.connect(mb_uri) as mb_conn: + lb_conn = None + if use_lb_conn and config.SQLALCHEMY_TIMESCALE_URI: + lb_conn = psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI) + + for table_cls in required_tables: + table = table_cls(mb_conn, lb_conn, unlogged=unlogged) + + if not table.table_exists(): + log(f"{table.table_name} table does not exist, first create the table normally") + return + + new_timestamp = datetime.now() + cache = cache_cls(mb_conn, lb_conn, unlogged=unlogged) + cache.run() + + # the connection times out after the long process above, so start with a fresh connection + with psycopg2.connect(mb_uri) as mb_conn: + lb_conn = None + if use_lb_conn and config.SQLALCHEMY_TIMESCALE_URI: + lb_conn = psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI) + + update_metadata_cache_timestamp(lb_conn or mb_conn, new_timestamp, cache_key) log("exiting create_metadata_cache") From 7f6b254b40760fe4698bb2ddbd04101038fa73f5 Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Wed, 22 Jan 2025 18:07:33 +0100 Subject: [PATCH 17/19] Remove loads of prints --- mbid_mapping/manage_cron.py | 3 -- mbid_mapping/mapping/mb_cache_base.py | 40 ++++++++++++++--------- mbid_mapping/mapping/mb_metadata_cache.py | 3 +- 3 files changed, 26 insertions(+), 20 deletions(-) diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py index 478547ae4e..3cf55968cd 100755 --- a/mbid_mapping/manage_cron.py +++ b/mbid_mapping/manage_cron.py @@ -35,12 +35,9 @@ def wrapped_f(*args, **kwargs): sentry_sdk.init(**config.LOG_SENTRY) check_in_id = capture_checkin(monitor_slug=slug, status=MonitorStatus.IN_PROGRESS) try: - log("cron decorator enter") func(*args, **kwargs) - log("cron decorator exit") capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.OK) except Exception: - log(format_exc()) capture_checkin(monitor_slug=slug, check_in_id=check_in_id, status=MonitorStatus.ERROR) sys.exit(-1) diff --git a/mbid_mapping/mapping/mb_cache_base.py b/mbid_mapping/mapping/mb_cache_base.py index 9ac6c5318e..8aa94eb639 100644 --- a/mbid_mapping/mapping/mb_cache_base.py +++ b/mbid_mapping/mapping/mb_cache_base.py @@ -194,7 +194,6 @@ def create_metadata_cache(cache_cls, cache_key, required_tables, use_lb_conn: bo Arguments: use_lb_conn: whether to use LB conn or not """ - log("Entering create_metadata_cache") psycopg2.extras.register_uuid() if use_lb_conn: @@ -204,21 +203,34 @@ def create_metadata_cache(cache_cls, cache_key, required_tables, use_lb_conn: bo mb_uri = config.MBID_MAPPING_DATABASE_URI unlogged = True - with psycopg2.connect(mb_uri) as mb_conn: - lb_conn = None - if use_lb_conn and config.SQLALCHEMY_TIMESCALE_URI: - lb_conn = psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI) + success = False + try: + with psycopg2.connect(mb_uri) as mb_conn: + # I think sqlalchemy captures tracebacks and obscures where the real problem is + try: + lb_conn = None + if use_lb_conn and config.SQLALCHEMY_TIMESCALE_URI: + lb_conn = psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI) - for table_cls in required_tables: - table = table_cls(mb_conn, lb_conn, unlogged=unlogged) + for table_cls in required_tables: + table = table_cls(mb_conn, lb_conn, unlogged=unlogged) - if not table.table_exists(): - log(f"{table.table_name} table does not exist, first create the table normally") - return + if not table.table_exists(): + log(f"{table.table_name} table does not exist, first create the table normally") + return - new_timestamp = datetime.now() - cache = cache_cls(mb_conn, 
lb_conn, unlogged=unlogged)
+                    cache.run()
+                    success = True
+            except Exception:
+                log(format_exc())
+
+    except psycopg2.OperationalError:
+        if not success:
+            raise
+
+        # Otherwise ignore the connection error, make a new connection
 
     # the connection times out after the long process above, so start with a fresh connection
     with psycopg2.connect(mb_uri) as mb_conn:
         lb_conn = None
         if use_lb_conn and config.SQLALCHEMY_TIMESCALE_URI:
             lb_conn = psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI)
 
         update_metadata_cache_timestamp(lb_conn or mb_conn, new_timestamp, cache_key)
 
-    log("exiting create_metadata_cache")
 
 def incremental_update_metadata_cache(cache_cls, cache_key, use_lb_conn: bool):
     """ Update the MB metadata cache incrementally """
diff --git a/mbid_mapping/mapping/mb_metadata_cache.py b/mbid_mapping/mapping/mb_metadata_cache.py
index 2574b79353..7aa9753ec8 100755
--- a/mbid_mapping/mapping/mb_metadata_cache.py
+++ b/mbid_mapping/mapping/mb_metadata_cache.py
@@ -604,14 +604,12 @@ def create_mb_metadata_cache(use_lb_conn: bool):
     Arguments:
         use_lb_conn: whether to use LB conn or not
     """
-    log("entering create_mb_metadata_cache")
     create_metadata_cache(
         MusicBrainzMetadataCache,
         MB_METADATA_CACHE_TIMESTAMP_KEY,
         [CanonicalRecordingReleaseRedirect],
         use_lb_conn
     )
-    log("exiting create_mb_metadata_cache")
 
 
 def incremental_update_mb_metadata_cache(use_lb_conn: bool):
@@ -633,6 +631,7 @@ def cleanup_mbid_mapping_table():
               WHERE mbc.recording_mbid = mm.recording_mbid
         )
     """
+    log("cleanup_mbid_mapping_table running")
    with psycopg2.connect(config.SQLALCHEMY_TIMESCALE_URI) as lb_conn, lb_conn.cursor() as lb_curs:
         lb_curs.execute(query)
         log(f"mbid mapping: invalidated {lb_curs.rowcount} rows")
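The hunk above distinguishes an OperationalError raised while the cache is still being built (fatal, re-raised with a bare `raise`; note that `raise()` would itself fail with a TypeError) from one raised when the outer connection goes stale after the work has finished. A condensed sketch of that pattern, assuming only psycopg2 — `run_long_job` and `work` are illustrative names, not part of the patch:

```python
import psycopg2

def run_long_job(uri, work):
    """ Run work() on a fresh connection and tolerate the connection
        going stale once the work itself has finished. """
    success = False
    try:
        with psycopg2.connect(uri) as conn:
            work(conn)
            success = True
    except psycopg2.OperationalError:
        if not success:
            raise  # a bare raise re-raises the active exception
        # The work finished; the stale connection merely errored on the
        # way out, so fall through and reconnect below.

    with psycopg2.connect(uri) as conn:
        pass  # e.g. record the completion timestamp on the new connection
```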
From 0c759c204e91032cc82793980fa6e4714aff1133 Mon Sep 17 00:00:00 2001
From: Robert Kaye
Date: Fri, 24 Jan 2025 12:47:27 +0100
Subject: [PATCH 18/19] Fix final cron job. Remove debugs
---
 mbid_mapping/docker/crontab |  4 ++++
 mbid_mapping/manage_cron.py | 22 +++++++++++++---------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/mbid_mapping/docker/crontab b/mbid_mapping/docker/crontab
index 894e972fe3..d7b5925759 100644
--- a/mbid_mapping/docker/crontab
+++ b/mbid_mapping/docker/crontab
@@ -17,7 +17,11 @@
 0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1
 
 # Build the mb entity caches from scratch on first thursday of every month, MB dumps run on Wed and Sat so avoid those days
+<<<<<<< Updated upstream
 0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/manage.py cron cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1
+=======
+0 15 * * 4 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1
+>>>>>>> Stashed changes
 
 # Update the mb entity caches incrementally every 4 hours
 0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1
diff --git a/mbid_mapping/manage_cron.py b/mbid_mapping/manage_cron.py
index 3cf55968cd..dc33d88213 100755
--- a/mbid_mapping/manage_cron.py
+++ b/mbid_mapping/manage_cron.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 
+import datetime
 from functools import wraps
 from traceback import format_exc
 import sys
@@ -86,22 +87,25 @@ def cron_build_all_mb_caches():
     """ Build all mb entity metadata cache and tables it depends on in production in appropriate databases.
         After building the cache, cleanup mbid_mapping table.
     """
-    log("cron_build_all_mb_caches 1")
-    create_mb_metadata_cache(True)
-    log("cron_build_all_mb_caches 2")
-    cleanup_mbid_mapping_table()
-    log("cron_build_all_mb_caches 3")
-    create_mb_artist_metadata_cache(True)
-    log("cron_build_all_mb_caches 4")
-    create_mb_release_group_cache(True)
-    log("cron_build_all_mb_caches 5")
+    # We only want this cron job to run the first week of the month:
+    dt = datetime.datetime.now(datetime.timezone.utc)
+    if dt.day <= 7:
+        log("day %d: Running cron job" % dt.day)
+        create_mb_metadata_cache(True)
+        cleanup_mbid_mapping_table()
+        create_mb_artist_metadata_cache(True)
+        create_mb_release_group_cache(True)
+
+    else:
+        log("day %d: skipping cron job" % dt.day)
 
 
 @cli.command()
 @cron("update-all-mb-caches")
 def cron_update_all_mb_caches():
     """ Update all mb entity metadata cache in ListenBrainz. 
""" + update_canonical_release_data(False) incremental_update_mb_metadata_cache(True) incremental_update_mb_artist_metadata_cache(True) From bf8f8a3364743dd4ad929faf282a44cc804fd58e Mon Sep 17 00:00:00 2001 From: Robert Kaye Date: Fri, 24 Jan 2025 12:48:50 +0100 Subject: [PATCH 19/19] Add note to crontab --- mbid_mapping/docker/crontab | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/mbid_mapping/docker/crontab b/mbid_mapping/docker/crontab index d7b5925759..fa04ee41ad 100644 --- a/mbid_mapping/docker/crontab +++ b/mbid_mapping/docker/crontab @@ -17,11 +17,8 @@ 0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1 # Build the mb entity caches from scratch on first thursday of every month, MB dumps run on Wed and Sat so avoid those days -<<<<<<< Updated upstream -0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/manage.py cron cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 -======= +# NOTE: This cron job files every thursday, but the python script ensures that it only runs during the first week of the month 0 15 * * 4 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1 ->>>>>>> Stashed changes # Update the mb entity caches incrementally every 4 hours 0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1