Add sentry cron monitoring #3090

Open: wants to merge 24 commits into master from add-sentry-cron-monitoring

Commits (24)

50bdabd  Add sentry cron job monitoring for release color script (mayhem, Dec 4, 2024)
e657ca9  Undo accidental commit (mayhem, Dec 4, 2024)
88f00ba  Merge branch 'master' into add-sentry-cron-monitoring (mayhem, Dec 19, 2024)
80b2e92  Interim check-in (mayhem, Dec 20, 2024)
d4bb724  Interim checkin (mayhem, Dec 20, 2024)
ded3db4  Move cron to a submenu to avoid accidental user invocation (mayhem, Dec 20, 2024)
5d876d3  Use decorators (mayhem, Dec 20, 2024)
20d9144  Removed cron_wrappers.py (mayhem, Dec 20, 2024)
95615f1  Merge branch 'master' into add-sentry-cron-monitoring (mayhem, Dec 23, 2024)
ed9749c  minor stylistic changes, improve decorator, update to latest sentry_sdk (amCap1712, Dec 23, 2024)
e7c7667  Merge branch 'add-sentry-cron-monitoring' of github.com:metabrainz/li… (mayhem, Dec 23, 2024)
52b56fb  Cleanup tables (mayhem, Jan 6, 2025)
65a766a  Add IF EXISTS (mayhem, Jan 9, 2025)
1cd5f88  Hopefully schedule cron jobs to no longer conflict. Fix tag and spotify (mayhem, Jan 10, 2025)
85c1f33  Merge branch 'master' into add-sentry-cron-monitoring (mayhem, Jan 14, 2025)
20a7dfb  Move the cron_job wrapper over (mayhem, Jan 14, 2025)
797c666  Various improvements, fix CAA sync (mayhem, Jan 15, 2025)
4bcb898  Finish cron job wrapper with telegram report (mayhem, Jan 16, 2025)
1e13673  Tweak cron jobs (mayhem, Jan 21, 2025)
1f26f5a  Renew the DB connection, rather than attempting to use a stale (mayhem, Jan 21, 2025)
7f6b254  Remove loads of prints (mayhem, Jan 22, 2025)
0c759c2  Fix final cron job. Remove debugs (mayhem, Jan 24, 2025)
bf8f8a3  Add note to crontab (mayhem, Jan 24, 2025)
2d05cb4  Merge branch 'master' into add-sentry-cron-monitoring (mayhem, Jan 24, 2025)
4 changes: 4 additions & 0 deletions listenbrainz/config.py.sample
@@ -212,3 +212,7 @@ REJECT_NEW_USERS_WITHOUT_EMAIL = False
 
 # base directory for user data exports
 USER_DATA_EXPORT_BASE_DIR = "/code/listenbrainz/exports/"
+
+# Service monitoring -- only needed for MetaBrainz production
+SERVICE_MONITOR_TELEGRAM_BOT_TOKEN = ""
+SERVICE_MONITOR_TELEGRAM_CHAT_ID = ""
4 changes: 4 additions & 0 deletions mbid_mapping/config.py.sample
@@ -28,3 +28,7 @@ REDIS_NAMESPACE = "listenbrainz"
 
 # For debugging, only fetches a tiny portion of the data if True
 USE_MINIMAL_DATASET = True
+
+# Service monitoring -- only needed for MetaBrainz production
+SERVICE_MONITOR_TELEGRAM_BOT_TOKEN = ""
+SERVICE_MONITOR_TELEGRAM_CHAT_ID = ""
138 changes: 138 additions & 0 deletions mbid_mapping/cron_job.py
@@ -0,0 +1,138 @@
#!/usr/bin/env python3

from collections import deque
import contextlib
import os
import subprocess
import sys
from time import sleep

import requests

from mapping.utils import log
import config

LINES_IN_LOG_SNIPPET = 500

FAILURE_REPORT_RETRIES = 20
FAILURE_REPORT_DELAY = 5  # in seconds


def post_telegram_message(msg):
    """ Post a message to the LB services Telegram channel """

    for retry in range(FAILURE_REPORT_RETRIES):
        r = requests.post(url="https://api.telegram.org/bot%s/sendMessage" % config.SERVICE_MONITOR_TELEGRAM_BOT_TOKEN,
                          data={
                              'chat_id': config.SERVICE_MONITOR_TELEGRAM_CHAT_ID,
                              'text': msg
                          })
        if r.status_code == 200:
            return

        # Pause before retrying when Telegram returns a known error code
        if r.status_code in (400, 401, 403, 404, 429, 500):
            sleep(FAILURE_REPORT_DELAY)

    log("Failed to send error notification to the Telegram chat.\n")


def send_notification(script, return_code, stdout, stderr):
    """ Format the logs into a single text message and send it """

    msg = "script %s failed with error code %d:\n" % (script, return_code)
    msg += "STDOUT\n"
    msg += "\n".join(stdout)
    msg += "\n\n"
    if stderr:
        msg += "STDERR\n"
        msg += "\n".join(stderr)
        msg += "\n\n"

    post_telegram_message(msg)


def monitor(proc):
    """ Monitor a process by making its stdout/stderr pipes non-blocking. Continually read
        and save the stdout/stderr output, keeping only the last LINES_IN_LOG_SNIPPET lines
        of each. Once the called process terminates, return both the stdout and stderr
        logs """

    newlines = ['\n', '\r\n', '\r']
    stdout = proc.stdout
    os.set_blocking(stdout.fileno(), False)
    stderr = proc.stderr
    os.set_blocking(stderr.fileno(), False)

    log_stdout = deque(maxlen=LINES_IN_LOG_SNIPPET)
    log_stderr = deque(maxlen=LINES_IN_LOG_SNIPPET)

    with contextlib.closing(stdout):
        with contextlib.closing(stderr):
            stdout_line = ""
            stderr_line = ""
            while True:
                if proc.poll() is not None:
                    return list(log_stdout), list(log_stderr)

                # Drain one character from each stream per iteration. The
                # non-blocking reads return "" when no data is waiting, so an
                # idle stdout cannot starve stderr (or vice versa) and fill
                # the child's pipe buffer.

                # Process stdout
                ch = stdout.read(1)
                if ch:
                    if ch in newlines:
                        sys.stdout.write(stdout_line + ch)
                        log_stdout.append(stdout_line)
                        stdout_line = ""
                    else:
                        stdout_line += ch

                # Process stderr
                ch = stderr.read(1)
                if ch:
                    if ch in newlines:
                        sys.stdout.write(stderr_line + ch)
                        log_stderr.append(stderr_line)
                        stderr_line = ""
                    else:
                        stderr_line += ch


def monitor_process(cmd):
    """ Start the given command with Popen, then watch it with the monitor function """

    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
    )
    stdout, stderr = monitor(proc)
    return proc.returncode, stdout, stderr


def main():
    log("cron job starting")
    args = sys.argv[1:]
    if not args:
        log("Error: Must provide one program to execute.")
        sys.exit(-1)

    try:
        ret, stdout, stderr = monitor_process(args)
    except KeyboardInterrupt:
        sys.exit(-1)

    if ret == 0:
        # All went well, let's leave!
        sys.exit(0)

    # We did not exit successfully, so report an error
    send_notification(" ".join(args), ret, stdout, stderr)
    sys.exit(ret)


if __name__ == "__main__":
    main()
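
For reviewers, a minimal sketch of how this wrapper is meant to be invoked (the exact command line below is an assumption; the crontab in this PR calls manage.py directly rather than through cron_job.py):

# Hypothetical invocation: run a job under cron_job.py so that a non-zero
# exit code posts the last 500 lines of stdout/stderr to Telegram.
/usr/local/bin/python /code/mapper/cron_job.py /usr/local/bin/python /code/mapper/manage.py cron cron-create-all

Everything after cron_job.py becomes the argv list handed to subprocess.Popen, so the wrapped command needs no extra quoting.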
15 changes: 15 additions & 0 deletions mbid_mapping/docker/consul_config.py.ctmpl
@@ -1,3 +1,5 @@
+import os
+
 {{- define "KEY" -}}
 {{ key (printf "docker-server-configs/LB/config.%s.json/%s" (env "DEPLOY_ENV") .) }}
 {{- end -}}
@@ -82,3 +84,16 @@ TYPESENSE_API_KEY = '''{{template "KEY" "typesense_api_key"}}'''
 
 # For debugging, only fetches a tiny portion of the data if True
 USE_MINIMAL_DATASET = False
+
+# Sentry config
+LOG_SENTRY = {
+    'dsn': '''{{template "KEY" "sentry/dsn"}}''',
+    'environment': '''{{template "KEY" "sentry/environment"}}''',
+    'release': os.getenv('GIT_SHA', None),
+    'traces_sample_rate': {{template "KEY" "sentry/traces_sample_rate"}},
+}
+DATASETS_SENTRY_DSN = '''{{template "KEY" "sentry/datasets_dsn"}}'''
+
+# For monitoring cron jobs
+SERVICE_MONITOR_TELEGRAM_BOT_TOKEN = '''{{template "KEY" "service_monitor_telegram_bot_token"}}'''
+SERVICE_MONITOR_TELEGRAM_CHAT_ID = '''{{template "KEY" "service_monitor_telegram_chat_id"}}'''
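
For context, a minimal sketch of how a rendered LOG_SENTRY dict like the one above is typically consumed at startup (this initialization code is not part of the diff; the dict keys map directly onto documented sentry_sdk.init() keyword arguments):

# Minimal sketch, not from this PR: initialize the Sentry SDK from config.
import sentry_sdk

import config

if config.LOG_SENTRY.get("dsn"):
    # dsn, environment, release and traces_sample_rate are all standard
    # sentry_sdk.init() keyword arguments.
    sentry_sdk.init(**config.LOG_SENTRY)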
17 changes: 9 additions & 8 deletions mbid_mapping/docker/crontab
@@ -1,23 +1,24 @@
 # Create the mapping indexes (typesense, canonical data tables) each day at 4am
-0 4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py create-all >> /code/mapper/lb-cron.log 2>&1
+0 5 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-create-all >> /code/mapper/lb-cron.log 2>&1
 
 # Run the huesound color sync hourly
-10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py update-coverart >> /code/mapper/lb-cron.log 2>&1
+10 * * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-coverart >> /code/mapper/lb-cron.log 2>&1
 
 # Rebuild the spotify metadata index every friday at 1 A.M.
-0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1
+0 1 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-spotify-metadata-index >> /code/mapper/cron-spotify-metadata-index.log 2>&1
 
 # Rebuild the apple music metadata index every friday at 2 A.M.
-0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1
+0 2 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-apple-metadata-index >> /code/mapper/cron-apple-metadata-index.log 2>&1
 
 # Rebuild the soundcloud music metadata index every friday at 3 A.M.
-0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1
+0 3 * * 5 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-soundcloud-metadata-index >> /code/mapper/cron-soundcloud-metadata-index.log 2>&1
 
 # Rebuild similar tag data at 2am sundays
-0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1
+0 2 * * 0 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-build-tag-similarity >> /code/mapper/cron-tag-similarity.log 2>&1
 
 # Build the mb entity caches from scratch on first thursday of every month, MB dumps run on Wed and Sat so avoid those days
-0 15 * * 4 listenbrainz [ $(date +\%d) -le 7 ] && /usr/local/bin/python /code/mapper/manage.py cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1
+# NOTE: This cron job fires every thursday, but the python script ensures that it only runs during the first week of the month
+0 15 * * 4 listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-build-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1
 
 # Update the mb entity caches incrementally every 4 hours
-0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1
+0 */4 * * * listenbrainz /usr/local/bin/python /code/mapper/manage.py cron cron-update-all-mb-caches >> /code/mapper/cron-mb-entity-cache.log 2>&1
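
These entries now route through a `cron` submenu that manage.py pulls in from manage_cron (see the manage.py diff below); manage_cron itself is not part of the visible diff. As a rough sketch only, a command in that submenu built around Sentry's documented cron-monitoring decorator might look like the following. The slug, command name, and job body are assumptions, not the PR's actual code, and Sentry initialization is assumed to have happened at startup as sketched earlier:

# Hypothetical sketch of a manage_cron.py command; the real module is not
# shown in this diff.
import click
from sentry_sdk.crons import monitor


def run_create_all():
    # Stand-in for the real job body; the actual command would invoke the
    # same mapping build code as manage.py's create-all.
    pass


@click.group()
def cli():
    pass


@cli.command()
@monitor(monitor_slug="cron-create-all")  # slug is an assumption
def cron_create_all():
    """ Cron-only entry point: Sentry records in_progress/ok/error
        check-ins around the wrapped call. """
    run_create_all()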
63 changes: 9 additions & 54 deletions mbid_mapping/manage.py
@@ -13,7 +13,7 @@
 from mapping.typesense_index import build_all as action_build_index
 from mapping.mapping_test.mapping_test import test_mapping as action_test_mapping
 from mapping.utils import log, CRON_LOG_FILE
-from mapping.release_colors import sync_release_color_table, incremental_update_release_color_table
+from mapping.release_colors import sync_release_color_table
 from reports.tracks_of_the_year import calculate_tracks_of_the_year
 from reports.top_discoveries import calculate_top_discoveries
 from mapping.mb_metadata_cache import create_mb_metadata_cache, incremental_update_mb_metadata_cache, \
@@ -23,14 +23,19 @@
 from mapping.spotify_metadata_index import create_spotify_metadata_index
 from mapping.apple_metadata_index import create_apple_metadata_index
 from similar.tag_similarity import create_tag_similarity
+
+from manage_cron import cli as cron_cli
 
 
 @click.group()
 def cli():
     pass
 
 
+# Add the "cron" submenu
+cron_cli.short_help = "cron jobs -- do not invoke manually."
+cli.add_command(cron_cli, name="cron")
+
 
 @cli.command()
 def create_all():
     """
@@ -57,6 +62,7 @@ def update_canonical_releases(use_lb_conn):
     """
     update_canonical_release_data(use_lb_conn)
 
+
 @cli.command()
 def test_mapping():
     """
@@ -81,26 +87,6 @@ def sync_coverart():
     sync_release_color_table()
 
 
-@cli.command()
-def update_coverart():
-    """
-    Update the release_color table incrementally. Designed to be called hourly by cron.
-    """
-    incremental_update_release_color_table()
-
-
-@cli.command()
-def cron_log():
-    """
-    Print the internal cron log file for debugging purposes.
-    """
-    if os.path.exists(CRON_LOG_FILE):
-        log("Current cron job log file:")
-        subprocess.run(["cat", CRON_LOG_FILE])
-    else:
-        log("Log file is empty")
-
-
 @cli.command()
 @click.argument('year', type=int)
 def top_discoveries(year):
@@ -175,38 +161,6 @@ def update_mb_artist_metadata_cache(use_lb_conn):
     incremental_update_mb_artist_metadata_cache(use_lb_conn)
 
 
-@cli.command()
-def cron_build_mb_metadata_cache():
-    """ Build the mb metadata cache and tables it depends on in production in appropriate databases.
-        After building the cache, cleanup mbid_mapping table.
-    """
-    create_mb_metadata_cache(True)
-    cleanup_mbid_mapping_table()
-
-
-@cli.command()
-@click.pass_context
-def cron_build_all_mb_caches(ctx):
-    """ Build all mb entity metadata cache and tables it depends on in production in appropriate
-        databases. After building the cache, cleanup mbid_mapping table.
-    """
-    ctx.invoke(cron_build_mb_metadata_cache)
-    ctx.invoke(build_mb_artist_metadata_cache)
-    ctx.invoke(build_mb_release_group_cache)
-
-
-@cli.command()
-@click.pass_context
-def cron_update_all_mb_caches(ctx):
-    """ Update all mb entity metadata cache in ListenBrainz. """
-
-    # In this context we want to use mb_conn, not lb_conn, like the functions that follow
-    update_canonical_release_data(False)
-    ctx.invoke(update_mb_metadata_cache)
-    ctx.invoke(update_mb_artist_metadata_cache)
-    ctx.invoke(update_mb_release_group_cache)
-
-
 @cli.command()
 @click.option("--use-lb-conn/--use-mb-conn", default=True, help="whether to create the tables in LB or MB")
 def build_spotify_metadata_index(use_lb_conn):
@@ -233,6 +187,7 @@ def build_soundcloud_metadata_index(use_lb_conn):
     """
     create_soundcloud_metadata_index(use_lb_conn)
 
+
 @cli.command()
 def build_tag_similarity():
     """