From 649bba50850dd4c89ad59949ff1be1f36e05f083 Mon Sep 17 00:00:00 2001
From: "Ben Hearsum (he/him)" <ben@mozilla.com>
Date: Sun, 23 Jun 2024 21:13:26 -0400
Subject: [PATCH] feat: add a separate log for optimizations (fixes #523)
 (#525)

* feat: use separate python logger for optimizations

This is a precursor that will allow us to log these more verbosely and to a different location than the rest of the taskgraph log messages.

* fix: IndexSearch tests to pass for the right reasons

Without this, we ended up passing due to entering the KeyError block in IndexSearch rather than the actual blocks we were supposed to.

* test: add tests for optimization log messages

* feat: always write optimization log information verbosely to a separate file

This is a _little_ bit nasty because we don't have a great log configuration story in taskgraph to begin with, but it works. The `taskgraph_decision` parts are also untested because we don't have a good testing story there. I don't particularly want to shave those yaks as part of this...
---
 src/taskgraph/decision.py            | 22 +++++++++++++-
 src/taskgraph/optimize/base.py       |  2 +-
 src/taskgraph/optimize/strategies.py |  2 +-
 test/test_optimize_strategies.py     | 45 ++++++++++++++++++++++++----
 4 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/src/taskgraph/decision.py b/src/taskgraph/decision.py
index 6a37308b..5398bd70 100644
--- a/src/taskgraph/decision.py
+++ b/src/taskgraph/decision.py
@@ -74,8 +74,28 @@ def taskgraph_decision(options, parameters=None):
      * generating a set of artifacts to memorialize the graph
      * calling TaskCluster APIs to create the graph
     """
+    level = logging.INFO
     if options.get("verbose"):
-        logging.root.setLevel(logging.DEBUG)
+        level = logging.DEBUG
+
+    logging.root.setLevel(level)
+    # Handlers must have an explicit level set for them to properly filter
+    # messages from child loggers (such as the optimization log a few lines
+    # down).
+    for h in logging.root.handlers:
+        h.setLevel(level)
+
+    if not os.path.isdir(ARTIFACTS_DIR):
+        os.mkdir(ARTIFACTS_DIR)
+
+    # optimizations are difficult to debug after the fact, so we always
+    # log them at DEBUG level, and write the log as a separate artifact
+    opt_log = logging.getLogger("optimization")
+    opt_log.setLevel(logging.DEBUG)
+    opt_handler = logging.FileHandler(ARTIFACTS_DIR / "optimizations.log", mode="w")
+    if logging.root.handlers:
+        opt_handler.setFormatter(logging.root.handlers[0].formatter)
+    opt_log.addHandler(opt_handler)
 
     parameters = parameters or (
         lambda graph_config: get_decision_parameters(graph_config, options)
diff --git a/src/taskgraph/optimize/base.py b/src/taskgraph/optimize/base.py
index 917ab82a..f914e71a 100644
--- a/src/taskgraph/optimize/base.py
+++ b/src/taskgraph/optimize/base.py
@@ -24,7 +24,7 @@
 from taskgraph.util.python_path import import_sibling_modules
 from taskgraph.util.taskcluster import find_task_id_batched, status_task_batched
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("optimization")
 registry = {}
 
 
diff --git a/src/taskgraph/optimize/strategies.py b/src/taskgraph/optimize/strategies.py
index 621e28aa..2f436c52 100644
--- a/src/taskgraph/optimize/strategies.py
+++ b/src/taskgraph/optimize/strategies.py
@@ -5,7 +5,7 @@
 from taskgraph.util.path import match as match_path
 from taskgraph.util.taskcluster import find_task_id, status_task
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("optimization")
 
 
 @register_strategy("index-search")
diff --git a/test/test_optimize_strategies.py b/test/test_optimize_strategies.py
index 9cd408c3..2203e984 100644
--- a/test/test_optimize_strategies.py
+++ b/test/test_optimize_strategies.py
@@ -1,6 +1,7 @@
 # Any copyright is dedicated to the public domain.
 # http://creativecommons.org/publicdomain/zero/1.0/
 
+import logging
 import os
 from datetime import datetime
 from time import mktime
@@ -24,27 +25,37 @@ def params():
 
 
 @pytest.mark.parametrize(
-    "state,expires,expected",
+    "state,expires,expected,logs",
     (
         (
             "completed",
             "2021-06-06T14:53:16.937Z",
             False,
+            (
+                "not replacing {label} with {taskid} because it expires before {deadline}",
+            ),
         ),
-        ("completed", "2021-06-08T14:53:16.937Z", "abc"),
+        ("completed", "2021-06-08T14:53:16.937Z", "abc", ()),
         (
             "exception",
             "2021-06-08T14:53:16.937Z",
             False,
+            (
+                "not replacing {label} with {taskid} because it is in failed or exception state",
+            ),
         ),
         (
             "failed",
             "2021-06-08T14:53:16.937Z",
             False,
+            (
+                "not replacing {label} with {taskid} because it is in failed or exception state",
+            ),
         ),
     ),
 )
-def test_index_search(responses, params, state, expires, expected):
+def test_index_search(caplog, responses, params, state, expires, expected, logs):
+    caplog.set_level(logging.DEBUG, "optimization")
     taskid = "abc"
     index_path = "foo.bar.latest"
 
@@ -79,13 +90,27 @@ def test_index_search(responses, params, state, expires, expected):
     deadline = "2021-06-07T19:03:20.482Z"
     assert (
         opt.should_replace_task(
-            {}, params, deadline, ([index_path], label_to_taskid, taskid_to_status)
+            {"label": "task-label"},
+            params,
+            deadline,
+            ([index_path], label_to_taskid, taskid_to_status),
         )
         == expected
     )
     # test the non-batched variant as well
     assert opt.should_replace_task({}, params, deadline, [index_path]) == expected
 
+    if logs:
+        log_records = [
+            (
+                "optimization",
+                logging.DEBUG,
+                m.format(label="task-label", taskid=taskid, deadline=deadline),
+            )
+            for m in logs
+        ]
+        assert caplog.record_tuples == log_records
+
 
 @pytest.mark.parametrize(
     "params,file_patterns,should_optimize",
@@ -111,8 +136,18 @@ def test_index_search(responses, params, state, expires, expected):
         ),
     ),
 )
-def test_skip_unless_changed(params, file_patterns, should_optimize):
+def test_skip_unless_changed(caplog, params, file_patterns, should_optimize):
+    caplog.set_level(logging.DEBUG, "optimization")
     task = make_task("task")
 
     opt = SkipUnlessChanged()
     assert opt.should_remove_task(task, params, file_patterns) == should_optimize
+
+    if should_optimize:
+        assert caplog.record_tuples == [
+            (
+                "optimization",
+                logging.DEBUG,
+                f'no files found matching a pattern in `skip-unless-changed` for "{task.label}"',
+            ),
+        ]