feat: add a separate log for optimizations (fixes #523) (#525)

* feat: use separate python logger for optimizations This is a precursor that will allow us to log these more verbosely and to a different location than the rest of the taskgraph log messages. * fix: IndexSearch tests to pass for the right reasons Without this, we ended up passing due to entering the KeyError block in IndexSearch rather than the actual blocks we were supposed to. * test: add tests for optimization log messages * feat: always write optimization log information verbosely to a separate file This is a _little_ bit nasty because we don't have a great log configuration story in taskgraph to begin with, but it works. The `taskgraph_decision` parts are also untested because we don't have a good testing story there. I don't particularly want to shave those yaks as part of this...
taskcluster · Jun 24, 2024 · 649bba5 · 649bba5
1 parent 8ed01ce
commit 649bba5
Show file tree

Hide file tree

Showing 4 changed files with 63 additions and 8 deletions.
diff --git a/src/taskgraph/decision.py b/src/taskgraph/decision.py
@@ -74,8 +74,28 @@ def taskgraph_decision(options, parameters=None):
      * generating a set of artifacts to memorialize the graph
      * calling TaskCluster APIs to create the graph
     """
+    level = logging.INFO
     if options.get("verbose"):
-        logging.root.setLevel(logging.DEBUG)
+        level = logging.DEBUG
+
+    logging.root.setLevel(level)
+    # Handlers must have an explicit level set for them to properly filter
+    # messages from child loggers (such as the optimization log a few lines
+    # down).
+    for h in logging.root.handlers:
+        h.setLevel(level)
+
+    if not os.path.isdir(ARTIFACTS_DIR):
+        os.mkdir(ARTIFACTS_DIR)
+
+    # optimizations are difficult to debug after the fact, so we always
+    # log them at DEBUG level, and write the log as a separate artifact
+    opt_log = logging.getLogger("optimization")
+    opt_log.setLevel(logging.DEBUG)
+    opt_handler = logging.FileHandler(ARTIFACTS_DIR / "optimizations.log", mode="w")
+    if logging.root.handlers:
+        opt_handler.setFormatter(logging.root.handlers[0].formatter)
+    opt_log.addHandler(opt_handler)
 
     parameters = parameters or (
         lambda graph_config: get_decision_parameters(graph_config, options)

diff --git a/src/taskgraph/optimize/base.py b/src/taskgraph/optimize/base.py
@@ -24,7 +24,7 @@
 from taskgraph.util.python_path import import_sibling_modules
 from taskgraph.util.taskcluster import find_task_id_batched, status_task_batched
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("optimization")
 registry = {}
 
 

diff --git a/src/taskgraph/optimize/strategies.py b/src/taskgraph/optimize/strategies.py
@@ -5,7 +5,7 @@
 from taskgraph.util.path import match as match_path
 from taskgraph.util.taskcluster import find_task_id, status_task
 
-logger = logging.getLogger(__name__)
+logger = logging.getLogger("optimization")
 
 
 @register_strategy("index-search")

diff --git a/test/test_optimize_strategies.py b/test/test_optimize_strategies.py
@@ -1,6 +1,7 @@
 # Any copyright is dedicated to the public domain.
 # http://creativecommons.org/publicdomain/zero/1.0/
 
+import logging
 import os
 from datetime import datetime
 from time import mktime
@@ -24,27 +25,37 @@ def params():
 
 
 @pytest.mark.parametrize(
-    "state,expires,expected",
+    "state,expires,expected,logs",
     (
         (
             "completed",
             "2021-06-06T14:53:16.937Z",
             False,
+            (
+                "not replacing {label} with {taskid} because it expires before {deadline}",
+            ),
         ),
-        ("completed", "2021-06-08T14:53:16.937Z", "abc"),
+        ("completed", "2021-06-08T14:53:16.937Z", "abc", ()),
         (
             "exception",
             "2021-06-08T14:53:16.937Z",
             False,
+            (
+                "not replacing {label} with {taskid} because it is in failed or exception state",
+            ),
         ),
         (
             "failed",
             "2021-06-08T14:53:16.937Z",
             False,
+            (
+                "not replacing {label} with {taskid} because it is in failed or exception state",
+            ),
         ),
     ),
 )
-def test_index_search(responses, params, state, expires, expected):
+def test_index_search(caplog, responses, params, state, expires, expected, logs):
+    caplog.set_level(logging.DEBUG, "optimization")
     taskid = "abc"
     index_path = "foo.bar.latest"
 
@@ -79,13 +90,27 @@ def test_index_search(responses, params, state, expires, expected):
     deadline = "2021-06-07T19:03:20.482Z"
     assert (
         opt.should_replace_task(
-            {}, params, deadline, ([index_path], label_to_taskid, taskid_to_status)
+            {"label": "task-label"},
+            params,
+            deadline,
+            ([index_path], label_to_taskid, taskid_to_status),
         )
         == expected
     )
     # test the non-batched variant as well
     assert opt.should_replace_task({}, params, deadline, [index_path]) == expected
 
+    if logs:
+        log_records = [
+            (
+                "optimization",
+                logging.DEBUG,
+                m.format(label="task-label", taskid=taskid, deadline=deadline),
+            )
+            for m in logs
+        ]
+        assert caplog.record_tuples == log_records
+
 
 @pytest.mark.parametrize(
     "params,file_patterns,should_optimize",
@@ -111,8 +136,18 @@ def test_index_search(responses, params, state, expires, expected):
         ),
     ),
 )
-def test_skip_unless_changed(params, file_patterns, should_optimize):
+def test_skip_unless_changed(caplog, params, file_patterns, should_optimize):
+    caplog.set_level(logging.DEBUG, "optimization")
     task = make_task("task")
 
     opt = SkipUnlessChanged()
     assert opt.should_remove_task(task, params, file_patterns) == should_optimize
+
+    if should_optimize:
+        assert caplog.record_tuples == [
+            (
+                "optimization",
+                logging.DEBUG,
+                f'no files found matching a pattern in `skip-unless-changed` for "{task.label}"',
+            ),
+        ]