From 9674fe7833ee76f36c98db20a175388f5a5fd2a7 Mon Sep 17 00:00:00 2001
From: Paul Yang <github@ardorful.com>
Date: Mon, 23 Sep 2024 19:42:00 -0700
Subject: [PATCH] Add `LazyFormat` to reduce logging overhead on performance
 (#1419)

For larger semantic manifests / queries, the overhead of formatting objects
for logging / writing log lines can be significant. This adds a
`LazyFormat` class can be used with the existing logger so that log
messages at the debug level don't get formatted nor written when the
configured log level would exclude it. Using this, a quick way to
migrate the code base would be a replacement like the following:

```
    Example:
        logger.debug(f"Result is: {expensive_function()}")
        ->
        logger.debug(LazyFormat(lambda: f"Result is: {expensive_function()}")
```
---
 .../formatting/formatting_helpers.py          | 27 ++++++++
 .../mf_logging/lazy_formattable.py            | 59 ++++++++++++++++
 .../mf_logging/recorded_logging_context.py    | 51 ++++++++++++++
 .../mf_logging/test_lazy_format.py            | 68 +++++++++++++++++++
 4 files changed, 205 insertions(+)
 create mode 100644 metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py
 create mode 100644 metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py
 create mode 100644 metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py
 create mode 100644 metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py

diff --git a/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py b/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py
new file mode 100644
index 0000000000..33a0fb36dd
--- /dev/null
+++ b/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+import textwrap
+
+
+def mf_dedent(text: str) -> str:
+    """Remove leading newlines, dedents, and remove tailing newlines.
+
+    This function simplifies the somewhat-frequently used:
+
+        text = textwrap.dedent(
+            [triple quote][backslash]
+            Line 0
+            Line 1
+            [triple quote]
+        ).rstrip()
+
+    to:
+
+       text = mf_dedent(
+           [triple quote]
+           Line 0
+           Line 1
+           [triple quote]
+       )
+    """
+    return textwrap.dedent(text.lstrip("\n")).rstrip("\n")
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py
new file mode 100644
index 0000000000..3de7e3710a
--- /dev/null
+++ b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py
@@ -0,0 +1,59 @@
+from __future__ import annotations
+
+from functools import cached_property
+from typing import Callable, Union
+
+from typing_extensions import override
+
+from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many
+
+
+class LazyFormat:
+    """Lazily formats the given objects into a string representation for logging.
+
+    The formatting is done when this object is converted to a string to be compatible with the standard `logging`
+    library. This is done lazily as the formatting operation can be expensive and allows debug log statements to not
+    incur a performance overhead in production.
+
+    Example:
+        logger.debug(LazyFormat("Found path", start_point=point_0, end_point=point_1))
+
+        ->
+
+        DEBUG - Found path
+            start_point: Point(x=0.0, y=0.0)
+            end_point: Point(x=1.0, y=1.0)
+
+    To aid migration of existing log statements, the message can be a function so that f-strings can be lazily
+    evaluated as well. This style will be deprecated as log statements are updated.
+
+    Example:
+        logger.debug(f"Result is: {expensive_function()}")
+
+        ->
+
+        logger.debug(LazyFormat(lambda: f"Result is: {expensive_function()}")
+    """
+
+    def __init__(self, message: Union[str, Callable[[], str]], **kwargs) -> None:  # type: ignore[no-untyped-def]
+        """Initializer.
+
+        Args:
+            message: The message or a function that returns a message.
+            **kwargs: A dictionary of objects to format to text when `__str__` is called on this object.
+        """
+        self._message = message
+        self._kwargs = kwargs
+
+    @cached_property
+    def _str_value(self) -> str:
+        """Cache the result as `__str__` can be called multiple times for multiple log handlers."""
+        if callable(self._message):
+            message = self._message()
+        else:
+            message = self._message
+        return mf_pformat_many(message, self._kwargs, preserve_raw_strings=True)
+
+    @override
+    def __str__(self) -> str:
+        return self._str_value
diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py
new file mode 100644
index 0000000000..8858bfbffc
--- /dev/null
+++ b/metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+import logging
+from contextlib import contextmanager
+from typing import Iterator, List, Optional, Sequence, Tuple
+
+from typing_extensions import override
+
+
+class RecordingLogHandler(logging.Handler):
+    """A log-record handler that stores them so that they can be checked in tests."""
+
+    def __init__(self) -> None:  # noqa: D107
+        super().__init__()
+        self._log_records: List[logging.LogRecord] = []
+
+    @override
+    def emit(self, record: logging.LogRecord) -> None:
+        self._log_records.append(record)
+
+    @property
+    def log_records(self) -> Sequence[logging.LogRecord]:
+        """Return the log records seen by the handler."""
+        return self._log_records
+
+    def get_last_message(self) -> Optional[str]:
+        """Return the message in the last log record, or None if this hasn't seen any."""
+        if len(self._log_records) == 0:
+            return None
+
+        return self._log_records[-1].message
+
+
+@contextmanager
+def recorded_logging_context(
+    logger: logging.Logger, logging_level_int: int
+) -> Iterator[Tuple[logging.Logger, RecordingLogHandler]]:
+    """Context with a logger (with the given log level) and associated handler to check what was logged.
+
+    The handler records all log records emitted that is appropriate for the given level during this context.
+    Log propagation could be disabled in this context to clean test log output, but some issues need to be resolved.
+    """
+    previous_logging_level = logger.level
+    handler = RecordingLogHandler()
+    logger.addHandler(handler)
+    try:
+        logger.setLevel(logging_level_int)
+        yield logger, handler
+    finally:
+        logger.removeHandler(handler)
+        logger.setLevel(previous_logging_level)
diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py
new file mode 100644
index 0000000000..105d143ec7
--- /dev/null
+++ b/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import logging
+
+from metricflow_semantics.formatting.formatting_helpers import mf_dedent
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
+from tests_metricflow_semantics.mf_logging.recorded_logging_context import RecordingLogHandler, recorded_logging_context
+from typing_extensions import override
+
+logger = logging.getLogger(__name__)
+
+
+def test_log_kwargs() -> None:
+    """Test that objects included via keyword args are formatted."""
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler):
+        recorded_logger.debug(
+            LazyFormat("Found candidates.", matches=[1, 2, 3], parameters={"field_0": "value_0", "field_1": "value_1"})
+        )
+        assert handler.get_last_message() == mf_dedent(
+            """
+            Found candidates.
+              matches: [1, 2, 3]
+              parameters: {'field_0': 'value_0', 'field_1': 'value_1'}
+            """
+        )
+
+
+def test_lambda() -> None:
+    """Test that a lambda that is passed in is called."""
+    example_message = "Example message."
+
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler):
+        recorded_logger.debug(LazyFormat(lambda: example_message))
+        assert handler.get_last_message() == example_message
+
+
+def test_lazy_object() -> None:
+    """Test that formatting of objects are done lazily and not when the logging level is not appropriate."""
+    assertion_message = "This should not have been formatted as a string."
+
+    class _NotFormattedObject:
+        @override
+        def __repr__(self) -> str:
+            raise AssertionError(assertion_message)
+
+    # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
+        recorded_logger.debug(LazyFormat("Found candidates", should_not_be_formatted=_NotFormattedObject()))
+
+
+def test_lazy_lambda() -> None:
+    """Test that a lambda input is not evaluated when the logging level is not appropriate."""
+    assertion_message = "This should not have been formatted as a string."
+
+    def _should_not_be_called() -> str:
+        raise AssertionError(assertion_message)
+
+    # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
+        recorded_logger.debug(LazyFormat(lambda: f"{_should_not_be_called()}"))