/* PR_START p--short-term-perf 02 */ Add LazyFormat for logging.

dbt-labs · Sep 23, 2024 · 390d0e5 · 390d0e5
1 parent e66b84a
commit 390d0e5
Show file tree

Hide file tree

Showing 4 changed files with 202 additions and 0 deletions.
diff --git a/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py b/metricflow-semantics/metricflow_semantics/formatting/formatting_helpers.py
@@ -0,0 +1,27 @@
+from __future__ import annotations
+
+import textwrap
+
+
+def mf_dedent(text: str) -> str:
+    """Remove leading newlines, dedents, and remove tailing newlines.
+
+    This function simplifies the somewhat-frequently used:
+
+        text = textwrap.dedent(
+            [triple quote][backslash]
+            Line 0
+            Line 1
+            [triple quote]
+        ).rstrip()
+
+    to:
+
+       text = mf_dedent(
+           [triple quote]
+           Line 0
+           Line 1
+           [triple quote]
+       )
+    """
+    return textwrap.dedent(text.lstrip("\n")).rstrip("\n")
diff --git a/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py b/metricflow-semantics/metricflow_semantics/mf_logging/lazy_formattable.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+from functools import cached_property
+from typing import Callable, Union, override
+
+from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many
+
+
+class LazyFormat:
+    """Lazily formats the given objects into a string representation for logging.
+
+    The formatting is done when this object is converted to a string to be compatible with the standard `logging`
+    library. This is done lazily as the formatting operation can be expensive and allows debug log statements to not
+    incur a performance overhead in production.
+
+    Example:
+        logger.debug(LazyFormat("Found path", start_point=point_0, end_point=point_1))
+
+        ->
+
+        DEBUG - Found path
+            start_point: Point(x=0.0, y=0.0)
+            end_point: Point(x=1.0, y=1.0)
+
+    To aid migration of existing log statements, the message can be a function so that f-strings can be lazily
+    evaluated as well. This style will be deprecated as log statements are updated.
+
+    Example:
+        logger.debug(f"Result is: {expensive_function()}")
+
+        ->
+
+        logger.debug(LazyFormat(lambda: f"Result is: {expensive_function()}")
+    """
+
+    def __init__(self, message: Union[str, Callable[[], str]], **kwargs) -> None:  # type: ignore[no-untyped-def]
+        """Initializer.
+
+        Args:
+            message: The message to log.
+            **kwargs: A dictionary of objects to format to text when `__str__` is called on this object.
+        """
+        self._message = message
+        self._kwargs = kwargs
+
+    @cached_property
+    def _str_value(self) -> str:
+        if callable(self._message):
+            message = self._message()
+        else:
+            message = self._message
+        return mf_pformat_many(message, self._kwargs)
+
+    @override
+    def __str__(self) -> str:
+        return self._str_value
diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/recorded_logging_context.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+import logging
+from contextlib import contextmanager
+from typing import Iterator, List, Optional, Sequence, Tuple
+
+from typing_extensions import override
+
+
+class RecordingLogHandler(logging.Handler):
+    """A log-record handler that stores them so that they can be checked in tests."""
+
+    def __init__(self) -> None:  # noqa: D107
+        super().__init__()
+        self._log_records: List[logging.LogRecord] = []
+
+    @override
+    def emit(self, record: logging.LogRecord) -> None:
+        self._log_records.append(record)
+
+    @property
+    def log_records(self) -> Sequence[logging.LogRecord]:
+        """Return the log records seen by the handler."""
+        return self._log_records
+
+    def get_last_message(self) -> Optional[str]:
+        """Return the message in the last log record, or None if this hasn't seen any."""
+        if len(self._log_records) == 0:
+            return None
+
+        return self._log_records[-1].message
+
+
+@contextmanager
+def recorded_logging_context(
+    logger: logging.Logger, logging_level_int: int
+) -> Iterator[Tuple[logging.Logger, RecordingLogHandler]]:
+    """Context with a logger (with the given log level) and associated handler to check what was logged.
+
+    The handler records all log records emitted that is appropriate for the given level during this context.
+    Log propagation could be disabled in this context to clean test log output, but some issues need to be resolved.
+    """
+    previous_logging_level = logger.level
+    handler = RecordingLogHandler()
+    logger.addHandler(handler)
+    try:
+        logger.setLevel(logging_level_int)
+        yield logger, handler
+    finally:
+        logger.removeHandler(handler)
+        logger.setLevel(previous_logging_level)
diff --git a/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py b/metricflow-semantics/tests_metricflow_semantics/mf_logging/test_lazy_format.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import logging
+from typing import override
+
+from metricflow_semantics.formatting.formatting_helpers import mf_dedent
+from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
+from tests_metricflow_semantics.mf_logging.recorded_logging_context import RecordingLogHandler, recorded_logging_context
+
+logger = logging.getLogger(__name__)
+
+
+def test_log_kwargs() -> None:
+    """Test that objects included via keyword args are formatted."""
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler):
+        recorded_logger.debug(
+            LazyFormat("Found candidates.", matches=[1, 2, 3], parameters={"field_0": "value_0", "field_1": "value_1"})
+        )
+        assert handler.get_last_message() == mf_dedent(
+            """
+            Found candidates.
+              matches: [1, 2, 3]
+              parameters: {'field_0': 'value_0', 'field_1': 'value_1'}
+            """
+        )
+
+
+def test_lambda() -> None:
+    """Test that a lambda that is passed in is called."""
+    example_message = "Example message."
+
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler):
+        recorded_logger.debug(LazyFormat(lambda: example_message))
+        assert handler.get_last_message() == example_message
+
+
+def test_lazy_object() -> None:
+    """Test that formatting of objects are done lazily and not when the logging level is not appropriate."""
+    assertion_message = "This should not have been formatted as a string."
+
+    class _NotFormattedObject:
+        @override
+        def __repr__(self) -> str:
+            raise AssertionError(assertion_message)
+
+    # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
+        recorded_logger.debug(LazyFormat("Found candidates", should_not_be_formatted=_NotFormattedObject()))
+
+
+def test_lazy_lambda() -> None:
+    """Test that a lambda input is not evaluated when the logging level is not appropriate."""
+    assertion_message = "This should not have been formatted as a string."
+
+    def _should_not_be_called() -> str:
+        raise AssertionError(assertion_message)
+
+    # Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
+    recorded_logger: logging.Logger
+    handler: RecordingLogHandler
+    with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
+        recorded_logger.debug(LazyFormat(lambda: f"{_should_not_be_called()}"))