Skip to content

Commit

Permalink
Add LazyFormat to reduce logging overhead on performance (#1419)
Browse files Browse the repository at this point in the history
For larger semantic manifests / queries, the overhead of formatting objects
for logging / writing log lines can be significant. This adds a
`LazyFormat` class can be used with the existing logger so that log
messages at the debug level don't get formatted nor written when the
configured log level would exclude it. Using this, a quick way to
migrate the code base would be a replacement like the following:

```
    Example:
        logger.debug(f"Result is: {expensive_function()}")
        ->
        logger.debug(LazyFormat(lambda: f"Result is: {expensive_function()}")
```
  • Loading branch information
plypaul authored Sep 24, 2024
1 parent 9bee96f commit 9674fe7
Show file tree
Hide file tree
Showing 4 changed files with 205 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from __future__ import annotations

import textwrap


def mf_dedent(text: str) -> str:
"""Remove leading newlines, dedents, and remove tailing newlines.
This function simplifies the somewhat-frequently used:
text = textwrap.dedent(
[triple quote][backslash]
Line 0
Line 1
[triple quote]
).rstrip()
to:
text = mf_dedent(
[triple quote]
Line 0
Line 1
[triple quote]
)
"""
return textwrap.dedent(text.lstrip("\n")).rstrip("\n")
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from __future__ import annotations

from functools import cached_property
from typing import Callable, Union

from typing_extensions import override

from metricflow_semantics.mf_logging.pretty_print import mf_pformat_many


class LazyFormat:
"""Lazily formats the given objects into a string representation for logging.
The formatting is done when this object is converted to a string to be compatible with the standard `logging`
library. This is done lazily as the formatting operation can be expensive and allows debug log statements to not
incur a performance overhead in production.
Example:
logger.debug(LazyFormat("Found path", start_point=point_0, end_point=point_1))
->
DEBUG - Found path
start_point: Point(x=0.0, y=0.0)
end_point: Point(x=1.0, y=1.0)
To aid migration of existing log statements, the message can be a function so that f-strings can be lazily
evaluated as well. This style will be deprecated as log statements are updated.
Example:
logger.debug(f"Result is: {expensive_function()}")
->
logger.debug(LazyFormat(lambda: f"Result is: {expensive_function()}")
"""

def __init__(self, message: Union[str, Callable[[], str]], **kwargs) -> None: # type: ignore[no-untyped-def]
"""Initializer.
Args:
message: The message or a function that returns a message.
**kwargs: A dictionary of objects to format to text when `__str__` is called on this object.
"""
self._message = message
self._kwargs = kwargs

@cached_property
def _str_value(self) -> str:
"""Cache the result as `__str__` can be called multiple times for multiple log handlers."""
if callable(self._message):
message = self._message()
else:
message = self._message
return mf_pformat_many(message, self._kwargs, preserve_raw_strings=True)

@override
def __str__(self) -> str:
return self._str_value
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from __future__ import annotations

import logging
from contextlib import contextmanager
from typing import Iterator, List, Optional, Sequence, Tuple

from typing_extensions import override


class RecordingLogHandler(logging.Handler):
"""A log-record handler that stores them so that they can be checked in tests."""

def __init__(self) -> None: # noqa: D107
super().__init__()
self._log_records: List[logging.LogRecord] = []

@override
def emit(self, record: logging.LogRecord) -> None:
self._log_records.append(record)

@property
def log_records(self) -> Sequence[logging.LogRecord]:
"""Return the log records seen by the handler."""
return self._log_records

def get_last_message(self) -> Optional[str]:
"""Return the message in the last log record, or None if this hasn't seen any."""
if len(self._log_records) == 0:
return None

return self._log_records[-1].message


@contextmanager
def recorded_logging_context(
logger: logging.Logger, logging_level_int: int
) -> Iterator[Tuple[logging.Logger, RecordingLogHandler]]:
"""Context with a logger (with the given log level) and associated handler to check what was logged.
The handler records all log records emitted that is appropriate for the given level during this context.
Log propagation could be disabled in this context to clean test log output, but some issues need to be resolved.
"""
previous_logging_level = logger.level
handler = RecordingLogHandler()
logger.addHandler(handler)
try:
logger.setLevel(logging_level_int)
yield logger, handler
finally:
logger.removeHandler(handler)
logger.setLevel(previous_logging_level)
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from __future__ import annotations

import logging

from metricflow_semantics.formatting.formatting_helpers import mf_dedent
from metricflow_semantics.mf_logging.lazy_formattable import LazyFormat
from tests_metricflow_semantics.mf_logging.recorded_logging_context import RecordingLogHandler, recorded_logging_context
from typing_extensions import override

logger = logging.getLogger(__name__)


def test_log_kwargs() -> None:
"""Test that objects included via keyword args are formatted."""
recorded_logger: logging.Logger
handler: RecordingLogHandler
with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler):
recorded_logger.debug(
LazyFormat("Found candidates.", matches=[1, 2, 3], parameters={"field_0": "value_0", "field_1": "value_1"})
)
assert handler.get_last_message() == mf_dedent(
"""
Found candidates.
matches: [1, 2, 3]
parameters: {'field_0': 'value_0', 'field_1': 'value_1'}
"""
)


def test_lambda() -> None:
"""Test that a lambda that is passed in is called."""
example_message = "Example message."

recorded_logger: logging.Logger
handler: RecordingLogHandler
with recorded_logging_context(logger, logging.DEBUG) as (recorded_logger, handler):
recorded_logger.debug(LazyFormat(lambda: example_message))
assert handler.get_last_message() == example_message


def test_lazy_object() -> None:
"""Test that formatting of objects are done lazily and not when the logging level is not appropriate."""
assertion_message = "This should not have been formatted as a string."

class _NotFormattedObject:
@override
def __repr__(self) -> str:
raise AssertionError(assertion_message)

# Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
recorded_logger: logging.Logger
handler: RecordingLogHandler
with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
recorded_logger.debug(LazyFormat("Found candidates", should_not_be_formatted=_NotFormattedObject()))


def test_lazy_lambda() -> None:
"""Test that a lambda input is not evaluated when the logging level is not appropriate."""
assertion_message = "This should not have been formatted as a string."

def _should_not_be_called() -> str:
raise AssertionError(assertion_message)

# Logging level is INFO, so DEBUG messages shouldn't be logged / formatted.
recorded_logger: logging.Logger
handler: RecordingLogHandler
with recorded_logging_context(logger, logging.INFO) as (recorded_logger, handler):
recorded_logger.debug(LazyFormat(lambda: f"{_should_not_be_called()}"))

0 comments on commit 9674fe7

Please sign in to comment.