Skip to content

Commit

Permalink
Improve logging of HTTP requests (#143)
Browse files Browse the repository at this point in the history
There were some issues in how the requests were logged through our
logger, namely:
- the URL in the log was the URL of the resource the request was made
to, but not the action on the resource (`RunClient.reboot()` would log
URL `https://api.apify.com/v2/actor-runs/RUN_ID`, without the `/reboot`
at the end)
- `ApifyClient` and `ApifyClientAsync` had different log messages and
details
- the request attempt and method were not logged

This fixes all of that.
  • Loading branch information
fnesveda authored Aug 25, 2023
1 parent 12e6b52 commit e902df6
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 26 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
Changelog
=========

[1.4.1](../../releases/tag/v1.4.1) - Unreleased
-----------------------------------------------

### Internal changes

- Improved logging of HTTP requests

[1.4.0](../../releases/tag/v1.4.0) - 2023-08-22
-----------------------------------------------

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "apify_client"
version = "1.4.0"
version = "1.4.1"
description = "Apify API client for Python"
readme = "README.md"
license = {text = "Apache Software License"}
Expand Down
22 changes: 15 additions & 7 deletions src/apify_client/_http_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from apify_shared.utils import ignore_docs, is_content_type_json, is_content_type_text, is_content_type_xml

from ._errors import ApifyApiError, InvalidResponseBodyError, _is_retryable_error
from ._logging import logger_name
from ._logging import log_context, logger_name
from ._utils import _retry_with_exp_backoff, _retry_with_exp_backoff_async

DEFAULT_BACKOFF_EXPONENTIAL_FACTOR = 2
Expand Down Expand Up @@ -133,6 +133,9 @@ def call(
stream: Optional[bool] = None,
parse_response: Optional[bool] = True,
) -> httpx.Response:
log_context.method.set(method)
log_context.url.set(url)

if stream and parse_response:
raise ValueError('Cannot stream response and parse it at the same time!')

Expand All @@ -141,7 +144,8 @@ def call(
httpx_client = self.httpx_client

def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
logger.debug(f'Sending request to {url}', extra={'attempt': attempt})
log_context.attempt.set(attempt)
logger.debug('Sending request')
try:
request = httpx_client.build_request(
method=method,
Expand All @@ -157,7 +161,7 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:

# If response status is < 300, the request was successful, and we can return the result
if response.status_code < 300:
logger.debug(f'Request to {url} successful', extra={'attempt': attempt, 'status_code': response.status_code})
logger.debug('Request successful', extra={'status_code': response.status_code})
if not stream:
if parse_response:
_maybe_parsed_body = self._maybe_parse_response(response)
Expand All @@ -168,17 +172,17 @@ def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
return response

except Exception as e:
logger.debug(f'Request to {url} threw exception', exc_info=e, extra={'attempt': attempt})
logger.debug('Request threw exception', exc_info=e)
if not _is_retryable_error(e):
logger.debug('Exception is not retryable', exc_info=e, extra={'attempt': attempt})
logger.debug('Exception is not retryable', exc_info=e)
stop_retrying()
raise e

# We want to retry only requests which are server errors (status >= 500) and could resolve on their own,
# and also retry rate limited requests that throw 429 Too Many Requests errors
logger.debug(f'Request to {url} unsuccessful', extra={'attempt': attempt, 'status_code': response.status_code})
logger.debug('Request unsuccessful', extra={'status_code': response.status_code})
if response.status_code < 500 and response.status_code != HTTPStatus.TOO_MANY_REQUESTS:
logger.debug('Status code is not retryable', extra={'attempt': attempt, 'status_code': response.status_code})
logger.debug('Status code is not retryable', extra={'status_code': response.status_code})
stop_retrying()
raise ApifyApiError(response, attempt)

Expand All @@ -204,6 +208,9 @@ async def call(
stream: Optional[bool] = None,
parse_response: Optional[bool] = True,
) -> httpx.Response:
log_context.method.set(method)
log_context.url.set(url)

if stream and parse_response:
raise ValueError('Cannot stream response and parse it at the same time!')

Expand All @@ -212,6 +219,7 @@ async def call(
httpx_async_client = self.httpx_async_client

async def _make_request(stop_retrying: Callable, attempt: int) -> httpx.Response:
log_context.attempt.set(attempt)
logger.debug('Sending request')
try:
request = httpx_async_client.build_request(
Expand Down
46 changes: 28 additions & 18 deletions src/apify_client/_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
import logging
from contextvars import ContextVar
from typing import TYPE_CHECKING, Any, Callable, Dict, Optional, Tuple, Type, cast
from typing import TYPE_CHECKING, Any, Callable, Dict, NamedTuple, Optional, Tuple, Type, cast

# Conditional import only executed when type checking, otherwise we'd get circular dependency issues
if TYPE_CHECKING:
Expand All @@ -15,12 +15,22 @@
# Logger used throughout the library
logger = logging.getLogger(logger_name)

# Context containing the details of the request and the resource client making the request
LogContext = NamedTuple('LogContext', [
('attempt', ContextVar[Optional[int]]),
('client_method', ContextVar[Optional[str]]),
('method', ContextVar[Optional[str]]),
('resource_id', ContextVar[Optional[str]]),
('url', ContextVar[Optional[str]]),
])

# Context variables containing the current resource client running in that context
# Used during logging to inject the resource client context to the log
ctx_client_method = ContextVar[Optional[str]]('client_method', default=None)
ctx_resource_id = ContextVar[Optional[str]]('resource_id', default=None)
ctx_url = ContextVar[Optional[str]]('url', default=None)
log_context = LogContext(
attempt=ContextVar('attempt', default=None),
client_method=ContextVar('client_method', default=None),
method=ContextVar('method', default=None),
resource_id=ContextVar('resource_id', default=None),
url=ContextVar('url', default=None),
)


# Metaclass for resource clients which wraps all their public methods
Expand All @@ -41,28 +51,25 @@ def _injects_client_details_to_log_context(fun: Callable) -> Callable:
if inspect.iscoroutinefunction(fun):
@functools.wraps(fun)
async def async_wrapper(resource_client: '_BaseBaseClient', *args: Any, **kwargs: Any) -> Any:
ctx_client_method.set(fun.__qualname__)
ctx_resource_id.set(resource_client.resource_id)
ctx_url.set(resource_client.url)
log_context.client_method.set(fun.__qualname__)
log_context.resource_id.set(resource_client.resource_id)

return await fun(resource_client, *args, **kwargs)
return async_wrapper
elif inspect.isasyncgenfunction(fun):
@functools.wraps(fun)
async def async_generator_wrapper(resource_client: '_BaseBaseClient', *args: Any, **kwargs: Any) -> Any:
ctx_client_method.set(fun.__qualname__)
ctx_resource_id.set(resource_client.resource_id)
ctx_url.set(resource_client.url)
log_context.client_method.set(fun.__qualname__)
log_context.resource_id.set(resource_client.resource_id)

async for item in fun(resource_client, *args, **kwargs):
yield item
return async_generator_wrapper
else:
@functools.wraps(fun)
def wrapper(resource_client: '_BaseBaseClient', *args: Any, **kwargs: Any) -> Any:
ctx_client_method.set(fun.__qualname__)
ctx_resource_id.set(resource_client.resource_id)
ctx_url.set(resource_client.url)
log_context.client_method.set(fun.__qualname__)
log_context.resource_id.set(resource_client.resource_id)

return fun(resource_client, *args, **kwargs)
return wrapper
Expand All @@ -72,9 +79,11 @@ def wrapper(resource_client: '_BaseBaseClient', *args: Any, **kwargs: Any) -> An
# but adds the current logging context to the record
class _ContextInjectingFilter(logging.Filter):
def filter(self, record: logging.LogRecord) -> bool:
record.client_method = ctx_client_method.get()
record.resource_id = ctx_resource_id.get()
record.url = ctx_url.get()
record.client_method = log_context.client_method.get()
record.resource_id = log_context.resource_id.get()
record.method = log_context.method.get()
record.url = log_context.url.get()
record.attempt = log_context.attempt.get()
return True


Expand All @@ -86,6 +95,7 @@ def filter(self, record: logging.LogRecord) -> bool:
class _DebugLogFormatter(logging.Formatter):
empty_record = logging.LogRecord('dummy', 0, 'dummy', 0, 'dummy', None, None)

# Gets the extra fields from the log record which are not present on an empty record
def _get_extra_fields(self, record: logging.LogRecord) -> Dict[str, Any]:
extra_fields: Dict[str, Any] = {}
for key, value in record.__dict__.items():
Expand Down

0 comments on commit e902df6

Please sign in to comment.