Skip to content

Commit

Permalink
Cache certain GitHub requests (#381)
Browse files Browse the repository at this point in the history
* Cache certain GitHub requests

To avoid GH rate limit issues with big big clients we can cache some of our more used requests
that are unlikely to change in a short period of time.

This work was done initially on PR #379 by @trent-codecov.
The present changes are an attempt to integrate the changes better into our existing code.
In particular we wanted to use `OurOwnCache`, but share it for torngit instances.

I thought it was better to have a shared `TorngitCache` among all the torngit instances from a process.
This is not a problem, as the configuration of the cache doesn't change within a process, and so
`TorngitCache` was born. It is configured (for real) by the first instance, but every instance calls
the function (that then does nothing).

The more troublesome portion was that the original PR had logging for cache hits.
`OurOwnCache` is crystal clear fully transparent, and it's very versatile as well,
so it was a bit tricky to get info from it while making sure we don't log dangerous things.

The solution I landed on is to let each cached function specify whether the cache should log hits, and in that case
which arguments from the function should be logged. This is as good a logging as we can get, I believe.

Notice from the tests `shared/tests/unit/helpers/test_cache.py::TestCache::test_simple_caching_fake_backend_with_params`
that there is a difference if you use args or kwargs. And the cache understands this difference as 2 separate func calls
(so it becomes 2 different keys). There's a difference in logging as well.

In any case this work was a bit rushed, and that was the best balance I could get to.
If you think this is a bit overkill, well I thought so too, but it's not in a bad shape.
And it's very easy to use.

No Tickets associated

* Make `init_torngit_cache` a method of the singleton.

Moving logic from `init_torngit_cache` to the `TorngitCache` so it's a method of the singleton.
The new method is `initialize`.
  • Loading branch information
giovanni-guidini authored Jun 7, 2023
1 parent 20f2646 commit 3565648
Show file tree
Hide file tree
Showing 9 changed files with 283 additions and 11 deletions.
2 changes: 2 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@
"httpx>=0.23.0",
"oauthlib",
"redis",
"typing",
"typing_extensions",
],
)
52 changes: 45 additions & 7 deletions shared/helpers/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import logging
import pickle
from functools import wraps
from typing import Any, Callable, Hashable
from typing import Any, Callable, Hashable, List

from redis import Redis, RedisError

Expand Down Expand Up @@ -115,6 +115,23 @@ def set(self, key: str, ttl: int, value: Any):
log.warning("Unable to set cache on redis", exc_info=True)


class LogMapping(dict):
    """Lets cached functions specify which of their arguments may be logged.

    In some cases we want to log cache hits for debugging purposes,
    but some of the arguments might be dangerous to log (e.g. tokens),
    so only the explicitly allow-listed args/kwargs are included.
    """

    @property
    def args_indexes_to_log(self) -> List[int]:
        """Indexes of positional args from the function to be logged (if present)"""
        return self.get("args_indexes_to_log", [])

    @property
    def kwargs_keys_to_log(self) -> List[Any]:
        """Keys of keyword args from the function to be logged (if present)"""
        return self.get("kwargs_keys_to_log", [])


class OurOwnCache(object):
"""
This is codecov distributed cache's implementation.
Expand Down Expand Up @@ -162,7 +179,9 @@ def configure(self, backend: BaseBackend, app: str = "shared"):
def get_backend(self) -> BaseBackend:
return self._backend

def cache_function(self, ttl: int = DEFAULT_TTL) -> "FunctionCacher":
def cache_function(
self, ttl: int = DEFAULT_TTL, log_hits: bool = False, log_map: LogMapping = None
) -> "FunctionCacher":
"""Creates a FunctionCacher with all the needed configuration to cache a function
Args:
Expand All @@ -171,26 +190,48 @@ def cache_function(self, ttl: int = DEFAULT_TTL) -> "FunctionCacher":
Returns:
FunctionCacher: A FunctionCacher that can decorate any callable
"""
return FunctionCacher(self, ttl)
return FunctionCacher(
self, ttl, log_hits, LogMapping(log_map if log_map is not None else {})
)


class FunctionCacher(object):
def __init__(self, cache_instance: OurOwnCache, ttl: int):
def __init__(
    self, cache_instance: OurOwnCache, ttl: int, log_hits: bool, log_map: LogMapping
):
    """Stores the caching configuration for one decorated function.

    Args:
        cache_instance: the cache whose backend stores/retrieves the values.
        ttl: time-to-live, in seconds, for cached entries.
        log_hits: whether cache hits should be logged.
        log_map: allow-list of arguments that are safe to include in hit logs.
    """
    self.cache_instance = cache_instance
    self.ttl = ttl
    self.log_hits = log_hits
    self.log_map = log_map

def __call__(self, func) -> Callable:
    """Decorate ``func`` with caching, picking the sync or async wrapper."""
    wrapper_factory = (
        self.cache_async_function
        if asyncio.iscoroutinefunction(func)
        else self.cache_synchronous_function
    )
    return wrapper_factory(func)

def _log_hits(self, func, args, kwargs, key) -> None:
    """Log a cache hit, including only the allow-listed args/kwargs from log_map."""
    safe_args = [
        args[index]
        for index in self.log_map.args_indexes_to_log
        if index < len(args)
    ]
    safe_kwargs = {
        name: kwargs[name]
        for name in self.log_map.kwargs_keys_to_log
        if name in kwargs
    }
    log.info(
        "Returning cache hit",
        extra=dict(func=func, args=safe_args, kwargs=safe_kwargs, key=key),
    )

def cache_synchronous_function(self, func: Callable) -> Callable:
@wraps(func)
def wrapped(*args, **kwargs):
key = self.generate_key(func, args, kwargs)
value = self.cache_instance.get_backend().get(key)
if value is not NO_VALUE:
metrics.incr(f"{self.cache_instance._app}.caches.{func.__name__}.hits")
if self.log_hits:
self._log_hits(func, args, kwargs, key)
return value
metrics.incr(f"{self.cache_instance._app}.caches.{func.__name__}.misses")
with metrics.timer(
Expand Down Expand Up @@ -225,6 +266,3 @@ async def wrapped(*args, **kwargs):
return result

return wrapped


cache = OurOwnCache()
5 changes: 4 additions & 1 deletion shared/torngit/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import httpx

from shared.torngit.cache import torngit_cache
from shared.torngit.enums import Endpoints
from shared.typings.oauth_token_types import OauthConsumerToken, OnRefreshCallback

Expand Down Expand Up @@ -90,8 +91,10 @@ def __init__(
self._oauth = oauth_consumer_token
self.data = {"owner": {}, "repo": {}}
self.verify_ssl = verify_ssl

self.data.update(kwargs)
# This has the side effect of initializing the torngit_cache
# (if not yet initialized)
torngit_cache.initialize()

def __repr__(self):
return "<%s slug=%s ownerid=%s repoid=%s>" % (
Expand Down
67 changes: 67 additions & 0 deletions shared/torngit/cache/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from typing import Union

from redis import Redis
from typing_extensions import Literal

from shared.config import get_config
from shared.helpers.cache import OurOwnCache, RedisBackend


def get_redis_url() -> str:
    """Return the Redis URL from config, defaulting to ``redis://redis:6379``."""
    configured_url = get_config("services", "redis_url")
    if configured_url is None:
        # Fall back to the conventional in-cluster hostname and default port.
        return "redis://redis:6379"
    return configured_url


CachedEndpoint = Union[Literal["check"], Literal["compare"], Literal["status"]]


class TorngitCache(OurOwnCache):
    """A cache shared by all torngit instances of a process.

    Configuration cannot change within a process, so the first caller of
    ``initialize`` configures the cache for everyone; later calls are no-ops.
    """

    def __init__(self):
        super().__init__()
        # endpoint name -> ttl in seconds; populated by initialize()
        self.ttls = {}
        self._initialized = False
        self._enabled = False

    def initialize(self) -> None:
        """Initializes and configures the TorngitCache according to config.

        Idempotent: only the first call does any work.
        """
        if self.is_initialized:
            return
        use_cache = get_config("services", "vcs_cache", "enabled", default=False)
        if use_cache:
            redis = Redis.from_url(get_redis_url())
            backend = RedisBackend(redis_connection=redis)
            app = get_config("services", "vcs_cache", "metrics_app", default=None)
            # NOTE(review): when metrics_app is unset this passes app=None, while
            # OurOwnCache.configure defaults app to "shared" — confirm the
            # resulting "None.caches.*" metric prefix is intended.
            self.configure(backend=backend, app=app)
            self._enabled = True
        ttls = {
            "check": get_config("services", "vcs_cache", "check_duration", default=120),
            "compare": get_config(
                "services", "vcs_cache", "compare_duration", default=120
            ),
            "status": get_config(
                "services", "vcs_cache", "status_duration", default=120
            ),
        }
        self.ttls = ttls
        self._initialized = True

    @property
    def is_initialized(self) -> bool:
        # True once initialize() has run, whether or not caching was enabled.
        return self._initialized

    @property
    def is_enabled(self) -> bool:
        # True only when services.vcs_cache.enabled is set in config.
        return self._enabled

    def get_ttl(self, endpoint: CachedEndpoint) -> int:
        """Return the ttl in seconds for a cached endpoint, defaulting to 120.

        NOTE(review): github.py calls this inside decorators, which are
        evaluated at class-definition time — possibly before initialize()
        has populated self.ttls, in which case this always returns the 120
        default; confirm that is intended.
        """
        return self.ttls.get(endpoint, 120)


# This instance is shared among all the Torngit instances created on the same process.
# It doesn't matter because the cache is distributed and configuration can't change
# between these instances, so it's ok to share the same cache.
torngit_cache = TorngitCache()
22 changes: 20 additions & 2 deletions shared/torngit/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from shared.metrics import metrics
from shared.torngit.base import TokenType, TorngitBaseAdapter
from shared.torngit.cache import torngit_cache
from shared.torngit.enums import Endpoints
from shared.torngit.exceptions import (
TorngitClientError,
Expand All @@ -23,7 +24,7 @@
TorngitUnauthorizedError,
)
from shared.torngit.status import Status
from shared.utils.urls import url_concat, url_escape
from shared.utils.urls import url_concat

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -663,6 +664,11 @@ async def set_commit_status(
)
return res

@torngit_cache.cache_function(
torngit_cache.get_ttl("status"),
log_hits=True,
log_map={"args_indexes_to_log": [0], "kwargs_keys_to_log": ["commit"]},
)
async def get_commit_statuses(self, commit, token=None):
token = self.get_token_by_type_if_none(token, TokenType.status)
page = 0
Expand Down Expand Up @@ -694,7 +700,6 @@ async def get_commit_statuses(self, commit, token=None):
)
if len(provided_statuses) < 100:
break

return Status(statuses)

# Source
Expand Down Expand Up @@ -743,6 +748,11 @@ async def get_commit_diff(self, commit, context=None, token=None):
raise
return self.diff_to_json(res)

@torngit_cache.cache_function(
torngit_cache.get_ttl("compare"),
log_hits=True,
log_map={"args_indexes_to_log": [0, 1], "kwargs_keys_to_log": ["base", "head"]},
)
async def get_compare(
self, base, head, context=None, with_commits=True, token=None
):
Expand Down Expand Up @@ -1088,6 +1098,14 @@ async def create_check_run(
)
return res["id"]

@torngit_cache.cache_function(
torngit_cache.get_ttl("check"),
log_hits=True,
log_map={
"args_indexes_to_log": [0, 1, 2],
"kwargs_keys_to_log": ["check_suite_id", "head_sha", "name"],
},
)
async def get_check_runs(
self, check_suite_id=None, head_sha=None, name=None, token=None
):
Expand Down
10 changes: 10 additions & 0 deletions shared/validation/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,16 @@ def check_task_config_key(field, value, error):
},
},
},
"vsc_cache": {
"type": "dict",
"schema": {
"enabled": {"type": "boolean"},
"metrics_app": {"type": "string"},
"check_duration": {"type": "integer"},
"compare_duration": {"type": "integer"},
"status_duration": {"type": "integer"},
},
},
},
},
"site": {"type": "dict", "schema": user_yaml_schema},
Expand Down
55 changes: 54 additions & 1 deletion tests/unit/helpers/test_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
from shared.helpers.cache import (
NO_VALUE,
BaseBackend,
LogMapping,
OurOwnCache,
RedisBackend,
make_hash_sha256,
)
from shared.helpers.cache import log as cache_log
from shared.helpers.cache import make_hash_sha256


class RandomCounter(object):
Expand All @@ -20,6 +22,9 @@ def call_function(self):
self.value += 1
return self.value

def call_function_args(self, base, head, something=None, danger=True):
    # 'something' and 'danger' are intentionally unused: tests pass them to
    # check how cache keys and hit-logging treat positional vs keyword args.
    return base + head

async def async_call_function(self):
self.value += 2
self.value *= 4
Expand Down Expand Up @@ -123,6 +128,54 @@ def test_simple_caching_fake_backend_no_params(self, mocker):
assert cached_function() == 1
assert cached_function() == 1

def test_simple_caching_fake_backend_with_params(self, mocker):
    """Cache hits are logged with only the allow-listed args/kwargs, and
    calling with positional vs keyword args produces distinct cache keys."""
    mock_log = mocker.patch.object(cache_log, "info")
    log_map = LogMapping(
        {
            "args_indexes_to_log": [0, 1],
            "kwargs_keys_to_log": ["base", "head", "something"],
        }
    )
    cache = OurOwnCache()
    cache.configure(FakeBackend())
    assert cache._app == "shared"
    sample_function = RandomCounter().call_function_args
    cached_function = cache.cache_function(log_hits=True, log_map=log_map)(
        sample_function
    )
    assert cached_function("base", "head", something="batata") == "basehead"
    mock_log.assert_not_called()  # not a hit, so not logged
    assert cached_function("base", "head", something="else") == "basehead"
    mock_log.assert_not_called()  # not a hit, so not logged
    assert cached_function("base", "head", something="else") == "basehead"
    assert mock_log.call_count == 1
    args, kwargs = mock_log.call_args
    assert args == ("Returning cache hit",)
    assert "extra" in kwargs
    extra_logged = kwargs["extra"]
    extra_args = extra_logged["args"]
    extra_kwargs = extra_logged["kwargs"]
    assert extra_args == ["base", "head"]
    assert extra_kwargs == {
        "something": "else"
    }  # Notice that danger is not part of it
    # Changing the way we call the function ("head" now a kwarg -> new key)
    assert cached_function("base", head="head", something="else") == "basehead"
    assert mock_log.call_count == 1  # This is not cached because the args changed
    assert cached_function("base", head="head", something="else") == "basehead"
    assert mock_log.call_count == 2
    args, kwargs = mock_log.call_args
    assert args == ("Returning cache hit",)
    assert "extra" in kwargs
    extra_logged = kwargs["extra"]
    extra_args = extra_logged["args"]
    extra_kwargs = extra_logged["kwargs"]
    assert extra_args == ["base"]
    assert extra_kwargs == {
        "head": "head",
        "something": "else",
    }  # Notice that danger is not part of it

@pytest.mark.asyncio
async def test_simple_caching_fake_backend_async_no_params(self, mocker):
cache = OurOwnCache()
Expand Down
Loading

0 comments on commit 3565648

Please sign in to comment.