feat(cache): replace HDF cache with Pickle #904

Merged · 2 commits · Nov 21, 2024
5 changes: 5 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,10 @@
# Changelog

### Changed

- Drop support for the HDF cache and use a Pickle cache instead. CacheEnum.HDF is now an alias for CacheEnum.PICKLE
  and will be dropped in v0.15.0

## [0.13.0] - 2024-07-17

### Changed
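For readers upgrading, here is a minimal migration sketch based on this changelog entry and the `peakina/cache.py` diff below; the import path matches the module changed in this PR, but the cache directory is a hypothetical example:

```python
from peakina.cache import Cache, CacheEnum

# From v0.14.0 on, request the pickle-backed cache explicitly.
cache = Cache.get_cache(CacheEnum.PICKLE, cache_dir="/tmp/peakina-cache")

# CacheEnum.HDF still works as an alias, but it emits a DeprecationWarning,
# returns a pickle cache, and is scheduled for removal in v0.15.0.
legacy_cache = Cache.get_cache(CacheEnum.HDF, cache_dir="/tmp/peakina-cache")
```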
22 changes: 0 additions & 22 deletions README.md
@@ -88,25 +88,3 @@ If you just want to download a file, without converting it to a pandas dataframe
...
Image size: 60284 bytes
```

## Installation on macOS M1 chipset

## install everything
```console
brew install hdf5 snappy
HDF5_DIR="/opt/homebrew/Cellar/hdf5/1.12.1/" CPPFLAGS="-I/opt/homebrew/Cellar/snappy/1.1.9/include -L/opt/homebrew/Cellar/snappy/1.1.9/lib" poetry install
```

For more details, here is what is needed:

### install pytables
```console
brew install hdf5
HDF5_DIR="/opt/homebrew/Cellar/hdf5/1.12.1/" poetry run pip install tables
```

### install python-snappy
```console
brew install snappy
CPPFLAGS="-I/opt/homebrew/Cellar/snappy/1.1.9/include -L/opt/homebrew/Cellar/snappy/1.1.9/lib" poetry run pip install python-snappy
```
22 changes: 0 additions & 22 deletions docs/index.md
@@ -86,25 +86,3 @@ If you just want to download a file, without converting it to a pandas dataframe
...
Image size: 60284 bytes
```

## Installation on macOS M1 chipset

## install everything
```console
brew install hdf5 snappy
HDF5_DIR="/opt/homebrew/Cellar/hdf5/1.12.1/" CPPFLAGS="-I/opt/homebrew/Cellar/snappy/1.1.9/include -L/opt/homebrew/Cellar/snappy/1.1.9/lib" poetry install
```

For more details, here is what is needed:

### install pytables
```console
brew install hdf5
HDF5_DIR="/opt/homebrew/Cellar/hdf5/1.12.1/" poetry run pip install tables
```

### install python-snappy
```console
brew install snappy
CPPFLAGS="-I/opt/homebrew/Cellar/snappy/1.1.9/include -L/opt/homebrew/Cellar/snappy/1.1.9/lib" poetry run pip install python-snappy
```
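The build steps removed above covered the native dependencies of pytables (hdf5) and python-snappy (snappy) on Apple Silicon. The replacement cache relies on pandas' built-in pickle I/O, which needs no system packages; a minimal sketch with a made-up file path:

```python
import pandas as pd

# Round-trip a dataframe through a pickle file: no hdf5, snappy, or
# platform-specific build flags are involved.
df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})
df.to_pickle("/tmp/example.pkl")
assert pd.read_pickle("/tmp/example.pkl").equals(df)
```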
42 changes: 24 additions & 18 deletions peakina/cache.py
@@ -1,11 +1,13 @@
import warnings
from abc import ABCMeta, abstractmethod
from collections.abc import Callable
from contextlib import suppress
from datetime import timedelta
from enum import Enum
from functools import lru_cache, wraps
from pathlib import Path
from time import monotonic_ns, time
from typing import Any, Callable, TypedDict
from typing import Any, TypedDict

import pandas as pd

@@ -18,17 +20,25 @@ class InMemoryCached(TypedDict):

class CacheEnum(str, Enum):
MEMORY = "memory"
# FIXME: to be removed in v0.15.0
HDF = "hdf"
PICKLE = "pickle"


class Cache(metaclass=ABCMeta):
@staticmethod
def get_cache(kind: CacheEnum, *args: Any, **kwargs: Any) -> "Cache":
ALL_CACHES = {
CacheEnum.MEMORY: InMemoryCache,
CacheEnum.HDF: HDFCache,
}
return ALL_CACHES[kind](*args, **kwargs) # type: ignore[no-any-return]
if kind == CacheEnum.HDF:
warnings.warn(
"HDF Cache has been removed in v0.14.0, PickleCache will be used instead. "
"This will be an error in v0.15.0, please use CacheEnum.PICKLE instead",
DeprecationWarning,
)
kind = CacheEnum.PICKLE
if kind == CacheEnum.PICKLE:
return PickleCache(*args, **kwargs)
else:
return InMemoryCache(*args, **kwargs)

@staticmethod
def should_invalidate(
@@ -88,11 +98,13 @@ def delete(self, key: str) -> None:
del self._cache[key]


class HDFCache(Cache):
META_DF_KEY = "__meta__"
META_DF_KEY = "__meta__"


class PickleCache(Cache):
def __init__(self, cache_dir: str | Path) -> None:
self.cache_dir = Path(cache_dir).resolve()
self._meta_df_key = self.cache_dir / META_DF_KEY

def get_metadata(self) -> pd.DataFrame:
"""
@@ -101,20 +113,14 @@ def get_metadata(self) -> pd.DataFrame:
If the metadata file is not found or is corrupted, an empty one is recreated.
"""
try:
# We manually instantiate the HDFStore to be able to close it no matter what
# See https://github.com/pandas-dev/pandas/pull/28429 for more infos
store = pd.HDFStore(self.cache_dir / self.META_DF_KEY, mode="r")
try:
metadata = pd.read_hdf(store)
finally:
store.close()
metadata = pd.read_pickle(self._meta_df_key)
except Exception: # catch all, on purpose
metadata = pd.DataFrame(columns=["key", "mtime", "created_at"])
self.set_metadata(metadata)
return metadata

def set_metadata(self, df: pd.DataFrame) -> None:
df.to_hdf(self.cache_dir / self.META_DF_KEY, self.META_DF_KEY, mode="w")
df.to_pickle(self._meta_df_key)

def get(
self, key: str, mtime: float | None = None, expire: timedelta | None = None
@@ -135,7 +141,7 @@ def get(
self.delete(key)

try:
return pd.read_hdf(self.cache_dir / key)
return pd.read_pickle(self.cache_dir / key)
except FileNotFoundError:
raise KeyError(key)

@@ -148,7 +154,7 @@ def set(self, key: str, value: pd.DataFrame, mtime: float | None = None) -> None
metadata = metadata[metadata.key != key] # drop duplicates
metadata = pd.concat([metadata, pd.Series(infos).to_frame().T], ignore_index=True)
self.set_metadata(metadata)
value.to_hdf(self.cache_dir / key, key, mode="w")
value.to_pickle(self.cache_dir / key)
except OSError:
self.delete(key)
raise
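To see the new backend end to end, here is a hedged usage sketch based on the signatures visible in this diff (`PickleCache(cache_dir)`, `set(key, value, mtime=None)`, `get(key, mtime=None, expire=None)`); the temporary directory and key name are made up, and the cache directory is assumed to exist before use:

```python
import tempfile
import warnings
from pathlib import Path

import pandas as pd

from peakina.cache import Cache, CacheEnum, PickleCache

# PickleCache writes one pickle file per key plus a "__meta__" pickle,
# so the directory must already exist.
cache_dir = Path(tempfile.mkdtemp())
cache = PickleCache(cache_dir)

df = pd.DataFrame({"a": [1, 2, 3]})
cache.set("my_dataset", df, mtime=0.0)
assert cache.get("my_dataset").equals(df)

# In v0.14.0 the deprecated HDF kind resolves to a PickleCache and emits
# a DeprecationWarning; it is slated for removal in v0.15.0.
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    legacy = Cache.get_cache(CacheEnum.HDF, cache_dir=cache_dir)
assert isinstance(legacy, PickleCache)
assert any(issubclass(w.category, DeprecationWarning) for w in caught)
```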