From 50c8fb6bdef4230cc912316eef91110a0eb25f48 Mon Sep 17 00:00:00 2001 From: Mauko Quiroga Date: Sat, 8 Feb 2020 18:28:52 +0100 Subject: [PATCH] Add test to get_memory_usage --- openfisca_core/data_storage.py | 82 +++++++++++++------ .../data_storage/test_in_memory_storage.py | 13 +++ .../core/data_storage/test_on_disk_storage.py | 22 +++++ 3 files changed, 91 insertions(+), 26 deletions(-) diff --git a/openfisca_core/data_storage.py b/openfisca_core/data_storage.py index d82e320398..12997bca4b 100644 --- a/openfisca_core/data_storage.py +++ b/openfisca_core/data_storage.py @@ -31,25 +31,13 @@ def delete( ) -> Optional[numpy.ndarray]: ... - def get_known_periods(self) -> List[periods.Period]: - return list(self._arrays.keys()) - - def get_memory_usage(self): - if not self._arrays: - return { - "nb_arrays": 0, - "total_nb_bytes": 0, - "cell_size": numpy.nan, - } - - nb_arrays = len(self._arrays) - array = next(iter(self._arrays.values())) + @abc.abstractmethod + def get_memory_usage(self) -> dict: + ... - return { - "nb_arrays": nb_arrays, - "total_nb_bytes": array.nbytes * nb_arrays, - "cell_size": array.itemsize, - } + @abc.abstractmethod + def get_known_periods(self) -> List[periods.Period]: + ... class InMemoryStorage(StorageLike): @@ -99,13 +87,33 @@ def delete( return self._arrays.pop(period) + def get_known_periods(self) -> List[periods.Period]: + return list(self._arrays.keys()) + + def get_memory_usage(self) -> dict: + if not self._arrays: + return { + "nb_arrays": 0, + "total_nb_bytes": 0, + "cell_size": numpy.nan, + } + + nb_arrays = len(self._arrays) + array = next(iter(self._arrays.values())) + + return { + "nb_arrays": nb_arrays, + "total_nb_bytes": array.nbytes * nb_arrays, + "cell_size": array.itemsize, + } + class OnDiskStorage(StorageLike): """ Low-level class responsible for storing and retrieving calculated vectors on disk. """ - _arrays: dict + _files: dict _enums: dict is_eternal: bool preserve_storage_dir: bool @@ -117,7 +125,7 @@ def __init__( is_eternal: bool = False, preserve_storage_dir: bool = False, ) -> None: - self._arrays = {} + self._files = {} self._enums = {} self.is_eternal = is_eternal self.preserve_storage_dir = preserve_storage_dir @@ -135,8 +143,8 @@ def get(self, period: periods.Period) -> Optional[numpy.ndarray]: period = periods.period(periods.ETERNITY) try: - values = self._arrays[period] - return self._decode_file(values) + file = self._files[period] + return self._decode_file(file) except KeyError: return None @@ -155,14 +163,14 @@ def put(self, value: numpy.ndarray, period: periods.Period) -> None: value = value.view(numpy.ndarray) numpy.save(path, value) - self._arrays[period] = path + self._files[period] = path def delete( self, period: Optional[periods.Period] = None, ) -> Optional[numpy.ndarray]: if period is None: - self._arrays = {} + self._files = {} return None if self.is_eternal: @@ -170,10 +178,10 @@ def delete( period = periods.period(period) - return self._arrays.pop(period) + return self._files.pop(period) def restore(self): - self._arrays = files = {} + self._files = files = {} # Restore self._arrays from content of storage_dir. for filename in os.listdir(self.storage_dir): if not filename.endswith('.npy'): @@ -183,6 +191,28 @@ def restore(self): period = periods.period(filename_core) files[period] = path + def get_known_periods(self) -> List[periods.Period]: + return list(self._files.keys()) + + def get_memory_usage(self) -> dict: + if not self._files: + return { + "nb_files": 0, + "total_nb_bytes": 0, + "cell_size": numpy.nan, + } + + nb_files = len(self._files) + file = next(iter(self._files.values())) + size = os.path.getsize(file) + array = self._decode_file(file) + + return { + "nb_files": nb_files, + "total_nb_bytes": size * nb_files, + "cell_size": array.itemsize, + } + def __del__(self): if self.preserve_storage_dir: return diff --git a/tests/core/data_storage/test_in_memory_storage.py b/tests/core/data_storage/test_in_memory_storage.py index 239a9ebbae..3015a6b418 100644 --- a/tests/core/data_storage/test_in_memory_storage.py +++ b/tests/core/data_storage/test_in_memory_storage.py @@ -98,3 +98,16 @@ def test_get_known_periods(storage, value, period): result = storage.get_known_periods() assert result == [period] + + +def test_get_memory_usage(storage, value, period): + storage = storage() + storage.put(value, period) + + result = storage.get_memory_usage() + + assert result == { + "nb_arrays": 1, + "total_nb_bytes": 8, + "cell_size": 8, + } diff --git a/tests/core/data_storage/test_on_disk_storage.py b/tests/core/data_storage/test_on_disk_storage.py index b852043c4e..125110f872 100644 --- a/tests/core/data_storage/test_on_disk_storage.py +++ b/tests/core/data_storage/test_on_disk_storage.py @@ -95,3 +95,25 @@ def test_delete_when_is_eternal(eternal_storage, value): result = storage.get("qwerty"), storage.get("azerty") assert result == (None, None) + + +def test_get_known_periods(storage, value, period): + storage = storage() + storage.put(value, period) + + result = storage.get_known_periods() + + assert result == [period] + + +def test_get_memory_usage(storage, value, period): + storage = storage() + storage.put(value, period) + + result = storage.get_memory_usage() + + assert result == { + "nb_files": 1, + "total_nb_bytes": 136, + "cell_size": 8, + }