From c5db5dfdb40e60ed9afbe8bc1f8d5961681108f0 Mon Sep 17 00:00:00 2001
From: Matas Gumbinas
Date: Tue, 31 Dec 2024 17:30:23 +0200
Subject: [PATCH 1/2] feat(mappings): Implement overlayed and obfuscated dicts

---
Review changes:
- MappingOverlayMixin: keys present in both mappings are iterated and counted once.
- MappingOverlayMixin.__delitem__: a missing key always raises KeyError.
- ObfuscatedMappingMixin.__setitem__: the key is unhidden only after the write succeeded.
- tests: the obfuscated view is compared against the source mapping; shadowed keys
  and rejected writes are covered.

 src/ezserialization/_mappings.py              | 165 ++++++++++++++++++
 .../test_custom_mappings.py                   |  41 +++++
 2 files changed, 206 insertions(+)
 create mode 100644 src/ezserialization/_mappings.py
 create mode 100644 tests/ezserialization_tests/test_custom_mappings.py

diff --git a/src/ezserialization/_mappings.py b/src/ezserialization/_mappings.py
new file mode 100644
index 0000000..28d4a93
--- /dev/null
+++ b/src/ezserialization/_mappings.py
@@ -0,0 +1,165 @@
+"""Zero-copy mapping views: a layered (overlay over base) dict and a key-hiding dict."""
+
+import itertools
+from abc import abstractmethod
+from collections.abc import MutableMapping
+from typing import Generic, Hashable, Mapping, TypeVar, Union
+
+__all__ = [
+    "OverlayedDict",
+    "ObfuscatedDict",
+]
+
+_OM = TypeVar("_OM", bound=Union[Mapping, MutableMapping])
+_BM = TypeVar("_BM", bound=Union[Mapping, MutableMapping])
+
+
+class MappingOverlayMixin(MutableMapping, Generic[_OM, _BM]):
+    """
+    View two mappings, an overlay and a base, as a single mapping without copying them.
+
+    Lookups try the overlay first and the base second, so a key present in both resolves
+    to the overlay's value and is reported only once by iteration and ``len()``.
+    """
+
+    def __setitem__(self, key, value, /):
+        # Update the key where it already lives; brand-new keys go to the base mapping.
+        for mapping in self.inner_mappings:
+            if key in mapping:
+                mapping[key] = value
+                return
+        self.inner_mappings[1][key] = value
+
+    def __delitem__(self, key, /):
+        # Only the first mapping holding the key is touched, mirroring the lookup order.
+        for mapping in self.inner_mappings:
+            if key in mapping:
+                del mapping[key]
+                return
+        # A missing key is a KeyError regardless of whether the base supports deletion.
+        raise KeyError(key)
+
+    def __getitem__(self, key, /):
+        for mapping in self.inner_mappings:
+            if key in mapping:
+                return mapping[key]
+        raise KeyError(key)
+
+    def __len__(self):
+        overlay, base = self.inner_mappings
+        # Overlay keys shadowing a base key must not be counted twice.
+        return len(base) + sum(1 for key in overlay if key not in base)
+
+    def __iter__(self):
+        overlay, base = self.inner_mappings
+        # Skip base keys shadowed by the overlay so that every key is yielded once.
+        return itertools.chain(overlay, (key for key in base if key not in overlay))
+
+    @property
+    @abstractmethod
+    def inner_mappings(self) -> tuple[_OM, _BM]:
+        """Return the wrapped mappings as an ``(overlay, base)`` pair."""
+
+    def set_overlay_item(self, key, value):
+        """
+        Store ``value`` under ``key`` in the overlay mapping.
+
+        :raises KeyError: If ``key`` is already present in the base mapping.
+        """
+        if key in self.inner_mappings[1]:
+            raise KeyError(f"Key {key} is already present at base mapping!")
+        self.inner_mappings[0][key] = value
+
+    def get_overlay_item(self, key):
+        """Return the value stored under ``key`` in the overlay mapping only."""
+        return self.inner_mappings[0][key]
+
+
+class OverlayedDict(MappingOverlayMixin[_OM, _BM], dict):
+    """``dict`` subclass presenting ``overlay`` layered on top of ``base`` without copying them."""
+
+    def __init__(self, overlay: _OM, base: _BM):
+        self._maps = (overlay, base)
+        # NOTE(review): the native dict storage is seeded with placeholder entries, presumably so
+        # that code inspecting the raw dict size (e.g. the json encoder) does not treat this view
+        # as empty -- confirm before changing.
+        super().__init__({"overlay": overlay, "base": base})
+
+    @property
+    def inner_mappings(self) -> tuple[_OM, _BM]:
+        return self._maps
+
+
+_M = TypeVar("_M", bound=Union[Mapping, MutableMapping])
+
+
+class ObfuscatedMappingMixin(MutableMapping, Generic[_M]):
+    """View a mapping with some of its keys hidden, without copying the mapping."""
+
+    def __setitem__(self, key, value, /):
+        # Write first: if the inner mapping rejects the write, the key has to stay hidden.
+        self.inner_mapping[key] = value
+        self.unhide_key(key)
+
+    def __delitem__(self, key, /):
+        if self.key_is_hidden(key):
+            raise KeyError(f"Key {key} is hidden!")
+        del self.inner_mapping[key]
+
+    def __getitem__(self, key, /):
+        if self.key_is_hidden(key):
+            raise KeyError(f"Key {key} is hidden!")
+        return self.inner_mapping[key]
+
+    def __len__(self):
+        # Count lazily instead of materialising a tuple of every visible key.
+        return sum(1 for _ in self)
+
+    def __iter__(self):
+        return (key for key in self.inner_mapping if not self.key_is_hidden(key))
+
+    @property
+    @abstractmethod
+    def inner_mapping(self) -> _M:
+        """Return the wrapped mapping."""
+
+    @abstractmethod
+    def hide_key(self, key: Hashable) -> None:
+        """Mark ``key`` as hidden."""
+
+    @abstractmethod
+    def key_is_hidden(self, key: Hashable) -> bool:
+        """Return whether ``key`` is currently hidden."""
+
+    @abstractmethod
+    def unhide_key(self, key: Hashable) -> None:
+        """Make ``key`` visible again."""
+
+
+class ObfuscatedDict(ObfuscatedMappingMixin[_M], dict):
+    """
+    ``dict`` subclass presenting ``mapping`` minus the keys listed in ``hidden_keys``.
+
+    ``hidden_keys`` is kept by reference and is mutated by ``hide_key()`` / ``unhide_key()``.
+    """
+
+    def __init__(self, mapping: _M, hidden_keys: set[Hashable]):
+        self._mapping = mapping
+        self._hidden_keys = hidden_keys
+        # NOTE(review): the native dict storage is seeded with placeholder entries, presumably so
+        # that code inspecting the raw dict size (e.g. the json encoder) does not treat this view
+        # as empty -- confirm before changing.
+        super().__init__({"mapping": mapping, "hidden_keys": hidden_keys})
+
+    @property
+    def inner_mapping(self) -> _M:
+        return self._mapping
+
+    def hide_key(self, key: Hashable) -> None:
+        self._hidden_keys.add(key)
+
+    def key_is_hidden(self, key: Hashable) -> bool:
+        return key in self._hidden_keys
+
+    def unhide_key(self, key: Hashable) -> None:
+        self._hidden_keys.discard(key)
diff --git a/tests/ezserialization_tests/test_custom_mappings.py b/tests/ezserialization_tests/test_custom_mappings.py
new file mode 100644
index 0000000..3ee732c
--- /dev/null
+++ b/tests/ezserialization_tests/test_custom_mappings.py
@@ -0,0 +1,41 @@
+import json
+from types import MappingProxyType
+
+import pytest
+
+from ezserialization._mappings import ObfuscatedDict, OverlayedDict
+
+
+@pytest.mark.parametrize("dict_fn", [dict, MappingProxyType])
+def test_mapping_overlay_dict_json_compatibility(dict_fn):
+    base = {"a": 1}
+    overlay = dict_fn({"b": 2})
+    mapping = OverlayedDict(overlay, base)
+    assert {**base, **overlay} == mapping
+    assert json.loads(json.dumps(mapping)) == mapping
+
+
+@pytest.mark.parametrize("dict_fn", [dict, MappingProxyType])
+def test_mapping_overlay_dict_shadowed_keys(dict_fn):
+    base = {"a": 1, "b": 2}
+    overlay = dict_fn({"b": 3})
+    mapping = OverlayedDict(overlay, base)
+    assert len(mapping) == 2
+    assert sorted(mapping) == ["a", "b"]
+    assert {"a": 1, "b": 3} == mapping
+
+
+@pytest.mark.parametrize("dict_fn", [dict, MappingProxyType])
+def test_obfuscated_dict_json_compatibility(dict_fn):
+    hidden_keys = {"a"}
+    base = dict_fn({"a": 1, "b": 2})
+    mapping = ObfuscatedDict(base, hidden_keys=set(hidden_keys))
+    assert {k: v for k, v in base.items() if k not in hidden_keys} == mapping
+    assert json.loads(json.dumps(mapping)) == mapping
+
+
+def test_obfuscated_dict_failed_write_keeps_key_hidden():
+    mapping = ObfuscatedDict(MappingProxyType({"a": 1}), hidden_keys={"a"})
+    with pytest.raises(TypeError):
+        mapping["a"] = 2
+    assert "a" not in mapping
From c3e7520e44a86ef7b17291e8c6dee2e49cc6c450 Mon Sep 17 00:00:00 2001
From: Matas Gumbinas
Date: Tue, 31 Dec 2024 17:31:35 +0200
Subject: [PATCH 2/2] feat(serialization): Use overlayed & obfuscated dicts to prevent copying the data

---
NOTE(review): the line structure and indentation of the context lines below were
restored from a whitespace-collapsed copy of this patch; verify them against the
target files before applying.

 pyproject.toml                        |  2 +-
 src/ezserialization/_serialization.py | 27 +++++++++++++++++----------
 2 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f2b95b7..a04bfd5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "ezserialization"
-version = "0.3.0"
+version = "0.4.0"
 description = "Simple, easy to use & transparent python objects serialization & deserialization."
 authors = ["Matas Gumbinas "]
 repository = "https://github.com/gMatas/ezserialization"
diff --git a/src/ezserialization/_serialization.py b/src/ezserialization/_serialization.py
index 319ceb2..e4c73f7 100644
--- a/src/ezserialization/_serialization.py
+++ b/src/ezserialization/_serialization.py
@@ -3,8 +3,9 @@
 import importlib
 import threading
 from abc import abstractmethod
-from copy import copy
-from typing import Callable, Dict, Iterator, Mapping, Optional, Protocol, Type, TypeVar, cast
+from typing import Callable, Dict, Iterator, Mapping, Optional, Protocol, Type, TypeVar, Union, cast
+
+from ezserialization._mappings import ObfuscatedDict, OverlayedDict
 
 __all__ = [
     "TYPE_FIELD_NAME",
@@ -18,11 +19,12 @@
     "set_typename_alias",
 ]
 
+
 TYPE_FIELD_NAME = "_type_"
 """
 This attribute is being injected into the "serialized" object's dict to hold information about the source type.
 
-This value can customized by the end-user.
+This value can be customized by the end-user.
 """
 
 
@@ -147,15 +149,21 @@ def wrapper(cls_: Type[_T]) -> Type[_T]:
 
         def wrap_to_dict(method: Callable[..., Mapping]):
             @functools.wraps(method)
-            def to_dict_wrapper(__ctx, *__args, **__kwargs) -> Mapping:
+            def to_dict_wrapper(__ctx, *__args, **__kwargs) -> Union[Mapping, OverlayedDict]:
                 data = method(__ctx, *__args, **__kwargs)
                 # Wrap object with serialization metadata.
                 if TYPE_FIELD_NAME in data:
-                    raise KeyError(f"Key '{TYPE_FIELD_NAME}' already exist in the serialized data mapping!")
+                    raise KeyError(
+                        f"Key '{TYPE_FIELD_NAME}' already exist in the serialized data mapping! "
+                        f"Change ezserialization's {TYPE_FIELD_NAME=} to some other value to not conflict with "
+                        f"your existing codebase."
+                    )
                 if _get_serialization_enabled():
                     typename = _typenames_[__ctx if isinstance(__ctx, type) else type(__ctx)]
-                    return {TYPE_FIELD_NAME: typename, **data}  # TODO: avoid copying data if possible
-                return copy(data)  # TODO: avoid copying data if possible
+                    # Avoid copying data when data is immutable mapping i.e. `MappingProxyType` is received
+                    # instead of dict.
+                    return OverlayedDict({TYPE_FIELD_NAME: typename}, data)
+                return data
 
             return to_dict_wrapper
 
@@ -177,9 +185,8 @@ def from_dict_wrapper(*__args, **__kwargs) -> Serializable:
                     src = __args[0]
                     __args = __args[1:]
 
-                # Drop deserialization metadata.
-                src = dict(src)  # TODO: avoid copying data
-                src.pop(TYPE_FIELD_NAME, None)
+                # Conceal instead of copy the data without deserialization metadata.
+                src = ObfuscatedDict(src, hidden_keys={TYPE_FIELD_NAME})
 
                 # Deserialize.
                 if hasattr(method, "__self__"):