diff --git a/CHANGELOG.md b/CHANGELOG.md index 45f267f..06d1b6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,20 @@ project adheres to clauses 1–8 of [Semantic Versioning](https://semver.org/spe ## [Unreleased] +## [0.14.0] (Jun 18 2024) + +### Changed + +- The option `decode_json` in `ParamDB.load()` was replaced with `raw_json`, which + allows loading the raw JSON string from the database. +- The order of data for `ParamData` objects in the underlying JSON representation was + changed; see `ParamDB.load()` for the new order. + +### Removed + +- `ParamDBKey.WRAPPER` was removed in favor of encoding these values using + `ParamDBKey.PARAM` with a class name of `None`. + ## [0.13.0] (Jun 14 2024) ### Added @@ -191,7 +205,8 @@ project adheres to clauses 1–8 of [Semantic Versioning](https://semver.org/spe - Database class `ParamDB` to store parameters in a SQLite file - Ability to retrieve the commit history as `CommitEntry` objects -[unreleased]: https://github.com/PainterQubits/paramdb/compare/v0.13.0...develop +[unreleased]: https://github.com/PainterQubits/paramdb/compare/v0.14.0...develop +[0.14.0]: https://github.com/PainterQubits/paramdb/releases/tag/v0.14.0 [0.13.0]: https://github.com/PainterQubits/paramdb/releases/tag/v0.13.0 [0.12.0]: https://github.com/PainterQubits/paramdb/releases/tag/v0.12.0 [0.11.0]: https://github.com/PainterQubits/paramdb/releases/tag/v0.11.0 diff --git a/CITATION.cff b/CITATION.cff index 928cd58..22226ff 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -4,6 +4,6 @@ authors: - family-names: "Hadley" given-names: "Alex" title: "ParamDB" -version: 0.13.0 -date-released: 2024-06-14 +version: 0.14.0 +date-released: 2024-06-18 url: "https://github.com/PainterQubits/paramdb" diff --git a/docs/conf.py b/docs/conf.py index 64d5f17..20404cc 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -4,7 +4,7 @@ project = "ParamDB" copyright = "2023–2024, California Institute of Technology" author = "Alex Hadley" -release = "0.13.0" +release = 
"0.14.0" # General configuration extensions = [ diff --git a/paramdb/_database.py b/paramdb/_database.py index 54eadb4..2806634 100644 --- a/paramdb/_database.py +++ b/paramdb/_database.py @@ -41,13 +41,13 @@ class ParamDBKey: """Key for ordinary lists.""" DICT = "d" """Key for ordinary dictionaries.""" - WRAPPER = "w" + PARAM = "p" """ - Key for non-:py:class:`ParamData` children of :py:class:`ParamData` objects, since - they are wrapped with additional metadata, such as a last updated time. + Key for :py:class:`ParamData` objects. + + The JSON object should include either a parameter class name, or None if wrapping + a non-:py:class:`ParamData` with parameter metadata (e.g. a last updated time). """ - PARAM = "p" - """Key for :py:class:`ParamData` objects.""" def _compress(text: str) -> bytes: @@ -80,13 +80,12 @@ def _encode_json(obj: Any) -> Any: {key: _encode_json(value) for key, value in obj.items()}, ] if isinstance(obj, ParamData): - timestamp_and_json = [ - obj.last_updated.timestamp(), + return [ + ParamDBKey.PARAM, _encode_json(obj.to_json()), + None if isinstance(obj, _ParamWrapper) else type(obj).__name__, + obj.last_updated.timestamp(), ] - if isinstance(obj, _ParamWrapper): - return [ParamDBKey.WRAPPER, *timestamp_and_json] - return [ParamDBKey.PARAM, type(obj).__name__, *timestamp_and_json] raise TypeError( f"'{type(obj).__name__}' object {repr(obj)} is not JSON serializable, so the" " commit failed" @@ -105,13 +104,13 @@ def _decode_json(json_data: Any) -> Any: return [_decode_json(item) for item in data[0]] if key == ParamDBKey.DICT: return {key: _decode_json(value) for key, value in data[0].items()} - if key == ParamDBKey.WRAPPER: - return _ParamWrapper.from_json(data[0], _decode_json(data[1])) if key == ParamDBKey.PARAM: - class_name = data[0] - param_class = get_param_class(class_name) + json_data, class_name, timestamp = data + param_class = ( + _ParamWrapper if class_name is None else get_param_class(class_name) + ) if param_class is not
None: - return param_class.from_json(data[1], _decode_json(data[2])) + return param_class.from_json(_decode_json(json_data), timestamp) raise ValueError( f"ParamData class '{class_name}' is not known to ParamDB, so the load" " failed" @@ -122,16 +121,19 @@ def _decode_json(json_data: Any) -> Any: def _encode(obj: Any) -> bytes: """Encode the given object into bytes that will be stored in the database.""" # pylint: disable=no-member - return _compress(json.dumps(_encode_json(obj))) + return _compress(json.dumps(_encode_json(obj), separators=(",", ":"))) -def _decode(data: bytes, decode_json: bool) -> Any: +def _decode(data: bytes, raw_json: bool) -> Any: """ - Decode an object from the given data from the database. Classes will be loaded in - if ``load_classes`` is True; otherwise, classes will be loaded as dictionaries. + Decode an object from the given data from the database. + + If ``raw_json`` is True, the raw JSON string from the database will be + returned; otherwise, the JSON data will be parsed and decoded into the corresponding + classes. """ - json_data = json.loads(_decompress(data)) - return _decode_json(json_data) if decode_json else json_data + json_str = _decompress(data) + return json_str if raw_json else _decode_json(json.loads(json_str)) class _Base(MappedAsDataclass, DeclarativeBase): @@ -283,24 +285,23 @@ def num_commits(self) -> int: @overload def load( - self, commit_id: int | None = None, *, decode_json: Literal[True] = True + self, commit_id: int | None = None, *, raw_json: Literal[False] = False ) -> DataT: ... @overload - def load( - self, commit_id: int | None = None, *, decode_json: Literal[False] - ) -> Any: ... + def load(self, commit_id: int | None = None, *, raw_json: Literal[True]) -> str: ... - def load(self, commit_id: int | None = None, *, decode_json: bool = True) -> Any: + def load(self, commit_id: int | None = None, *, raw_json: bool = False) -> Any: """ Load and return data from the database.
If a commit ID is given, load from that commit; otherwise, load from the most recent commit. Raise an ``IndexError`` if the specified commit does not exist. Note that commit IDs begin at 1. By default, objects are reconstructed, which requires the relevant parameter - data classes to be defined in the current program. However, if ``decode_json`` - is False, the encoded JSON data is loaded directly from the database. The format - of the encoded data is as follows (see :py:class:`ParamDBKey` for key codes):: + data classes to be defined in the current program. However, if ``raw_json`` + is True, the JSON data is returned directly from the database as a string. + The format of the JSON data is as follows (see :py:class:`ParamDBKey` for key + codes):: json_data: | int @@ -312,15 +313,14 @@ def load(self, commit_id: int | None = None, *, decode_json: bool = True) -> Any | [ParamDBKey.QUANTITY, float, str] | [ParamDBKey.LIST, [json_data, ...]] | [ParamDBKey.DICT, {str: json_data, ...}] - | [ParamDBKey.WRAPPED, float, json_data] - | [ParamDBKey.PARAM, str, float, json_data] - """ + | [ParamDBKey.PARAM, json_data, str | None, float] + """ # noqa: E501 select_stmt = self._select_commit(select(_Snapshot.data), commit_id) with self._Session() as session: data = session.scalar(select_stmt) if data is None: raise self._index_error(commit_id) - return _decode(data, decode_json) + return _decode(data, raw_json) def load_commit_entry(self, commit_id: int | None = None) -> CommitEntry: """ @@ -358,7 +358,7 @@ def commit_history_with_data( start: int | None = None, end: int | None = None, *, - decode_json: Literal[True] = True, + raw_json: Literal[False] = False, ) -> list[CommitEntryWithData[DataT]]: ... @overload @@ -367,15 +367,15 @@ def commit_history_with_data( start: int | None = None, end: int | None = None, *, - decode_json: Literal[False], - ) -> list[CommitEntryWithData[Any]]: ... + raw_json: Literal[True], + ) -> list[CommitEntryWithData[str]]: ... 
def commit_history_with_data( self, start: int | None = None, end: int | None = None, *, - decode_json: bool = True, + raw_json: bool = False, ) -> list[CommitEntryWithData[Any]]: """ Retrieve the commit history with data as a list of @@ -392,7 +392,7 @@ def commit_history_with_data( snapshot.id, snapshot.message, snapshot.timestamp, - _decode(snapshot.data, decode_json), + _decode(snapshot.data, raw_json), ) for snapshot in snapshots ] diff --git a/paramdb/_param_data/_param_data.py b/paramdb/_param_data/_param_data.py index 7f0137b..c1d860e 100644 --- a/paramdb/_param_data/_param_data.py +++ b/paramdb/_param_data/_param_data.py @@ -177,7 +177,7 @@ def _init_from_json(self, json_data: Any) -> None: self.__init__(json_data) # type: ignore[misc] @classmethod - def from_json(cls, last_updated_timestamp: float, json_data: list[Any]) -> Self: + def from_json(cls, json_data: list[Any], last_updated_timestamp: float) -> Self: """ Construct a parameter data object from the given last updated timestamp and JSON data originally constructed by :py:meth:`to_json`. 
diff --git a/poetry.lock b/poetry.lock index db4b756..fadf058 100644 --- a/poetry.lock +++ b/poetry.lock @@ -101,13 +101,13 @@ test-all = ["astropy[test]", "coverage[toml]", "ipython (>=4.2)", "objgraph", "s [[package]] name = "astropy-iers-data" -version = "0.2024.6.10.0.30.47" +version = "0.2024.6.17.0.31.35" description = "IERS Earth Rotation and Leap Second tables for the astropy core package" optional = true python-versions = ">=3.8" files = [ - {file = "astropy_iers_data-0.2024.6.10.0.30.47-py3-none-any.whl", hash = "sha256:d37d37387c9461b148a96fbf0f5d54a9dd118c442423355e8f7732aa0b03d1e9"}, - {file = "astropy_iers_data-0.2024.6.10.0.30.47.tar.gz", hash = "sha256:4f555793a312045cffd0820dcc5f9c98bcbcd70972c00ac7a5246133b35c0ec1"}, + {file = "astropy_iers_data-0.2024.6.17.0.31.35-py3-none-any.whl", hash = "sha256:f4e0b40563813c4297745dd4ec03d80b2cbd6cb29340c8df0534b296cb27e3cf"}, + {file = "astropy_iers_data-0.2024.6.17.0.31.35.tar.gz", hash = "sha256:a6e0dca0985e15dfc4f3fc508bfb29b2b046b59eb9d028416860afa9c63b17eb"}, ] [package.extras] @@ -656,13 +656,13 @@ tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipyth [[package]] name = "fastjsonschema" -version = "2.19.1" +version = "2.20.0" description = "Fastest Python implementation of JSON schema" optional = false python-versions = "*" files = [ - {file = "fastjsonschema-2.19.1-py3-none-any.whl", hash = "sha256:3672b47bc94178c9f23dbb654bf47440155d4db9df5f7bc47643315f9c405cd0"}, - {file = "fastjsonschema-2.19.1.tar.gz", hash = "sha256:e3126a94bdc4623d3de4485f8d468a12f02a67921315ddc87836d6e456dc789d"}, + {file = "fastjsonschema-2.20.0-py3-none-any.whl", hash = "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a"}, + {file = "fastjsonschema-2.20.0.tar.gz", hash = "sha256:3d48fc5300ee96f5d116f10fe6f28d938e6008f59a6a025c2649475b87f76a23"}, ] [package.extras] @@ -670,18 +670,18 @@ devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benc [[package]] 
name = "flake8" -version = "7.0.0" +version = "7.1.0" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" files = [ - {file = "flake8-7.0.0-py2.py3-none-any.whl", hash = "sha256:a6dfbb75e03252917f2473ea9653f7cd799c3064e54d4c8140044c5c065f53c3"}, - {file = "flake8-7.0.0.tar.gz", hash = "sha256:33f96621059e65eec474169085dc92bf26e7b2d47366b70be2f67ab80dc25132"}, + {file = "flake8-7.1.0-py2.py3-none-any.whl", hash = "sha256:2e416edcc62471a64cea09353f4e7bdba32aeb079b6e360554c659a122b1bc6a"}, + {file = "flake8-7.1.0.tar.gz", hash = "sha256:48a07b626b55236e0fb4784ee69a465fbf59d79eec1f5b4785c3d3bc57d17aa5"}, ] [package.dependencies] mccabe = ">=0.7.0,<0.8.0" -pycodestyle = ">=2.11.0,<2.12.0" +pycodestyle = ">=2.12.0,<2.13.0" pyflakes = ">=3.2.0,<3.3.0" [[package]] @@ -1721,13 +1721,13 @@ tests = ["pytest"] [[package]] name = "pycodestyle" -version = "2.11.1" +version = "2.12.0" description = "Python style guide checker" optional = false python-versions = ">=3.8" files = [ - {file = "pycodestyle-2.11.1-py2.py3-none-any.whl", hash = "sha256:44fe31000b2d866f2e41841b18528a505fbd7fef9017b04eff4e2648a0fadc67"}, - {file = "pycodestyle-2.11.1.tar.gz", hash = "sha256:41ba0e7afc9752dfb53ced5489e89f8186be00e599e712660695b7a75ff2663f"}, + {file = "pycodestyle-2.12.0-py2.py3-none-any.whl", hash = "sha256:949a39f6b86c3e1515ba1787c2022131d165a8ad271b11370a8819aa070269e4"}, + {file = "pycodestyle-2.12.0.tar.gz", hash = "sha256:442f950141b4f43df752dd303511ffded3a04c2b6fb7f65980574f0c31e6e79c"}, ] [[package]] @@ -2739,13 +2739,13 @@ files = [ [[package]] name = "urllib3" -version = "2.2.1" +version = "2.2.2" description = "HTTP library with thread-safe connection pooling, file post, and more." 
optional = false python-versions = ">=3.8" files = [ - {file = "urllib3-2.2.1-py3-none-any.whl", hash = "sha256:450b20ec296a467077128bff42b73080516e71b56ff59a60a02bef2232c4fa9d"}, - {file = "urllib3-2.2.1.tar.gz", hash = "sha256:d0570876c61ab9e520d776c38acbbb5b05a776d3f9ff98a5c8fd5162a444cf19"}, + {file = "urllib3-2.2.2-py3-none-any.whl", hash = "sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472"}, + {file = "urllib3-2.2.2.tar.gz", hash = "sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168"}, ] [package.extras] @@ -2872,4 +2872,4 @@ pydantic = ["eval-type-backport", "pydantic"] [metadata] lock-version = "2.0" python-versions = "^3.9" -content-hash = "7270d73364aa6156988b8eb65296bf80a0225f90cccacb7d7a0defec1eb0e208" +content-hash = "591d31a891f1de269cd0e8fbad3dde19ee130dcd5486f38092443172dc9646f7" diff --git a/pyproject.toml b/pyproject.toml index 22b6744..21fd28a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "paramdb" -version = "0.13.0" +version = "0.14.0" description = "Python package for storing and retrieving experiment parameters." 
authors = ["Alex Hadley "] license = "BSD-3-Clause" @@ -29,7 +29,7 @@ pydantic = ["pydantic", "eval-type-backport"] [tool.poetry.group.dev.dependencies] mypy = "^1.10.0" -flake8 = "^7.0.0" +flake8 = "^7.1.0" pylint = "^3.2.3" black = "^24.4.2" pytest = "^8.2.2" diff --git a/tests/_param_data/test_param_data.py b/tests/_param_data/test_param_data.py index 555df76..8908dd1 100644 --- a/tests/_param_data/test_param_data.py +++ b/tests/_param_data/test_param_data.py @@ -95,10 +95,10 @@ def test_child_does_not_change(param_data: ParamData[Any]) -> None: def test_to_and_from_json(param_data: ParamData[Any]) -> None: """Parameter data can be converted to and from JSON data.""" - timestamp = param_data.last_updated.timestamp() json_data = param_data.to_json() + timestamp = param_data.last_updated.timestamp() with capture_start_end_times(): - param_data_from_json = param_data.from_json(timestamp, json_data) + param_data_from_json = param_data.from_json(json_data, timestamp) assert param_data_from_json == param_data assert param_data_from_json.last_updated == param_data.last_updated diff --git a/tests/test_database.py b/tests/test_database.py index 40e2b72..89946ce 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -10,6 +10,7 @@ import os from pathlib import Path from datetime import datetime, timezone +import json import pytest from tests.helpers import ( EmptyParam, @@ -221,18 +222,20 @@ def test_update_timestamp_after_load( ) -def test_decode_json_false(db_path: str, param_data: ParamData[Any]) -> None: - """Can load raw JSON data if ``decode_json`` is false.""" +def test_raw_json_true(db_path: str, param_data: ParamData[Any]) -> None: + """Can load raw JSON data if ``raw_json`` is True.""" param_db = ParamDB[ParamData[Any]](db_path) param_db.commit("Initial commit", param_data) - data_loaded = param_db.load(decode_json=False) - data_from_history = param_db.commit_history_with_data(decode_json=False)[0].data + data_loaded = 
json.loads(param_db.load(raw_json=True)) + data_from_history = json.loads( + param_db.commit_history_with_data(raw_json=True)[0].data + ) for data in data_loaded, data_from_history: # Check that loaded dictionary has the correct type and keys assert isinstance(data, list) assert len(data) == 4 - key, class_name, timestamp, json_data = data + key, json_data, class_name, timestamp = data assert key == ParamDBKey.PARAM assert class_name == type(param_data).__name__ assert timestamp == param_data.last_updated.timestamp() @@ -259,12 +262,14 @@ def test_load_classes_false_unknown_class(db_path: str) -> None: """ param_db = ParamDB[Unknown](db_path) param_db.commit("Initial commit", Unknown()) - data_loaded = param_db.load(decode_json=False) - data_from_history = param_db.commit_history_with_data(decode_json=False)[0].data + data_loaded = json.loads(param_db.load(raw_json=True)) + data_from_history = json.loads( + param_db.commit_history_with_data(raw_json=True)[0].data + ) assert isinstance(data_loaded, list) - assert data_loaded[1] == Unknown.__name__ + assert data_loaded[2] == Unknown.__name__ assert isinstance(data_from_history, list) - assert data_from_history[1] == Unknown.__name__ + assert data_from_history[2] == Unknown.__name__ # pylint: disable-next=too-many-arguments,too-many-locals