Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ignore documentation string mismatch in dimension validation #40

Merged
merged 3 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions .github/workflows/build_docs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,25 +25,21 @@ jobs:
- name: Install sqlite
run: sudo apt-get install sqlite libyaml-dev

- name: Set the VIRTUAL_ENV variable for uv to work
run: |
echo "VIRTUAL_ENV=${Python_ROOT_DIR}" >> $GITHUB_ENV

- name: Update pip/wheel infrastructure
run: |
python -m pip install --upgrade pip
pip install uv
uv pip install wheel
uv pip install --system wheel

- name: Install dependencies
run: |
uv pip install -r requirements.txt
uv pip install --system -r requirements.txt

- name: Build and install
run: uv pip install --no-deps -v -e .
run: uv pip install --system --no-deps -v -e .

- name: Install documenteer
run: uv pip install 'documenteer[pipelines]==0.8.2'
run: uv pip install --system 'documenteer[pipelines]==0.8.2'

- name: Build documentation
env:
Expand Down
2 changes: 0 additions & 2 deletions migrations/dimensions-config/1fae088c80b6.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,6 @@ def _validate_initial_dimension_universe(ctx: MigrationContext) -> None:
ctx.attributes.validate_dimensions_json(5)
except ValueError as e:
e.add_note(
"Repositories originally created at dimension universe 1 or earlier may have incorrect"
" documentation strings.\n"
"Re-run butler migrate with the flag '--options allow_dimension_universe_mismatch=1' to"
" bypass this check.\n"
"This will overwrite any customizations made to the dimension universe."
Expand Down
14 changes: 12 additions & 2 deletions python/lsst/daf/butler_migrate/_dimensions_json_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import difflib
import json
from typing import Literal

import yaml
from lsst.resources import ResourcePath
Expand Down Expand Up @@ -66,7 +67,9 @@
return json.dumps(dimensions)


def compare_json_strings(expected: str, actual: str) -> str | None:
def compare_json_strings(
expected: str, actual: str, diff_style: Literal["unified", "ndiff"] = "unified"
) -> str | None:
"""Compare two JSON strings and return a human-readable description of
the differences.

Expand All @@ -76,6 +79,8 @@
JSON-encoded string to use as the basis for comparison.
actual : `str`
JSON-encoded string to compare with the expected value.
diff_style : "unified" | "ndiff"
What type of diff to return.

Returns
-------
Expand All @@ -90,7 +95,12 @@
if expected == actual:
return None

diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="")
if diff_style == "unified":
diff = difflib.unified_diff(expected.splitlines(), actual.splitlines(), lineterm="")
elif diff_style == "ndiff":
diff = difflib.ndiff(expected.splitlines(), actual.splitlines())
else:
raise ValueError(f"Unknown {diff_style=}")

Check warning on line 103 in python/lsst/daf/butler_migrate/_dimensions_json_utils.py

View check run for this annotation

Codecov / codecov/patch

python/lsst/daf/butler_migrate/_dimensions_json_utils.py#L103

Added line #L103 was not covered by tests
return "\n".join(diff)


Expand Down
33 changes: 32 additions & 1 deletion python/lsst/daf/butler_migrate/butler_attributes.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from __future__ import annotations

import json
import re
from collections.abc import Callable, Iterable
from typing import Any, cast

Expand Down Expand Up @@ -219,7 +220,7 @@ def validate_dimensions_json(self, expected_universe_version: int) -> None:
expected_json = load_historical_dimension_universe_json(expected_universe_version)
actual_json = self._load_dimensions_json()
diff = compare_json_strings(expected_json, actual_json)
if diff is not None:
if diff is not None and not _is_expected_dimensions_json_mismatch(expected_json, actual_json):
err = ValueError(
"dimensions.json stored in database does not match expected"
f" daf_butler universe version {expected_universe_version}."
Expand All @@ -241,3 +242,33 @@ def replace_dimensions_json(self, universe_version: int) -> None:
"""
dimensions = load_historical_dimension_universe_json(universe_version)
self.update(_DIMENSIONS_JSON_KEY, dimensions)


def _is_expected_dimensions_json_mismatch(expected_json: str, actual_json: str) -> bool:
# Return `True` if the two dimension universe JSON strings differ only in
# ways expected because of the history of this data. Older repositories
# that have been previously migrated have some documentation strings that
# don't match the current dimension universe because of how dimension
# universes were patched in earlier migrations.

diff = compare_json_strings(expected_json, actual_json, diff_style="ndiff")
if diff is None:
return True

return all(_is_expected_diff_line(line) for line in diff.splitlines())


def _is_expected_diff_line(line: str) -> bool:
# ndiff prefix for matching lines and "hint" lines.
if line.startswith(" ") or line.startswith("? "):
return True

# Lines containing only docstring changes.
if re.match(r'^[-+]\s+"doc":', line):
return True

# Empty line.
if line.strip() == "":
return True

return False
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Feels a bit fragile. Would it be better to just ignore doc strings when doing diff (maybe optionally)? You could load JSON from string into dict, drop all doc keys recursively and compare the result.

20 changes: 20 additions & 0 deletions tests/test_dimensions_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -328,6 +328,26 @@ def test_validate_dimensions_json(self) -> None:
attribs.replace_dimensions_json(universe)
attribs.validate_dimensions_json(universe)

def test_ignore_expected_dimension_json_mismatch(self) -> None:
original = '{"a": 1, "doc": "good"}'
self.assertTrue(butler_attributes._is_expected_dimensions_json_mismatch(original, original))
# Mismatched doc but everything else OK
self.assertTrue(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 1, "doc": "bad"}')
)
# Order doesn't matter
self.assertTrue(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"doc": "bad", "a": 1}')
)
# Mismatched non-doc value
self.assertFalse(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "good"}')
)
# Mismatched value and doc
self.assertFalse(
butler_attributes._is_expected_dimensions_json_mismatch(original, '{"a": 2, "doc": "bad"}')
)


class SQLiteDimensionsJsonTestCase(DimensionsJsonTestCase, unittest.TestCase):
"""Test using SQLite backend."""
Expand Down
Loading