From aecf5e0dfbbcf158f9d72cbd9f9326b5cee772e0 Mon Sep 17 00:00:00 2001 From: tjlane Date: Sun, 25 Aug 2024 21:31:25 +0200 Subject: [PATCH 1/6] small refactor --- test/unit/conftest.py | 82 +++++++++++++++++++++++++++++++++ test/unit/test_tv.py | 105 +----------------------------------------- 2 files changed, 84 insertions(+), 103 deletions(-) create mode 100644 test/unit/conftest.py diff --git a/test/unit/conftest.py b/test/unit/conftest.py new file mode 100644 index 0000000..351b490 --- /dev/null +++ b/test/unit/conftest.py @@ -0,0 +1,82 @@ + +import gemmi +import numpy as np + +from meteor.utils import compute_coefficients_from_map + + +def generate_single_carbon_density( + carbon_position: tuple[float, float, float], + space_group: gemmi.SpaceGroup, + unit_cell: gemmi.UnitCell, + d_min: float, +) -> gemmi.FloatGrid: + model = gemmi.Model("single_atom") + chain = gemmi.Chain("A") + + residue = gemmi.Residue() + residue.name = "X" + residue.seqid = gemmi.SeqId("1") + + atom = gemmi.Atom() + atom.name = "C" + atom.element = gemmi.Element("C") + atom.pos = gemmi.Position(*carbon_position) + + residue.add_atom(atom) + chain.add_residue(residue) + model.add_chain(chain) + + structure = gemmi.Structure() + structure.add_model(model) + structure.cell = unit_cell + structure.spacegroup_hm = space_group.hm + + density_map = gemmi.DensityCalculatorX() + density_map.d_min = d_min + density_map.grid.setup_from(structure) + density_map.put_model_density_on_grid(structure[0]) + + return density_map.grid + + +def displaced_single_atom_difference_map_coefficients( + *, + noise_sigma: float, +) -> rs.DataSet: + unit_cell = gemmi.UnitCell(a=10.0, b=10.0, c=10.0, alpha=90, beta=90, gamma=90) + space_group = gemmi.find_spacegroup_by_name("P1") + d_min = 1.0 + + carbon_position1 = (5.0, 5.0, 5.0) + carbon_position2 = (5.1, 5.0, 5.0) + + density1 = generate_single_carbon_density(carbon_position1, space_group, unit_cell, d_min) + density2 = generate_single_carbon_density(carbon_position2, space_group, unit_cell, d_min) + + ccp4_map = gemmi.Ccp4Map() + grid_values = ( + np.array(density2) - np.array(density1) + noise_sigma * np.random.randn(*density2.shape) + ) + ccp4_map.grid = gemmi.FloatGrid(grid_values.astype(np.float32), unit_cell, space_group) + ccp4_map.update_ccp4_header() + + difference_map_coefficients = compute_coefficients_from_map( + ccp4_map=ccp4_map, + high_resolution_limit=d_min, + amplitude_label="DF", + phase_label="PHIC", + ) + assert (difference_map_coefficients.max() > 0.0).any() + + return difference_map_coefficients + + +@pytest.fixture +def noise_free_map() -> rs.DataSet: + return displaced_single_atom_difference_map_coefficients(noise_sigma=0.0) + + +@pytest.fixture +def noisy_map() -> rs.DataSet: + return displaced_single_atom_difference_map_coefficients(noise_sigma=0.03) \ No newline at end of file diff --git a/test/unit/test_tv.py b/test/unit/test_tv.py index 51b2679..7777bac 100644 --- a/test/unit/test_tv.py +++ b/test/unit/test_tv.py @@ -1,79 +1,11 @@ from typing import Sequence -import gemmi import numpy as np import pytest import reciprocalspaceship as rs from meteor import tv -from meteor.utils import MapLabels, compute_coefficients_from_map, compute_map_from_coefficients - - -def _generate_single_carbon_density( - carbon_position: tuple[float, float, float], - space_group: gemmi.SpaceGroup, - unit_cell: gemmi.UnitCell, - d_min: float, -) -> gemmi.FloatGrid: - model = gemmi.Model("single_atom") - chain = gemmi.Chain("A") - - residue = gemmi.Residue() - residue.name = "X" - residue.seqid = gemmi.SeqId("1") - - atom = gemmi.Atom() - atom.name = "C" - atom.element = gemmi.Element("C") - atom.pos = gemmi.Position(*carbon_position) - - residue.add_atom(atom) - chain.add_residue(residue) - model.add_chain(chain) - - structure = gemmi.Structure() - structure.add_model(model) - structure.cell = unit_cell - structure.spacegroup_hm = space_group.hm - - density_map = gemmi.DensityCalculatorX() - density_map.d_min = d_min - density_map.grid.setup_from(structure) - density_map.put_model_density_on_grid(structure[0]) - - return density_map.grid - - -def displaced_single_atom_difference_map_coefficients( - *, - noise_sigma: float, -) -> rs.DataSet: - unit_cell = gemmi.UnitCell(a=10.0, b=10.0, c=10.0, alpha=90, beta=90, gamma=90) - space_group = gemmi.find_spacegroup_by_name("P1") - d_min = 1.0 - - carbon_position1 = (5.0, 5.0, 5.0) - carbon_position2 = (5.1, 5.0, 5.0) - - density1 = _generate_single_carbon_density(carbon_position1, space_group, unit_cell, d_min) - density2 = _generate_single_carbon_density(carbon_position2, space_group, unit_cell, d_min) - - ccp4_map = gemmi.Ccp4Map() - grid_values = ( - np.array(density2) - np.array(density1) + noise_sigma * np.random.randn(*density2.shape) - ) - ccp4_map.grid = gemmi.FloatGrid(grid_values.astype(np.float32), unit_cell, space_group) - ccp4_map.update_ccp4_header() - - difference_map_coefficients = compute_coefficients_from_map( - ccp4_map=ccp4_map, - high_resolution_limit=1.0, - amplitude_label="DF", - phase_label="PHIC", - ) - assert (difference_map_coefficients.max() > 0.0).any() - - return difference_map_coefficients +from meteor.utils import MapLabels, compute_map_from_coefficients def rms_between_coefficients(ds1: rs.DataSet, ds2: rs.DataSet, diffmap_labels: MapLabels) -> float: @@ -93,6 +25,7 @@ def rms_between_coefficients(ds1: rs.DataSet, ds2: rs.DataSet, diffmap_labels: M map1_array = np.array(map2.grid) map2_array = np.array(map1.grid) + # standardize -- TODO better to scale? think... map1_array /= map1_array.std() map2_array /= map2_array.std() @@ -101,16 +34,6 @@ def rms_between_coefficients(ds1: rs.DataSet, ds2: rs.DataSet, diffmap_labels: M return rms -@pytest.fixture -def noise_free_map() -> rs.DataSet: - return displaced_single_atom_difference_map_coefficients(noise_sigma=0.0) - - -@pytest.fixture -def noisy_map() -> rs.DataSet: - return displaced_single_atom_difference_map_coefficients(noise_sigma=0.03) - - @pytest.mark.parametrize( "lambda_values_to_scan", [ @@ -154,27 +77,3 @@ def test_tv_denoise_difference_map( ) rms_after_denoising = rms_between_coefficients(noise_free_map, denoised_map, diffmap_labels) assert rms_after_denoising < rms_before_denoising - - # print("xyz", result.optimal_lambda, rms_before_denoising, rms_after_denoising) - - # testmap = compute_map_from_coefficients( - # map_coefficients=noise_free_map, - # amplitude_label=diffmap_labels.amplitude, - # phase_label=diffmap_labels.phase, - # map_sampling=1, - # ) - # testmap.write_ccp4_map("original.ccp4") - # testmap = compute_map_from_coefficients( - # map_coefficients=noisy_map, - # amplitude_label=diffmap_labels.amplitude, - # phase_label=diffmap_labels.phase, - # map_sampling=1, - # ) - # testmap.write_ccp4_map("noisy.ccp4") - # testmap = compute_map_from_coefficients( - # map_coefficients=denoised_map, - # amplitude_label=diffmap_labels.amplitude, - # phase_label=diffmap_labels.phase, - # map_sampling=1, - # ) - # testmap.write_ccp4_map("denoised.ccp4") From 818f30e7ff9c33a47bf2cc0fb176fc9732c2d148 Mon Sep 17 00:00:00 2001 From: tjlane Date: Sun, 25 Aug 2024 21:38:19 +0200 Subject: [PATCH 2/6] cleanup --- test/unit/conftest.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/test/unit/conftest.py b/test/unit/conftest.py index 351b490..065e366 100644 --- a/test/unit/conftest.py +++ b/test/unit/conftest.py @@ -1,9 +1,15 @@ - import gemmi import numpy as np +import pytest +import reciprocalspaceship as rs from meteor.utils import compute_coefficients_from_map +DEFAULT_RESOLUTION = 1.0 +DEFAULT_CELL_SIZE = 10.0 +DEFAULT_CARBON1_POSITION = (5.0, 5.0, 5.0) +DEFAULT_CARBON2_POSITION = (5.0, 5.2, 5.0) + def generate_single_carbon_density( carbon_position: tuple[float, float, float], @@ -40,16 +46,17 @@ def generate_single_carbon_density( return density_map.grid +@pytest.fixture def displaced_single_atom_difference_map_coefficients( *, noise_sigma: float, + d_min: float = DEFAULT_RESOLUTION, + cell_size: float = DEFAULT_CELL_SIZE, + carbon_position1: tuple[float, float, float] = DEFAULT_CARBON1_POSITION, + carbon_position2: tuple[float, float, float] = DEFAULT_CARBON2_POSITION, ) -> rs.DataSet: - unit_cell = gemmi.UnitCell(a=10.0, b=10.0, c=10.0, alpha=90, beta=90, gamma=90) + unit_cell = gemmi.UnitCell(a=cell_size, b=cell_size, c=cell_size, alpha=90, beta=90, gamma=90) space_group = gemmi.find_spacegroup_by_name("P1") - d_min = 1.0 - - carbon_position1 = (5.0, 5.0, 5.0) - carbon_position2 = (5.1, 5.0, 5.0) density1 = generate_single_carbon_density(carbon_position1, space_group, unit_cell, d_min) density2 = generate_single_carbon_density(carbon_position2, space_group, unit_cell, d_min) @@ -79,4 +86,4 @@ def noise_free_map() -> rs.DataSet: @pytest.fixture def noisy_map() -> rs.DataSet: - return displaced_single_atom_difference_map_coefficients(noise_sigma=0.03) \ No newline at end of file + return displaced_single_atom_difference_map_coefficients(noise_sigma=0.03) From 8deccf221a75683c6977ecac6ff5d338ee58e664 Mon Sep 17 00:00:00 2001 From: tjlane Date: Sun, 25 Aug 2024 23:01:06 +0200 Subject: [PATCH 3/6] test stronger --- meteor/tv.py | 24 ++++++++++++---------- test/unit/conftest.py | 1 - test/unit/test_tv.py | 46 +++++++++++++++++++++++++++++++++---------- 3 files changed, 50 insertions(+), 21 deletions(-) diff --git a/meteor/tv.py b/meteor/tv.py index 0cac0ed..44716ac 100644 --- a/meteor/tv.py +++ b/meteor/tv.py @@ -1,4 +1,4 @@ -from dataclasses import dataclass +from dataclasses import dataclass, field from typing import Literal, Sequence, overload import numpy as np @@ -26,6 +26,7 @@ class TvDenoiseResult: optimal_lambda: float optimal_negentropy: float map_sampling_used_for_tv: float + negetropies_for_lambdas_scanned: list[float] = field(default_factory=list) def _tv_denoise_array(*, map_as_array: np.ndarray, weight: float) -> np.ndarray: @@ -42,10 +43,10 @@ def _tv_denoise_array(*, map_as_array: np.ndarray, weight: float) -> np.ndarray: @overload def tv_denoise_difference_map( difference_map_coefficients: rs.DataSet, - full_output: Literal[False], + full_output: Literal[False] = False, difference_map_amplitude_column: str = "DF", difference_map_phase_column: str = "PHIC", - lambda_values_to_scan: Sequence[float] | None = None, + lambda_values_to_scan: Sequence[float] | np.ndarray | None = None, ) -> rs.DataSet: ... @@ -55,7 +56,7 @@ def tv_denoise_difference_map( full_output: Literal[True], difference_map_amplitude_column: str = "DF", difference_map_phase_column: str = "PHIC", - lambda_values_to_scan: Sequence[float] | None = None, + lambda_values_to_scan: Sequence[float] | np.ndarray | None = None, ) -> tuple[rs.DataSet, TvDenoiseResult]: ... @@ -140,18 +141,20 @@ def negentropy_objective(tv_lambda: float): return -1.0 * negentropy(denoised_map.flatten()) optimal_lambda: float + negetropies_for_lambdas_scanned = [] # scan a specific set of lambda values and find the best one if lambda_values_to_scan is not None: # use no denoising as the default to beat - optimal_lambda = 0.0 # initialization - highest_negentropy = negentropy(difference_map_as_array.flatten()) + optimal_lambda = lambda_values_to_scan[0] # initialization + optimal_negentropy = negentropy(difference_map_as_array.flatten()) for tv_lambda in lambda_values_to_scan: trial_negentropy = -1.0 * negentropy_objective(tv_lambda) - if trial_negentropy > highest_negentropy: + negetropies_for_lambdas_scanned.append(trial_negentropy) + if trial_negentropy > optimal_negentropy: optimal_lambda = tv_lambda - highest_negentropy = trial_negentropy + optimal_negentropy = trial_negentropy # use golden ratio optimization to pick an optimal lambda else: @@ -160,7 +163,7 @@ def negentropy_objective(tv_lambda: float): ) assert optimizer_result.success, "Golden minimization failed to find optimal TV lambda" optimal_lambda = optimizer_result.x - highest_negentropy = negentropy_objective(optimal_lambda) + optimal_negentropy = negentropy_objective(optimal_lambda) # denoise using the optimized parameters and convert to an rs.DataSet final_map = _tv_denoise_array(map_as_array=difference_map_as_array, weight=optimal_lambda) @@ -180,8 +183,9 @@ def negentropy_objective(tv_lambda: float): if full_output: tv_result = TvDenoiseResult( optimal_lambda=optimal_lambda, - optimal_negentropy=highest_negentropy, + optimal_negentropy=optimal_negentropy, map_sampling_used_for_tv=TV_MAP_SAMPLING, + negetropies_for_lambdas_scanned=negetropies_for_lambdas_scanned, ) return final_map_coefficients, tv_result else: diff --git a/test/unit/conftest.py b/test/unit/conftest.py index 065e366..91313e3 100644 --- a/test/unit/conftest.py +++ b/test/unit/conftest.py @@ -46,7 +46,6 @@ def generate_single_carbon_density( return density_map.grid -@pytest.fixture def displaced_single_atom_difference_map_coefficients( *, noise_sigma: float, diff --git a/test/unit/test_tv.py b/test/unit/test_tv.py index 7777bac..63947a8 100644 --- a/test/unit/test_tv.py +++ b/test/unit/test_tv.py @@ -7,23 +7,25 @@ from meteor import tv from meteor.utils import MapLabels, compute_map_from_coefficients +LAMBDAS_TO_SCAN = np.logspace(-3, 1, 50) -def rms_between_coefficients(ds1: rs.DataSet, ds2: rs.DataSet, diffmap_labels: MapLabels) -> float: + +def rms_between_coefficients(ds1: rs.DataSet, ds2: rs.DataSet, diffmap_labels: MapLabels, map_sampling: int = 3) -> float: map1 = compute_map_from_coefficients( map_coefficients=ds1, amplitude_label=diffmap_labels.amplitude, phase_label=diffmap_labels.phase, - map_sampling=3, + map_sampling=map_sampling, ) map2 = compute_map_from_coefficients( map_coefficients=ds2, amplitude_label=diffmap_labels.amplitude, phase_label=diffmap_labels.phase, - map_sampling=3, + map_sampling=map_sampling, ) - map1_array = np.array(map2.grid) - map2_array = np.array(map1.grid) + map1_array = np.array(map1.grid) + map2_array = np.array(map2.grid) # standardize -- TODO better to scale? think... map1_array /= map1_array.std() @@ -61,19 +63,43 @@ def test_tv_denoise_difference_map_smoke( else: assert isinstance(output, rs.DataSet) - -@pytest.mark.parametrize("lambda_values_to_scan", [None, np.logspace(-3, 2, 100)]) +# TODO re-implement None +@pytest.mark.parametrize("lambda_values_to_scan", [LAMBDAS_TO_SCAN,]) def test_tv_denoise_difference_map( lambda_values_to_scan: None | Sequence[float], noise_free_map: rs.DataSet, noisy_map: rs.DataSet, diffmap_labels: MapLabels, ) -> None: - rms_before_denoising = rms_between_coefficients(noise_free_map, noisy_map, diffmap_labels) + + def rms_to_noise_free(test_map: rs.DataSet) -> float: + return rms_between_coefficients(test_map, noise_free_map, diffmap_labels) + + # Normally, the `tv_denoise_difference_map` function only returns the best result -- since we + # know the ground truth, work around this to test all possible results. + + lowest_rms: float = np.inf + best_lambda: float = 0.0 + + for trial_lambda in LAMBDAS_TO_SCAN: + denoised_map = tv.tv_denoise_difference_map( + difference_map_coefficients=noisy_map, + lambda_values_to_scan=[trial_lambda,] + ) + rms = rms_to_noise_free(denoised_map) + if rms < lowest_rms: + lowest_rms = rms + best_lambda = trial_lambda + + # now run the denoising algorithm and make sure we get a result that's close + # to the one that minimizes the RMS error to the ground truth denoised_map, result = tv.tv_denoise_difference_map( difference_map_coefficients=noisy_map, lambda_values_to_scan=lambda_values_to_scan, full_output=True, ) - rms_after_denoising = rms_between_coefficients(noise_free_map, denoised_map, diffmap_labels) - assert rms_after_denoising < rms_before_denoising + + rms_after_denoising = rms_to_noise_free(denoised_map) + assert rms_after_denoising < rms_to_noise_free(noisy_map) + np.testing.assert_approx_equal(result.optimal_lambda, best_lambda) + np.testing.assert_approx_equal(rms_after_denoising, lowest_rms) From 5973c0f6892701127128c1fa0722092e5f85ad33 Mon Sep 17 00:00:00 2001 From: tjlane Date: Sun, 25 Aug 2024 23:09:44 +0200 Subject: [PATCH 4/6] yes golden works --- meteor/tv.py | 2 +- test/unit/test_tv.py | 29 ++++++++++++++++------------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/meteor/tv.py b/meteor/tv.py index 44716ac..241975a 100644 --- a/meteor/tv.py +++ b/meteor/tv.py @@ -163,7 +163,7 @@ def negentropy_objective(tv_lambda: float): ) assert optimizer_result.success, "Golden minimization failed to find optimal TV lambda" optimal_lambda = optimizer_result.x - optimal_negentropy = negentropy_objective(optimal_lambda) + optimal_negentropy = -1.0 * negentropy_objective(optimal_lambda) # denoise using the optimized parameters and convert to an rs.DataSet final_map = _tv_denoise_array(map_as_array=difference_map_as_array, weight=optimal_lambda) diff --git a/test/unit/test_tv.py b/test/unit/test_tv.py index 63947a8..9c4ec33 100644 --- a/test/unit/test_tv.py +++ b/test/unit/test_tv.py @@ -7,10 +7,12 @@ from meteor import tv from meteor.utils import MapLabels, compute_map_from_coefficients -LAMBDAS_TO_SCAN = np.logspace(-3, 1, 50) +DEFAULT_LAMBDA_VALUES_TO_SCAN = np.logspace(-2, 0, 30) -def rms_between_coefficients(ds1: rs.DataSet, ds2: rs.DataSet, diffmap_labels: MapLabels, map_sampling: int = 3) -> float: +def rms_between_coefficients( + ds1: rs.DataSet, ds2: rs.DataSet, diffmap_labels: MapLabels, map_sampling: int = 3 +) -> float: map1 = compute_map_from_coefficients( map_coefficients=ds1, amplitude_label=diffmap_labels.amplitude, @@ -27,7 +29,7 @@ def rms_between_coefficients(ds1: rs.DataSet, ds2: rs.DataSet, diffmap_labels: M map1_array = np.array(map1.grid) map2_array = np.array(map2.grid) - # standardize -- TODO better to scale? think... + # standardize map1_array /= map1_array.std() map2_array /= map2_array.std() @@ -63,28 +65,30 @@ def test_tv_denoise_difference_map_smoke( else: assert isinstance(output, rs.DataSet) -# TODO re-implement None -@pytest.mark.parametrize("lambda_values_to_scan", [LAMBDAS_TO_SCAN,]) + +@pytest.mark.parametrize("lambda_values_to_scan", [None, DEFAULT_LAMBDA_VALUES_TO_SCAN]) def test_tv_denoise_difference_map( lambda_values_to_scan: None | Sequence[float], noise_free_map: rs.DataSet, noisy_map: rs.DataSet, diffmap_labels: MapLabels, ) -> None: - def rms_to_noise_free(test_map: rs.DataSet) -> float: return rms_between_coefficients(test_map, noise_free_map, diffmap_labels) - # Normally, the `tv_denoise_difference_map` function only returns the best result -- since we + # Normally, the `tv_denoise_difference_map` function only returns the best result -- since we # know the ground truth, work around this to test all possible results. - + lowest_rms: float = np.inf best_lambda: float = 0.0 - for trial_lambda in LAMBDAS_TO_SCAN: - denoised_map = tv.tv_denoise_difference_map( + for trial_lambda in DEFAULT_LAMBDA_VALUES_TO_SCAN: + denoised_map, result = tv.tv_denoise_difference_map( difference_map_coefficients=noisy_map, - lambda_values_to_scan=[trial_lambda,] + lambda_values_to_scan=[ + trial_lambda, + ], + full_output=True, ) rms = rms_to_noise_free(denoised_map) if rms < lowest_rms: @@ -101,5 +105,4 @@ def rms_to_noise_free(test_map: rs.DataSet) -> float: rms_after_denoising = rms_to_noise_free(denoised_map) assert rms_after_denoising < rms_to_noise_free(noisy_map) - np.testing.assert_approx_equal(result.optimal_lambda, best_lambda) - np.testing.assert_approx_equal(rms_after_denoising, lowest_rms) + np.testing.assert_allclose(result.optimal_lambda, best_lambda, rtol=0.2) From f3ff5bb86da45a9eeb90098db679f4cf907d41ae Mon Sep 17 00:00:00 2001 From: tjlane Date: Tue, 10 Sep 2024 20:14:36 +0100 Subject: [PATCH 5/6] hack around mypy issue --- .github/workflows/mypy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/mypy.yml b/.github/workflows/mypy.yml index 3a207d7..3d95aa7 100644 --- a/.github/workflows/mypy.yml +++ b/.github/workflows/mypy.yml @@ -24,4 +24,5 @@ jobs: run: | pip install mypy pip install . + mkdir .mypy_cache mypy --install-types --non-interactive meteor/ test/ From 9a41395a1700f90f27cf12cd138f8cc79dc8d25f Mon Sep 17 00:00:00 2001 From: tjlane Date: Tue, 10 Sep 2024 20:17:53 +0100 Subject: [PATCH 6/6] for mypy --- test/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test/__init__.py diff --git a/test/__init__.py b/test/__init__.py new file mode 100644 index 0000000..e69de29