From a22a58b763a22b47e869d61f9cabad9d41c277f6 Mon Sep 17 00:00:00 2001 From: Brandon Bocklund Date: Thu, 5 Dec 2024 22:04:49 -0800 Subject: [PATCH] FIX: Dataset exclusion query for duplicated excluded model contributions (#263) This PR fixes a bug where fixed site fraction datasets containing duplicate excluded model contributions were filtered out and did not contribute to likelihood calculations. One example case that triggered the bug is when multiple tags apply the same excluded model contribution. --- .../non_equilibrium_thermochemical_error.py | 2 +- tests/test_error_functions.py | 15 +++++++++++++ tests/testing_data.py | 22 +++++++++++++++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/espei/error_functions/non_equilibrium_thermochemical_error.py b/espei/error_functions/non_equilibrium_thermochemical_error.py index 60dd806c..0de62e7b 100644 --- a/espei/error_functions/non_equilibrium_thermochemical_error.py +++ b/espei/error_functions/non_equilibrium_thermochemical_error.py @@ -324,7 +324,7 @@ def get_thermochemical_data(dbf, comps, phases, datasets, model=None, weight_dic if exclusion == tuple([]): exc_search = (~where('excluded_model_contributions').exists()) & (where('solver').exists()) else: - exc_search = (where('excluded_model_contributions').test(lambda x: tuple(sorted(x)) == exclusion)) & (where('solver').exists()) + exc_search = (where('excluded_model_contributions').test(lambda x: tuple(sorted(set(x))) == exclusion)) & (where('solver').exists()) curr_data = get_prop_data(comps, phase_name, prop, datasets, additional_query=exc_search) curr_data = filter_sublattice_configurations(curr_data, constituents) curr_data = filter_temperatures(curr_data) diff --git a/tests/test_error_functions.py b/tests/test_error_functions.py index 7af9e5c4..289bd5d3 100644 --- a/tests/test_error_functions.py +++ b/tests/test_error_functions.py @@ -100,6 +100,21 @@ def test_fixed_configuration_residual_function(datasets_db): assert np.isclose(likelihood, 
-14.28729, rtol=1e-6) +def test_fixed_configuration_residual_function_duplicate_excluded_model_contributions(datasets_db): + """Datasets whose excluded model contributions contain a duplicated entry should still contribute to the residual""" + dbf = Database(CU_MG_TDB) + datasets_db.insert(CU_MG_HM_MIX_CUMG2_ANTISITE_DUPLICATE_EXCLUDED_MODEL_CONTRIBUTIONS) + + residual_func = FixedConfigurationPropertyResidual(dbf, datasets_db, phase_models=None, symbols_to_fit=[]) + + # Regression test "truth" values - got values by running + residuals, weights = residual_func.get_residuals(np.asarray([])) + assert len(residuals) == len(weights) + assert np.allclose(residuals, [-10.0, -100.0]) + likelihood = residual_func.get_likelihood(np.asarray([])) + assert np.isclose(likelihood, -14.28729, rtol=1e-6) + + def test_fixed_configuration_residual_with_internal_degrees_of_freedom(datasets_db): """Unstable endmembers in phases that have internal degrees of freedom should retain fixed internal DOF""" dbf = Database(CU_MG_TDB) diff --git a/tests/testing_data.py b/tests/testing_data.py index a58365ea..73c9540e 100644 --- a/tests/testing_data.py +++ b/tests/testing_data.py @@ -385,6 +385,28 @@ """, Loader=YAML_LOADER) +CU_MG_HM_MIX_CUMG2_ANTISITE_DUPLICATE_EXCLUDED_MODEL_CONTRIBUTIONS = yaml.load("""{ + "components": ["CU", "MG", "VA"], + "phases": ["CUMG2"], + "solver": { + "sublattice_site_ratios": [1, 2], + "sublattice_configurations": [["CU", "MG"], ["MG", "CU"], ["MG", "MG"], ["CU", "CU"]], + "mode": "manual" + }, + "conditions": { + "P": 101325, + "T": [300, 400], + }, + + "output": "HM_MIX", + "values": [[[10, 11, 12, 13], [100, 101, 102, 103]]], + "reference": "FAKE DATA", + "comment": "FAKE DATA", + "excluded_model_contributions": ["idmix", "idmix"] +} +""", Loader=YAML_LOADER) + + CU_MG_HM_MIX_CUMG2_ALL_INVALID = yaml.load("""{ "components": ["CU", "MG", "VA"], "phases": ["CUMG2"],