Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add min "thickness" check in CifParser to filter invalid structure which leads to infinite loop #4133

Merged
Merged
22 changes: 21 additions & 1 deletion src/pymatgen/io/cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -969,8 +969,22 @@ def _get_structure(
primitive: bool,
symmetrized: bool,
check_occu: bool = False,
min_thickness: float = 0.01,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hope this default minimal "thickness" of 0.01 Å would be a good default value (it's already "lower than it should be", but I just want to prevent false positive), is there any case that we might expect valid structure below this threshold?

) -> Structure | None:
"""Generate structure from part of the CIF."""
"""Generate structure from part of the CIF.

Args:
data (CifBlock): The data block to parse.
primitive (bool): Whether to return primitive unit cells.
symmetrized (bool): Whether to return SymmetrizedStructure.
check_occu (bool): Whether to check site for unphysical occupancy > 1.
min_thickness (float): Minimum thickness in Angstrom to consider structure as valid.
This is added to guard against unphysical small/thin structure,
which could result in infinite loop for searching near neighbours.

Returns:
Structure or None if not found.
"""

def get_num_implicit_hydrogens(symbol: str) -> int:
"""Get number of implicit hydrogens."""
Expand All @@ -992,6 +1006,12 @@ def get_matching_coord(

lattice = self.get_lattice(data)

# Check minimal lattice thickness
if lattice is not None:
thickness = [lattice.d_hkl((1, 0, 0)), lattice.d_hkl((0, 1, 0)), lattice.d_hkl((0, 0, 1))]
if any(t < min_thickness for t in thickness):
raise ValueError(f"{thickness=} Å below threshold, double check your structure.")

# If magCIF, get magnetic symmetry moments and magmoms
# else standard CIF, and use empty magmom dict
if self.feature_flags["magcif_incommensurate"]:
Expand Down
Binary file added tests/files/cif/bad_superflat_inf_loop.cif.gz
Binary file not shown.
12 changes: 12 additions & 0 deletions tests/io/test_cif.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,18 @@ def test_cif_parser(self):
assert len(parser.parse_structures(primitive=False)[0]) == 2
assert not parser.has_errors

def test_parse_bad_superflat(self):
"""
Test unphysically "flat" structure with volume near zero,
which would originally lead to infinite loop (PR4133).
"""
parser = CifParser(f"{TEST_FILES_DIR}/cif/bad_superflat_inf_loop.cif.gz")
with (
pytest.raises(ValueError, match="Invalid CIF file with no structures"),
pytest.warns(UserWarning, match="Å below threshold, double check your structure."),
):
parser.parse_structures()

def test_get_symmetrized_structure(self):
parser = CifParser(f"{TEST_FILES_DIR}/cif/Li2O.cif")
sym_structure = parser.parse_structures(primitive=False, symmetrized=True)[0]
Expand Down