From 46a50c34af48377f3b7540a2b2e4eea5fe2fde6f Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 25 Oct 2024 18:04:45 +0800 Subject: [PATCH 1/8] add min_vol check --- src/pymatgen/io/cif.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/pymatgen/io/cif.py b/src/pymatgen/io/cif.py index 8606216a683..bb5abf1f369 100644 --- a/src/pymatgen/io/cif.py +++ b/src/pymatgen/io/cif.py @@ -969,8 +969,22 @@ def _get_structure( primitive: bool, symmetrized: bool, check_occu: bool = False, + min_vol: float = 0.5, ) -> Structure | None: - """Generate structure from part of the CIF.""" + """Generate structure from part of the CIF. + + Args: + data (CifBlock): The data block to parse. + primitive (bool): Whether to return primitive unit cells. + symmetrized (bool): Whether to return SymmetrizedStructure. + check_occu (bool): Whether to check site for unphysical occupancy > 1. + min_vol (float): Minimum volume in Angstrom^3 to consider structure as valid. + This is added to guard against 2D-like structure, which could result in + infinite loop for searching near neighbours. + + Returns: + Structure or None if not found. + """ def get_num_implicit_hydrogens(symbol: str) -> int: """Get number of implicit hydrogens.""" @@ -992,6 +1006,10 @@ def get_matching_coord( lattice = self.get_lattice(data) + # Check lattice + if min_vol > 0 and lattice is not None and lattice.volume < min_vol: + raise ValueError(f"{lattice.volume=} ų below threshold, double check your structure.") + # If magCIF, get magnetic symmetry moments and magmoms # else standard CIF, and use empty magmom dict if self.feature_flags["magcif_incommensurate"]: From e86e485f45944f97ab791e15a171df3a26096276 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 25 Oct 2024 19:20:05 +0800 Subject: [PATCH 2/8] increase min_vol to 1 --- src/pymatgen/io/cif.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/pymatgen/io/cif.py b/src/pymatgen/io/cif.py index a0308f4eeb2..ed197009c78 100644 --- a/src/pymatgen/io/cif.py +++ b/src/pymatgen/io/cif.py @@ -969,7 +969,7 @@ def _get_structure( primitive: bool, symmetrized: bool, check_occu: bool = False, - min_vol: float = 0.5, + min_vol: float = 1.0, ) -> Structure | None: """Generate structure from part of the CIF. @@ -979,8 +979,8 @@ def _get_structure( symmetrized (bool): Whether to return SymmetrizedStructure. check_occu (bool): Whether to check site for unphysical occupancy > 1. min_vol (float): Minimum volume in Angstrom^3 to consider structure as valid. - This is added to guard against 2D-like structure, which could result in - infinite loop for searching near neighbours. + This is added to guard against unphysical 2D-like structure, + which could result in infinite loop for searching near neighbours. Returns: Structure or None if not found. @@ -1007,7 +1007,7 @@ def get_matching_coord( lattice = self.get_lattice(data) # Check lattice volume - if min_vol > 0 and lattice is not None and lattice.volume < min_vol: + if lattice is not None and lattice.volume < min_vol: raise ValueError(f"{lattice.volume=} ų below threshold, double check your structure.") # If magCIF, get magnetic symmetry moments and magmoms From 20fb714bba4664f5c529e9712afa3504eae31124 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 25 Oct 2024 20:50:43 +0800 Subject: [PATCH 3/8] round volume to 4 digits --- src/pymatgen/io/cif.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymatgen/io/cif.py b/src/pymatgen/io/cif.py index ed197009c78..47f782f87cc 100644 --- a/src/pymatgen/io/cif.py +++ b/src/pymatgen/io/cif.py @@ -1008,7 +1008,7 @@ def get_matching_coord( # Check lattice volume if lattice is not None and lattice.volume < min_vol: - raise ValueError(f"{lattice.volume=} ų below threshold, double check your structure.") + raise ValueError(f"{lattice.volume=:.4f} ų below threshold, double check your structure.") # If magCIF, get magnetic symmetry moments and magmoms # else standard CIF, and use empty magmom dict From 6872a8a45b4876a27053a96f37ff84dcd053c906 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Fri, 25 Oct 2024 20:50:53 +0800 Subject: [PATCH 4/8] add test --- tests/files/cif/bad_superflat_inf_loop.cif | 51 ++++++++++++++++++++++ tests/io/test_cif.py | 12 +++++ 2 files changed, 63 insertions(+) create mode 100644 tests/files/cif/bad_superflat_inf_loop.cif diff --git a/tests/files/cif/bad_superflat_inf_loop.cif b/tests/files/cif/bad_superflat_inf_loop.cif new file mode 100644 index 00000000000..5c660a061b0 --- /dev/null +++ b/tests/files/cif/bad_superflat_inf_loop.cif @@ -0,0 +1,51 @@ +# unphysically "flat" structure with volume near zero +# which would originally lead to infinite loop (PR4133) +data_V23Ni +_symmetry_space_group_name_H-M 'P 1' +_cell_length_a 1.49244463 +_cell_length_b 3.79421616 +_cell_length_c 30.42398643 +_cell_angle_alpha 20.62129211 +_cell_angle_beta 83.61568451 +_cell_angle_gamma 62.99439240 +_symmetry_Int_Tables_number 1 +_chemical_formula_structural V23Ni +_chemical_formula_sum 'V23 Ni1' +_cell_volume 0.00000000 +_cell_formula_units_Z 1 +loop_ + _symmetry_equiv_pos_site_id + _symmetry_equiv_pos_as_xyz + 1 'x, y, z' +loop_ + _atom_site_type_symbol + _atom_site_label + _atom_site_symmetry_multiplicity + _atom_site_fract_x + _atom_site_fract_y + _atom_site_fract_z + _atom_site_occupancy + V V0 1 0.67231035 0.73907024 0.30680549 1 + V V1 1 0.34705946 0.37159106 0.52686268 1 + V V2 1 0.36653438 0.38854167 0.52153718 1 + V V3 1 0.00921166 0.93152356 0.19919881 1 + V V4 1 0.95677882 0.91787136 0.49960217 1 + Ni Ni5 1 0.51781291 0.50244284 0.32125005 1 + V V6 1 0.09629190 0.08547926 0.87447387 1 + V V7 1 0.49141514 0.52637476 0.74801213 1 + V V8 1 0.78377295 0.75774127 0.46150076 1 + V V9 1 0.86805952 0.95672750 0.67616075 1 + V V10 1 0.17673326 0.06560791 0.04854067 1 + V V11 1 0.09662235 0.06218648 0.66348130 1 + V V12 1 0.35039228 0.35385907 0.34141171 1 + V V13 1 0.78324890 0.72533828 0.17591193 1 + V V14 1 0.96514380 0.07838918 0.98198533 1 + V V15 1 0.95602196 0.05976810 0.88894337 1 + V V16 1 0.19058158 0.27560839 0.92930967 1 + V V17 1 0.64331728 0.62581986 0.41340443 1 + V V18 1 0.29390040 0.31569031 0.45524845 1 + V V19 1 0.18374869 0.24463138 0.70731211 1 + V V20 1 0.02409820 0.10269854 0.72434735 1 + V V21 1 0.45413128 0.44173402 0.29142430 1 + V V22 1 0.91658413 0.93522346 0.09281332 1 + V V23 1 0.02599846 0.12289974 0.88971388 1 diff --git a/tests/io/test_cif.py b/tests/io/test_cif.py index 03b8181297e..680c4e58059 100644 --- a/tests/io/test_cif.py +++ b/tests/io/test_cif.py @@ -219,6 +219,18 @@ def test_cif_parser(self): assert len(parser.parse_structures(primitive=False)[0]) == 2 assert not parser.has_errors + def test_parse_bad_superflat(self): + """ + Test unphysically "flat" structure with volume near zero, + which would originally lead to infinite loop (PR4133). + """ + parser = CifParser(f"{TEST_FILES_DIR}/cif/bad_superflat_inf_loop.cif") + with ( + pytest.raises(ValueError, match="Invalid CIF file with no structures"), + pytest.warns(UserWarning, match="lattice.volume=0.0000 ų below threshold"), + ): + parser.parse_structures() + def test_get_symmetrized_structure(self): parser = CifParser(f"{TEST_FILES_DIR}/cif/Li2O.cif") sym_structure = parser.parse_structures(primitive=False, symmetrized=True)[0] From 33f4ea27eb2637cc5e6ab565bd1d3c794ddd4a59 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Sat, 26 Oct 2024 20:50:35 +0800 Subject: [PATCH 5/8] use min thickness for robustness --- src/pymatgen/io/cif.py | 12 +++++++----- tests/io/test_cif.py | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/pymatgen/io/cif.py b/src/pymatgen/io/cif.py index 47f782f87cc..49d905ed978 100644 --- a/src/pymatgen/io/cif.py +++ b/src/pymatgen/io/cif.py @@ -969,7 +969,7 @@ def _get_structure( primitive: bool, symmetrized: bool, check_occu: bool = False, - min_vol: float = 1.0, + min_thickness: float = 0.1, ) -> Structure | None: """Generate structure from part of the CIF. @@ -978,7 +978,7 @@ def _get_structure( primitive (bool): Whether to return primitive unit cells. symmetrized (bool): Whether to return SymmetrizedStructure. check_occu (bool): Whether to check site for unphysical occupancy > 1. - min_vol (float): Minimum volume in Angstrom^3 to consider structure as valid. + min_thickness (float): Minimum thickness in Angstrom to consider structure as valid. This is added to guard against unphysical 2D-like structure, which could result in infinite loop for searching near neighbours. @@ -1006,9 +1006,11 @@ def get_matching_coord( lattice = self.get_lattice(data) - # Check lattice volume - if lattice is not None and lattice.volume < min_vol: - raise ValueError(f"{lattice.volume=:.4f} ų below threshold, double check your structure.") + # Check minimal lattice thickness + if lattice is not None: + thickness = [lattice.d_hkl((1, 0, 0)), lattice.d_hkl((0, 1, 0)), lattice.d_hkl((0, 0, 1))] + if any(t < min_thickness for t in thickness): + raise ValueError(f"{thickness=} Šbelow threshold, double check your structure.") # If magCIF, get magnetic symmetry moments and magmoms # else standard CIF, and use empty magmom dict diff --git a/tests/io/test_cif.py b/tests/io/test_cif.py index 680c4e58059..43dd95cee6d 100644 --- a/tests/io/test_cif.py +++ b/tests/io/test_cif.py @@ -227,7 +227,7 @@ def test_parse_bad_superflat(self): parser = CifParser(f"{TEST_FILES_DIR}/cif/bad_superflat_inf_loop.cif") with ( pytest.raises(ValueError, match="Invalid CIF file with no structures"), - pytest.warns(UserWarning, match="lattice.volume=0.0000 ų below threshold"), + pytest.warns(UserWarning, match="Šbelow threshold, double check your structure."), ): parser.parse_structures() From 3c097d9c42093a8613e623e28d31acc7af23ef34 Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Tue, 29 Oct 2024 19:39:07 +0800 Subject: [PATCH 6/8] compress test file --- tests/files/cif/bad_superflat_inf_loop.cif | 51 ------------------ tests/files/cif/bad_superflat_inf_loop.cif.gz | Bin 0 -> 945 bytes tests/io/test_cif.py | 2 +- 3 files changed, 1 insertion(+), 52 deletions(-) delete mode 100644 tests/files/cif/bad_superflat_inf_loop.cif create mode 100644 tests/files/cif/bad_superflat_inf_loop.cif.gz diff --git a/tests/files/cif/bad_superflat_inf_loop.cif b/tests/files/cif/bad_superflat_inf_loop.cif deleted file mode 100644 index 5c660a061b0..00000000000 --- a/tests/files/cif/bad_superflat_inf_loop.cif +++ /dev/null @@ -1,51 +0,0 @@ -# unphysically "flat" structure with volume near zero -# which would originally lead to infinite loop (PR4133) -data_V23Ni -_symmetry_space_group_name_H-M 'P 1' -_cell_length_a 1.49244463 -_cell_length_b 3.79421616 -_cell_length_c 30.42398643 -_cell_angle_alpha 20.62129211 -_cell_angle_beta 83.61568451 -_cell_angle_gamma 62.99439240 -_symmetry_Int_Tables_number 1 -_chemical_formula_structural V23Ni -_chemical_formula_sum 'V23 Ni1' -_cell_volume 0.00000000 -_cell_formula_units_Z 1 -loop_ - _symmetry_equiv_pos_site_id - _symmetry_equiv_pos_as_xyz - 1 'x, y, z' -loop_ - _atom_site_type_symbol - _atom_site_label - _atom_site_symmetry_multiplicity - _atom_site_fract_x - _atom_site_fract_y - _atom_site_fract_z - _atom_site_occupancy - V V0 1 0.67231035 0.73907024 0.30680549 1 - V V1 1 0.34705946 0.37159106 0.52686268 1 - V V2 1 0.36653438 0.38854167 0.52153718 1 - V V3 1 0.00921166 0.93152356 0.19919881 1 - V V4 1 0.95677882 0.91787136 0.49960217 1 - Ni Ni5 1 0.51781291 0.50244284 0.32125005 1 - V V6 1 0.09629190 0.08547926 0.87447387 1 - V V7 1 0.49141514 0.52637476 0.74801213 1 - V V8 1 0.78377295 0.75774127 0.46150076 1 - V V9 1 0.86805952 0.95672750 0.67616075 1 - V V10 1 0.17673326 0.06560791 0.04854067 1 - V V11 1 0.09662235 0.06218648 0.66348130 1 - V V12 1 0.35039228 0.35385907 0.34141171 1 - V V13 1 0.78324890 0.72533828 0.17591193 1 - V V14 1 0.96514380 0.07838918 0.98198533 1 - V V15 1 0.95602196 0.05976810 0.88894337 1 - V V16 1 0.19058158 0.27560839 0.92930967 1 - V V17 1 0.64331728 0.62581986 0.41340443 1 - V V18 1 0.29390040 0.31569031 0.45524845 1 - V V19 1 0.18374869 0.24463138 0.70731211 1 - V V20 1 0.02409820 0.10269854 0.72434735 1 - V V21 1 0.45413128 0.44173402 0.29142430 1 - V V22 1 0.91658413 0.93522346 0.09281332 1 - V V23 1 0.02599846 0.12289974 0.88971388 1 diff --git a/tests/files/cif/bad_superflat_inf_loop.cif.gz b/tests/files/cif/bad_superflat_inf_loop.cif.gz new file mode 100644 index 0000000000000000000000000000000000000000..dfad7b1b93e9c32c755822a03dd38b4fa9f36a64 GIT binary patch literal 945 zcmV;i15W%OiwFn~$slI{17cxhUvqVEWpZY0VRT<Z*OoeV`*jpZB@&T<2Dey z`zwgu!z94QV6&UAKS&N4EP`MUxe&BQV++v3k)@sSe0`dvWXs4HHbAgh&F-pd`m31x z^XYnNs@S$y@#VFR(-(1>#<`m2u@-O5bP_*@cJ6A?*Krgd^*HRlinmi!oy6NPw=ZHC zn`6^wd|SsCF%6>WUz@&}YS9kES^V<-F9O70cQ0{@;fKQKW*08kuB)f<8ZPHp)!{e} z^EvdftHbZV{V9Za`YzyU7pl5#LtFR9=@cSw!=Ah%B7={y1IKW0Jt;6?9%EIC$vr9b z&d>^s{jse>Y|p2ZSINCmpu7Tjh#%@Hg&pn)U+_d&bKZU>J zp{*~WpSwdnrUn>)s=GvZcpb)WZev*ajx8Tp`1${#xl6QgSUfj3Y8GvTkbAj&mdJ{m zS%^#cCtsc92s?4Fu>N;$eundK2^W?tG%ueA<0ZUbKX#mgD|r7VuHVGR(=A(^hOXeJ z>$y(&!_YqLxA9Ov?%Xcn8mH#mHdQlSA5OfEv6{mBCwrfr`*_$Js%k#RzT%VO2Y+R* zoF%pjL84A;i(Xo(D6LT%CpG!RQ9;0hK(bPMGI?!5dysjpm2rlDcZ6CI#%LsTdF`Ag zFt#8-bA}B97lf3_b|&Zb2wI`eYw#Ytb8tt{g7Df{>zvAKu+9Qz1bJ_y0$UKzP5S8t zqxl>gysWuzQm*vQw%1bX`z>az%NsuLWnOb5)~kGrvqTo1y(8>`AP)o@XkiE~*^IE{ zBq+c;!Yv5v&|2k-BHCI4mDwU5Hz^Z(Mfe5bl2l%+l?P?DEZXwmOS|a`Y@|b=H3D_0_7r2csP&Wz!IA_D*>;c(&Ous5X$>+$-(9E60G1 Date: Tue, 29 Oct 2024 19:43:32 +0800 Subject: [PATCH 7/8] lower the threshold even further --- src/pymatgen/io/cif.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymatgen/io/cif.py b/src/pymatgen/io/cif.py index 49d905ed978..4682d9bebb2 100644 --- a/src/pymatgen/io/cif.py +++ b/src/pymatgen/io/cif.py @@ -969,7 +969,7 @@ def _get_structure( primitive: bool, symmetrized: bool, check_occu: bool = False, - min_thickness: float = 0.1, + min_thickness: float = 0.01, ) -> Structure | None: """Generate structure from part of the CIF. From d35c43566f82f975c0c4b6dd02a117c4f980336e Mon Sep 17 00:00:00 2001 From: "Haoyu (Daniel)" Date: Tue, 29 Oct 2024 19:46:10 +0800 Subject: [PATCH 8/8] update comment --- src/pymatgen/io/cif.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pymatgen/io/cif.py b/src/pymatgen/io/cif.py index 4682d9bebb2..82d43e43cbb 100644 --- a/src/pymatgen/io/cif.py +++ b/src/pymatgen/io/cif.py @@ -979,7 +979,7 @@ def _get_structure( symmetrized (bool): Whether to return SymmetrizedStructure. check_occu (bool): Whether to check site for unphysical occupancy > 1. min_thickness (float): Minimum thickness in Angstrom to consider structure as valid. - This is added to guard against unphysical 2D-like structure, + This is added to guard against unphysical small/thin structure, which could result in infinite loop for searching near neighbours. Returns: