From 024a17af2a711dc1effadb5b123da32533954d3c Mon Sep 17 00:00:00 2001 From: Marcin Wojdyr Date: Tue, 7 May 2024 21:12:24 +0200 Subject: [PATCH] SmallStructure: change how space group is determined and stored Backward incompatible! SmallStructure::find_spacegroup() was replaced with a member variable SmallStructure::spacegroup. Now we also read Hall symbol, xyz symops and the space group number (previously, only H-M symbol); any combination of these items can be used to set spacegroup. --- docs/mol.rst | 88 ++++++++++++++++++++++++----- include/gemmi/small.hpp | 120 ++++++++++++++++++++++++++-------------- include/gemmi/smcif.hpp | 9 ++- python/read.cpp | 11 ++-- 4 files changed, 161 insertions(+), 67 deletions(-) diff --git a/docs/mol.rst b/docs/mol.rst index fd26aba7..9fd4fa7d 100644 --- a/docs/mol.rst +++ b/docs/mol.rst @@ -94,9 +94,7 @@ together with the unit cell and symmetry. >>> SiC.cell >>> # content of _symmetry_space_group_name_H-M or _space_group_name_H-M_alt - >>> SiC.spacegroup_hm - 'F -4 3 m' - >>> SiC.find_spacegroup() # based on spacegroup_hm + >>> SiC.spacegroup >>> list(SiC.sites) [, ] @@ -173,25 +171,85 @@ Alternatively, the same can be done in two steps: >>> SiC = gemmi.make_small_structure_from_block(cif_doc.sole_block()) Now you also have access to the CIF document. -Let's use it to obtain SpaceGroup from the symmetry operators -and check if it is consistent with the H-M name. + +.. _small_spacegroup: + +SmallStructure::spacegroup +-------------------------- + +When reading a small-molecule CIF file, a few CIF items that describe +the space group are read and stored in member variables: .. doctest:: - >>> op_list = cif_doc[0].find_values('_symmetry_equiv_pos_as_xyz') - >>> gops = gemmi.GroupOps([gemmi.Op(o) for o in op_list]) - >>> gemmi.find_spacegroup_by_ops(gops) - - >>> # find_spacegroup() is based on the H-M name. 
- >>> _ is SiC.find_spacegroup() - True + >>> st = gemmi.read_small_structure('../tests/2013551.cif') + >>> st.symops + ['x, y, z', '-y, x-y, z', 'y, x, -z', '-x+y, -x, z', '-x, -x+y, -z', 'x-y, -y, -z', '-x, -y, -z', 'y, -x+y, -z', '-y, -x, z', 'x-y, x, -z', 'x, x-y, z', '-x+y, y, z'] + >>> st.spacegroup_hall + '-P 3 2"' + >>> st.spacegroup_hm + 'P -3 m 1' + >>> st.spacegroup_number + 164 + +and the function `set_spacegroup("SH2")` is automatically +run to set `spacegroup`: + +.. doctest:: + + >>> st.spacegroup + + +`set_spacegroup()` takes one argument, a string in which each character +specifies what to use for space group determination: + +* `S` = symmetry operations stored in `symops`, +* `H` = Hall symbol from `spacegroup_hall` (we compare symmetry operations + encoded in the Hall symbol, not the strings), +* `1` = H-M symbol; for space groups such as "P n n n" that have two origin + choices listed in the International Tables, use *Origin Choice 1*, +* `2` = H-M symbol, with *Origin Choice 2* where applicable, +* `N` = the space group number. + +The first item that matches one of the 560+ space group settings tabulated +in Gemmi sets `spacegroup`. To use a different order of items than SH2, +call set_spacegroup() again: + +.. doctest:: + + >>> st.set_spacegroup('S1') + +Errors such as an incorrect format of the symop triplets or of the Hall +symbol are silently ignored, and the consistency between different items +is not checked. That's because this function is run when reading a file; +throwing an exception at that stage would prevent reading a file. +We have a separate function to check for errors and inconsistencies. +It returns a string, one line -- one error: + +.. doctest:: + + >>> st.check_spacegroup() + '' + +If the spacegroup setting used in a file is not tabulated in Gemmi, +you can still create a GroupOps object with symmetry operations: + +.. 
doctest:: + + >>> gemmi.GroupOps([gemmi.Op(o) for o in st.symops]) #doctest: +ELLIPSIS + + >>> # or + >>> gemmi.symops_from_hall(st.spacegroup_hall) #doctest: +ELLIPSIS + In C++ it would be similar, except that the following function -would be used to make gemmi::GroupOps:: +would be used to make gemmi::GroupOps from symops:: GroupOps split_centering_vectors(const std::vector& ops) ----- + +SmallStructure <-> Structure +---------------------------- If your structure is stored in a macromolecular format (PDB, mmCIF) you can read it first as macromolecular :ref:`hierarchy ` @@ -209,7 +267,7 @@ You could also create SmallStructure from scratch: >>> small = gemmi.SmallStructure() >>> small.spacegroup_hm = 'F -4 3 m' >>> small.cell = gemmi.UnitCell(4.358, 4.358, 4.358, 90, 90, 90) - >>> small.setup_cell_images() + >>> small.set_spacegroup("2") >>> # add a single atom >>> site = gemmi.SmallStructure.Site() >>> site.label = 'C1' diff --git a/include/gemmi/small.hpp b/include/gemmi/small.hpp index 903da520..28eeaf15 100644 --- a/include/gemmi/small.hpp +++ b/include/gemmi/small.hpp @@ -55,67 +55,101 @@ struct SmallStructure { std::string name; UnitCell cell; + const SpaceGroup* spacegroup = nullptr; std::string spacegroup_hm; std::string spacegroup_hall; - int spacegroup_number = 0; - std::vector symop_xyz; - const SpaceGroup* sg_cache = nullptr; + int spacegroup_number = 0; // 0 = not set + std::vector symops; std::vector sites; std::vector atom_types; double wavelength = 0.; // the first wavelength if multiple std::vector get_all_unit_cell_sites() const; - /// \param order should be made of letter xamn, for example: "xm" - /// x = list of xyz symmetry operation - /// a = Hall name, - /// m = H-M name, - /// n = space group number - bool determine_spacegroup_from(const std::string& order) { - sg_cache = nullptr; - for (char letter : order) { - sg_cache = find_spacegroup_from(letter); - if (sg_cache) - break; - } - // note: null sg_cache causes find_spacegroup() to fallback - 
setup_cell_images(); - return sg_cache != nullptr; - } - - // If sg_cache is left null, uses spacegroup_hm, - // as it was before determine_spacegroup_from() was introduced. - const SpaceGroup* find_spacegroup() const { - return sg_cache ? sg_cache : find_spacegroup_from('m'); - } + // deprecated, use directly spacegroup + const SpaceGroup* find_spacegroup() const { return spacegroup; } - const SpaceGroup* find_spacegroup_from_symop_xyz() const { + const SpaceGroup* find_spacegroup_from_symops() const { + if (symops.empty()) + return nullptr; std::vector ops; - ops.reserve(symop_xyz.size()); - for (const std::string& xyz : symop_xyz) + ops.reserve(symops.size()); + for (const std::string& xyz : symops) ops.push_back(parse_triplet(xyz)); GroupOps gops = split_centering_vectors(ops); return find_spacegroup_by_ops(gops); } - const SpaceGroup* find_spacegroup_from(char letter) const { - switch (lower(letter)) { - case 'x': - return find_spacegroup_from_symop_xyz(); - case 'a': - if (!spacegroup_hall.empty()) - return find_spacegroup_by_ops(symops_from_hall(spacegroup_hall.c_str())); - return nullptr; - case 'm': - return find_spacegroup_by_name(spacegroup_hm, cell.alpha, cell.gamma); + void set_spacegroup(const char* order) { + spacegroup = nullptr; + if (order) + for (const char* c = order; *c != '\0' && spacegroup == nullptr; ++c) { + try { + spacegroup = get_spacegroup_from(*c); + } catch (std::exception&) {} + } + setup_cell_images(); + } + + const SpaceGroup* get_spacegroup_from(char c) const { + switch (lower(c)) { + case 's': + return find_spacegroup_from_symops(); + case 'h': + if (spacegroup_hall.empty()) + return nullptr; + return find_spacegroup_by_ops(symops_from_hall(spacegroup_hall.c_str())); + case '1': + case '2': { + if (spacegroup_hm.empty()) + return nullptr; + char prefer[] = {c, '\0'}; + return find_spacegroup_by_name(spacegroup_hm, cell.alpha, cell.gamma, prefer); + } case 'n': - if (spacegroup_number != 0) - return 
find_spacegroup_by_number(spacegroup_number); - return nullptr; + if (spacegroup_number == 0) + return nullptr; + return find_spacegroup_by_number(spacegroup_number); default: - throw std::invalid_argument(cat("determine_spacegroup_from(): symbol '", - letter, "' is not one of x a m n")); + throw std::invalid_argument("set_spacegroup(): wrong character in 'order'"); + } + } + + std::string check_spacegroup() { + std::string err; + if (!symops.empty()) + try { + auto sg = find_spacegroup_from_symops(); + if (!sg) + cat_to(err, "space group from symops not found in the table\n"); + else if (sg != spacegroup) + cat_to(err, "space group from symops differs: ", sg->xhm(), '\n'); + } catch (std::exception& e) { + cat_to(err, "error while processing symops: ", e.what(), '\n'); + } + if (!spacegroup_hall.empty()) + try { + auto sg = find_spacegroup_by_ops(symops_from_hall(spacegroup_hall.c_str())); + if (!sg) + cat_to(err, "space group from Hall symbol (", spacegroup_hall, + ") not found in the table\n"); + else if (spacegroup != sg) + cat_to(err, "space group from Hall symbol (", spacegroup_hall, + ") differs: ", sg->xhm(), '\n'); + } catch (std::exception& e) { + cat_to(err, "error while processing Hall symbol: ", e.what(), '\n'); + } + if (!spacegroup_hm.empty()) { + auto sg = find_spacegroup_by_name(spacegroup_hm, cell.alpha, cell.gamma, "2"); + if (!sg) + cat_to(err, "H-M symbol (", spacegroup_hm, ") not found in the table\n"); + else if (!spacegroup || strcmp(spacegroup->hm, sg->hm) != 0) + cat_to(err, "space group from H-M symbol (", spacegroup_hm, + ") differs: ", sg->hm, '\n'); } + if (spacegroup_number != 0 && spacegroup && spacegroup->number != spacegroup_number) + cat_to(err, "space group number (", spacegroup_number, ") differs\n"); + return err; } const AtomType* get_atom_type(const std::string& symbol) const { diff --git a/include/gemmi/smcif.hpp b/include/gemmi/smcif.hpp index 92bab7aa..b4e93d15 100644 --- a/include/gemmi/smcif.hpp +++ 
b/include/gemmi/smcif.hpp @@ -38,13 +38,13 @@ SmallStructure make_small_structure_from_block(const cif::Block& block_) { for (const char* tag : {"_space_group_symop_operation_xyz", "_symmetry_equiv_pos_as_xyz"}) { if (const cif::Column col = block.find_values(tag)) { - st.symop_xyz.reserve(col.length()); + st.symops.reserve(col.length()); for (const std::string& value : col) - st.symop_xyz.push_back(cif::as_string(value)); + st.symops.push_back(cif::as_string(value)); break; } } - for (const char* tag : { "_space_group_name_Hall", "_symmetry_space_group_name_Hall"}) + for (const char* tag : {"_space_group_name_Hall", "_symmetry_space_group_name_Hall"}) if (const std::string* val = block.find_value(tag)) st.spacegroup_hall = cif::as_string(*val); for (const char* tag : {"_space_group_IT_number", "_symmetry_Int_Tables_number"}) @@ -52,6 +52,7 @@ SmallStructure make_small_structure_from_block(const cif::Block& block_) { st.spacegroup_number = cif::as_int(*val, 0); break; } + st.set_spacegroup("SH2"); enum { kLabel, kSymbol, kX, kY, kZ, kUiso, kBiso, kOcc, kDisorderGroup }; cif::Table atom_table = block.find("_atom_site_", @@ -123,8 +124,6 @@ SmallStructure make_small_structure_from_block(const cif::Block& block_) { } if (cif::Column w_col = block.find_values("_diffrn_radiation_wavelength")) st.wavelength = cif::as_number(w_col.at(0)); - st.setup_cell_images(); - return st; } diff --git a/python/read.cpp b/python/read.cpp index 88bb8959..0260f79a 100644 --- a/python/read.cpp +++ b/python/read.cpp @@ -130,23 +130,26 @@ void add_small(py::module& m) { .def(py::init<>()) .def_readwrite("name", &SmallStructure::name) .def_readwrite("cell", &SmallStructure::cell) + .def_readonly("spacegroup", &SmallStructure::spacegroup, + py::return_value_policy::reference_internal) .def_readwrite("spacegroup_hm", &SmallStructure::spacegroup_hm) - .def_readwrite("symop_xyz", &SmallStructure::symop_xyz) + .def_readwrite("spacegroup_hall", &SmallStructure::spacegroup_hall) + 
.def_readwrite("spacegroup_number", &SmallStructure::spacegroup_number) + .def_readwrite("symops", &SmallStructure::symops) .def_readonly("sites", &SmallStructure::sites) .def_readonly("atom_types", &SmallStructure::atom_types) .def_readwrite("wavelength", &SmallStructure::wavelength) .def("add_site", [](SmallStructure& self, const SmallStructure::Site& site) { self.sites.push_back(site); }) - .def("find_spacegroup", &SmallStructure::find_spacegroup) - .def("determine_spacegroup_from", &SmallStructure::determine_spacegroup_from) + .def("set_spacegroup", &SmallStructure::set_spacegroup, py::arg("order")) + .def("check_spacegroup", &SmallStructure::check_spacegroup) .def("get_atom_type", &SmallStructure::get_atom_type) .def("get_all_unit_cell_sites", &SmallStructure::get_all_unit_cell_sites) .def("remove_hydrogens", &SmallStructure::remove_hydrogens) .def("change_occupancies_to_crystallographic", &SmallStructure::change_occupancies_to_crystallographic, py::arg("max_dist")=0.4) - .def("setup_cell_images", &SmallStructure::setup_cell_images) .def("make_cif_block", &make_cif_block_from_small_structure) .def("__repr__", [](const SmallStructure& self) { return "";