diff --git a/corelib/src/libs/SireIO/CMakeLists.txt b/corelib/src/libs/SireIO/CMakeLists.txt index a56d24105..744809376 100644 --- a/corelib/src/libs/SireIO/CMakeLists.txt +++ b/corelib/src/libs/SireIO/CMakeLists.txt @@ -48,6 +48,7 @@ set ( SIREIO_HEADERS netcdffile.h pdb.h pdb2.h + pdbx.h perturbationslibrary.h protoms.h sdf.h @@ -95,6 +96,7 @@ set ( SIREIO_SOURCES netcdffile.cpp pdb.cpp pdb2.cpp + pdbx.cpp perturbationslibrary.cpp protoms.cpp sdf.cpp diff --git a/corelib/src/libs/SireIO/mol2.cpp b/corelib/src/libs/SireIO/mol2.cpp index 0c06dc247..568284b97 100644 --- a/corelib/src/libs/SireIO/mol2.cpp +++ b/corelib/src/libs/SireIO/mol2.cpp @@ -2380,6 +2380,9 @@ void Mol2::addToSystem(System &system, const PropertyMap &map) const // you should loop through each molecule in the system and work out // which ones are described in the file, and then add data from the file // to thise molecules. + throw SireError::unsupported(QObject::tr( + "You cannot add data from a mol2 file to an existing system!"), + CODELOC); } /** Internal function used to get the molecule structure for molecule 'imol'. */ diff --git a/corelib/src/libs/SireIO/moleculeparser.cpp b/corelib/src/libs/SireIO/moleculeparser.cpp index 63f4b92bd..d3d7fb7b1 100644 --- a/corelib/src/libs/SireIO/moleculeparser.cpp +++ b/corelib/src/libs/SireIO/moleculeparser.cpp @@ -1929,8 +1929,19 @@ MoleculeParserPtr MoleculeParser::parse(const System &system, const QString &for cannot be recognised, or if there is an error in parsing. */ MoleculeParserPtr MoleculeParser::parse(const QString &filename, const PropertyMap &map) { - MoleculeParserPtr parser = MoleculeParser::_pvt_parse(filename, map); - getFileCache()->clear(); + MoleculeParserPtr parser; + + try + { + parser = MoleculeParser::_pvt_parse(filename, map); + getFileCache()->clear(); + } + catch (...) 
+ { + getFileCache()->clear(); + throw; + } + return parser; } @@ -1939,48 +1950,56 @@ QList MoleculeParser::parse(const QStringList &filenames, con { QList result; - if (filenames.count() == 1) - { - result.append(MoleculeParser::_pvt_parse(filenames[0], map)); - } - else + try { - QVector parsers(filenames.count()); - - bool run_parallel = true; - - if (map["parallel"].hasValue()) + if (filenames.count() == 1) { - run_parallel = map["parallel"].value().asA().value(); - } - - if (run_parallel) - { - // parse the files in parallel - we use a grain size of 1 - // as each file can be pretty big, and there won't be many of them - tbb::parallel_for( - tbb::blocked_range(0, filenames.count(), 1), - [&](tbb::blocked_range r) - { - for (int i = r.begin(); i < r.end(); ++i) - { - parsers[i] = MoleculeParser::_pvt_parse(filenames[i], map); - } - }, - tbb::simple_partitioner()); + result.append(MoleculeParser::_pvt_parse(filenames[0], map)); } else { - for (int i = 0; i < filenames.count(); ++i) + QVector parsers(filenames.count()); + + bool run_parallel = true; + + if (map["parallel"].hasValue()) + { + run_parallel = map["parallel"].value().asA().value(); + } + + if (run_parallel) { - parsers[i] = MoleculeParser::_pvt_parse(filenames[i], map); + // parse the files in parallel - we use a grain size of 1 + // as each file can be pretty big, and there won't be many of them + tbb::parallel_for( + tbb::blocked_range(0, filenames.count(), 1), + [&](tbb::blocked_range r) + { + for (int i = r.begin(); i < r.end(); ++i) + { + parsers[i] = MoleculeParser::_pvt_parse(filenames[i], map); + } + }, + tbb::simple_partitioner()); } + else + { + for (int i = 0; i < filenames.count(); ++i) + { + parsers[i] = MoleculeParser::_pvt_parse(filenames[i], map); + } + } + + result = parsers.toList(); } - result = parsers.toList(); + getFileCache()->clear(); + } + catch (...) 
+ { + getFileCache()->clear(); + throw; } - - getFileCache()->clear(); return result; } diff --git a/corelib/src/libs/SireIO/pdbx.cpp b/corelib/src/libs/SireIO/pdbx.cpp new file mode 100644 index 000000000..5994d377d --- /dev/null +++ b/corelib/src/libs/SireIO/pdbx.cpp @@ -0,0 +1,379 @@ +/********************************************\ + * + * Sire - Molecular Simulation Framework + * + * Copyright (C) 2023 Christopher Woods + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * For full details of the license please see the COPYING file + * that should have come with this distribution. 
+ * + * You can contact the authors via the website + * at https://sire.openbiosim.org + * +\*********************************************/ + +#include "SireIO/pdbx.h" + +#include "SireSystem/system.h" + +#include "SireBase/parallel.h" +#include "SireBase/stringproperty.h" + +#include "SireError/errors.h" +#include "SireIO/errors.h" + +#include "SireMol/atomcharges.h" +#include "SireMol/atomcoords.h" +#include "SireMol/atomelements.h" +#include "SireMol/core.h" +#include "SireMol/molecule.h" +#include "SireMol/moleditor.h" +#include "SireMol/trajectory.h" + +#include "SireStream/datastream.h" +#include "SireStream/shareddatastream.h" + +#include "SireUnits/units.h" + +#include + +using namespace SireBase; +using namespace SireIO; +using namespace SireMol; +using namespace SireSystem; +using namespace SireStream; + +const RegisterParser register_pdbx; + +static const RegisterMetaType r_pdbx; + +QDataStream &operator<<(QDataStream &ds, const PDBx &pdbx) +{ + writeHeader(ds, r_pdbx, 1); + + SharedDataStream sds(ds); + + sds << pdbx.parsed_system << static_cast(pdbx); + + return ds; +} + +QDataStream &operator>>(QDataStream &ds, PDBx &pdbx) +{ + VersionID v = readHeader(ds, r_pdbx); + + if (v == 1) + { + SharedDataStream sds(ds); + sds >> pdbx.parsed_system >> static_cast(pdbx); + } + else + throw version_error(v, "1", r_pdbx, CODELOC); + + return ds; +} + +static PDBxReaderFunction reader_function; +static PDBxWriterFunction writer_function; + +void SireIO::register_pdbx_loader_functions(const PDBxWriterFunction &writer, + const PDBxReaderFunction &reader) +{ + writer_function = writer; + reader_function = reader; +} + +/** Constructor */ +PDBx::PDBx() : ConcreteProperty() +{ +} + +/** Construct to read in the data from the file called 'filename'. 
The + passed property map can be used to pass extra parameters to control + the parsing */ +PDBx::PDBx(const QString &filename, const PropertyMap &map) : ConcreteProperty(filename, map) +{ + // the file has been read into memory and is available via + // the MoleculeParser::lines() function. + + // a parameter has also been read in MoleculeParser to say whether + // we are allowed to use multiple cores to parse the file, e.g. + // MoleculeParser::usesParallel() will be true + + // parse the data in the parse function + this->parseLines(map); + + // now make sure that everything is correct with this object + this->assertSane(); +} + +/** Construct to read in the data from the passed text lines. The + passed property map can be used to pass extra parameters to control + the parsing */ +PDBx::PDBx(const QStringList &lines, const PropertyMap &map) : ConcreteProperty(lines, map) +{ + // the file has been read into memory and is available via + // the MoleculeParser::lines() function. + + // a parameter has also been read in MoleculeParser to say whether + // we are allowed to use multiple cores to parse the file, e.g. + // MoleculeParser::usesParallel() will be true + + // parse the data in the parse function + this->parseLines(map); + + // now make sure that everything is correct with this object + this->assertSane(); +} + +/** Construct this parser by extracting all necessary information from the + passed SireSystem::System, looking for the properties that are specified + in the passed property map */ +PDBx::PDBx(const SireSystem::System &system, const PropertyMap &map) : ConcreteProperty(system, map) +{ + if (writer_function.empty()) + throw SireError::unsupported( + "No PDBx writer function has been registered. You need to " + "install a library to write PDBx/mmCIF files, e.g. gemmi.
" + "Do this by running 'mamba install -c conda-forge gemmi' " + "and then re-running this script.", + CODELOC); + + MoleculeParser::setLines(writer_function(system, map).toVector()); + parsed_system = system; +} + +/** Copy constructor */ +PDBx::PDBx(const PDBx &other) + : ConcreteProperty(other), + parsed_system(other.parsed_system), parse_warnings(other.parse_warnings) +{ +} + +/** Destructor */ +PDBx::~PDBx() +{ +} + +/** Copy assignment operator */ +PDBx &PDBx::operator=(const PDBx &other) +{ + if (this != &other) + { + parsed_system = other.parsed_system; + parse_warnings = other.parse_warnings; + + MoleculeParser::operator=(other); + } + + return *this; +} + +/** Comparison operator */ +bool PDBx::operator==(const PDBx &other) const +{ + return parsed_system == other.parsed_system and + MoleculeParser::operator==(other); +} + +/** Comparison operator */ +bool PDBx::operator!=(const PDBx &other) const +{ + return not operator==(other); +} + +/** Return the C++ name for this class */ +const char *PDBx::typeName() +{ + return QMetaType::typeName(qMetaTypeId()); +} + +/** Return the C++ name for this class */ +const char *PDBx::what() const +{ + return PDBx::typeName(); +} + +bool PDBx::isTopology() const +{ + return true; +} + +bool PDBx::isFrame() const +{ + return true; +} + +int PDBx::nFrames() const +{ + return 1; +} + +Frame PDBx::getFrame(int i) const +{ + i = SireID::Index(i).map(this->nFrames()); + + throw SireError::unsupported(QObject::tr( + "You cannot extract frame data from a PDBx/mmCIF file."), + CODELOC); + + return Frame(); +} + +/** Return the parser that has been constructed by reading in the passed + file using the passed properties */ +MoleculeParserPtr PDBx::construct(const QString &filename, const PropertyMap &map) const +{ + return PDBx(filename, map); +} + +/** Return the parser that has been constructed by reading in the passed + text lines using the passed properties */ +MoleculeParserPtr PDBx::construct(const QStringList &lines, const
PropertyMap &map) const +{ + return PDBx(lines, map); +} + +/** Return the parser that has been constructed by extracting all necessary + data from the passed SireSystem::System using the specified properties */ +MoleculeParserPtr PDBx::construct(const SireSystem::System &system, const PropertyMap &map) const +{ + return PDBx(system, map); +} + +/** Return a string representation of this parser */ +QString PDBx::toString() const +{ + if (lines().isEmpty()) + return QObject::tr("PDBx::null"); + else + { + return QObject::tr("PDBx( nAtoms() = %1 )") + .arg(nAtoms()); + } +} + +/** Convert the parsed data to a collection of PDBx record lines. */ +QVector PDBx::toLines() const +{ + if (writer_function.empty()) + throw SireError::unsupported( + "No PDBx writer function has been registered. You need to " + "install a library to write PDBx/mmCIF files, e.g. gemmi. " + "Do this by running 'mamba install -c conda-forge gemmi' " + "and then re-running this script.", + CODELOC); + + return writer_function(this->parsed_system, this->propertyMap()).toVector(); +} + +/** Return the format name that is used to identify this file format within Sire */ +QString PDBx::formatName() const +{ + return "PDBx"; +} + +/** Return a description of the file format */ +QString PDBx::formatDescription() const +{ + return QObject::tr("Protein Data Bank PDBx/mmCIF format files."); +} + +/** Return the suffixes that these files are normally associated with */ +QStringList PDBx::formatSuffix() const +{ + static const QStringList suffixes = {"pdbx", "cif"}; + return suffixes; +} + +/** Return the total number of atoms in all molecules. */ +int PDBx::nAtoms() const +{ + return parsed_system.nAtoms(); +} + +/** Function that is called to assert that this object is sane.
This + should raise an exception if the parser is in an invalid state */ +void PDBx::assertSane() const +{ + // check state, raise SireError::program_bug if we are in an invalid state +} + +/** Internal function that is used to actually parse the data contained + in the lines of the file */ +void PDBx::parseLines(const PropertyMap &map) +{ + if (reader_function.empty()) + throw SireError::unsupported( + "No PDBx reader function has been registered. You need to " + "install a library to read PDBx/mmCIF files, e.g. gemmi. " + "Do this by running 'mamba install -c conda-forge gemmi' " + "and then re-running this script.", + CODELOC); + + // only parse if the file contains a line with + // '_audit_conform.dict_name mmcif_pdbx.dic' + + bool is_conformant = false; + + for (const auto &line : lines()) + { + if (line.contains("_audit_conform.dict_name") and line.contains("mmcif_pdbx.dic")) + { + is_conformant = true; + break; + } + else if (line.contains("_atom_site.")) + { + is_conformant = true; + break; + } + } + + if (not is_conformant) + throw SireError::unsupported( + QObject::tr("The file does not appear to be a PDBx/mmCIF file. " + "It does not contain the line '_audit_conform.dict_name mmcif_pdbx.dic'"), + CODELOC); + + parsed_system = reader_function(lines().toList(), map); + + parsed_system.setProperty(map["fileformat"].source(), StringProperty(this->formatName())); + + this->setScore(parsed_system.nAtoms()); +} + +/** Use the data contained in this parser to create a new System of molecules, + assigning properties based on the mapping in 'map' */ +System PDBx::startSystem(const PropertyMap &map) const +{ + return parsed_system; +} + +/** Use the data contained in this parser to add information from the file to + the molecules that exist already in the passed System. For example, this + may be used to add coordinate data from this file to the molecules in + the passed System that are missing coordinate data. 
*/ +void PDBx::addToSystem(System &system, const PropertyMap &map) const +{ + // you should loop through each molecule in the system and work out + // which ones are described in the file, and then add data from the file + // to those molecules. + throw SireError::unsupported(QObject::tr( + "You cannot add data from a PDBx/mmCIF file to an existing system."), + CODELOC); +} diff --git a/corelib/src/libs/SireIO/pdbx.h b/corelib/src/libs/SireIO/pdbx.h new file mode 100644 index 000000000..952938240 --- /dev/null +++ b/corelib/src/libs/SireIO/pdbx.h @@ -0,0 +1,138 @@ +/********************************************\ + * + * Sire - Molecular Simulation Framework + * + * Copyright (C) 2023 Christopher Woods + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * For full details of the license please see the COPYING file + * that should have come with this distribution. 
+ * + * You can contact the authors via the website + * at https://sire.openbiosim.org + * +\*********************************************/ + +#ifndef SIREIO_PDBX_H +#define SIREIO_PDBX_H + +#include "moleculeparser.h" + +#include "SireMaths/vector.h" + +#include "SireSystem/system.h" + +#include + +SIRE_BEGIN_HEADER + +namespace SireIO +{ + class PDBx; +} // namespace SireIO + +namespace SireMol +{ + class Atom; + class MolEditor; + class MoleculeInfoData; + class MoleculeView; + class Residue; +} // namespace SireMol + +SIREIO_EXPORT QDataStream &operator<<(QDataStream &, const SireIO::PDBx &); +SIREIO_EXPORT QDataStream &operator>>(QDataStream &, SireIO::PDBx &); + +namespace SireIO +{ + /** This class holds a parser for reading and writing PDBx/mmcif files */ + class SIREIO_EXPORT PDBx : public SireBase::ConcreteProperty + { + + friend SIREIO_EXPORT QDataStream & ::operator<<(QDataStream &, const PDBx &); + friend SIREIO_EXPORT QDataStream & ::operator>>(QDataStream &, PDBx &); + + public: + PDBx(); + PDBx(const QString &filename, const PropertyMap &map = PropertyMap()); + + PDBx(const QStringList &lines, const PropertyMap &map = PropertyMap()); + PDBx(const SireSystem::System &system, const PropertyMap &map = PropertyMap()); + + PDBx(const PDBx &other); + + ~PDBx(); + + PDBx &operator=(const PDBx &other); + + bool operator==(const PDBx &other) const; + bool operator!=(const PDBx &other) const; + + static const char *typeName(); + + const char *what() const; + + MoleculeParserPtr construct(const QString &filename, const PropertyMap &map) const; + + MoleculeParserPtr construct(const QStringList &lines, const PropertyMap &map) const; + + MoleculeParserPtr construct(const SireSystem::System &system, const PropertyMap &map) const; + + QString toString() const; + QVector toLines() const; + + QString formatName() const; + QString formatDescription() const; + QStringList formatSuffix() const; + + bool isTopology() const; + bool isFrame() const; + + int nAtoms() const; 
+ + int nFrames() const; + SireMol::Frame getFrame(int i) const; + + protected: + SireSystem::System startSystem(const PropertyMap &map) const; + void addToSystem(SireSystem::System &system, const PropertyMap &map) const; + + private: + void assertSane() const; + void parseLines(const PropertyMap &map); + + /** The system that has been read (or to be written) */ + SireSystem::System parsed_system; + + /** Any warnings that were raised when reading the file. */ + QStringList parse_warnings; + }; + + typedef boost::function PDBxWriterFunction; + typedef boost::function PDBxReaderFunction; + + SIREIO_EXPORT void register_pdbx_loader_functions(const PDBxWriterFunction &writer, + const PDBxReaderFunction &reader); + +} // namespace SireIO + +Q_DECLARE_METATYPE(SireIO::PDBx) + +SIRE_EXPOSE_CLASS(SireIO::PDBx) + +SIRE_END_HEADER + +#endif diff --git a/corelib/src/libs/SireMol/moleculeinfodata.cpp b/corelib/src/libs/SireMol/moleculeinfodata.cpp index 4b935cd36..32db83780 100644 --- a/corelib/src/libs/SireMol/moleculeinfodata.cpp +++ b/corelib/src/libs/SireMol/moleculeinfodata.cpp @@ -940,10 +940,12 @@ MoleculeInfoData::MoleculeInfoData(const StructureEditor &editor) : RefCountData // ALL chains must contain at least one residue if (not empty_chains.isEmpty()) + { throw SireMol::missing_residue(QObject::tr("ALL chains must contain at least one residue. 
" "Chains missing residues are %1.") .arg(Sire::toString(empty_chains)), CODELOC); + } /// Finally convert the segments /// diff --git a/corelib/src/libs/SireSystem/forcefieldinfo.cpp b/corelib/src/libs/SireSystem/forcefieldinfo.cpp index de452c0d3..7d6bb5c5a 100644 --- a/corelib/src/libs/SireSystem/forcefieldinfo.cpp +++ b/corelib/src/libs/SireSystem/forcefieldinfo.cpp @@ -186,25 +186,40 @@ ForceFieldInfo::ForceFieldInfo(const System &system, const auto cutoff_prop = map["cutoff"]; + bool requested_no_cutoff = false; + if (cutoff_prop.hasValue()) { this->setCutoff(cutoff_prop.value().asA().toUnit()); } - else if (cutoff_prop.source() != "cutoff") + else { - throw SireError::invalid_arg(QObject::tr( - "The cutoff property should have a value. It cannot be the string " - "'%1'. If you want to specify the cutoff type, using " - "the 'cutoff_type' property.") - .arg(cutoff_prop.source()), - CODELOC); - } + auto s = cutoff_prop.source().toLower().simplified(); - const auto cutoff_type = map["cutoff_type"]; + if (s.startsWith("infinit") or s == "none") + { + requested_no_cutoff = true; + this->setNoCutoff(); + } + else if (s != "cutoff") + { + throw SireError::invalid_arg(QObject::tr( + "The cutoff property should have a value. It cannot be the string " + "'%1'. 
If you want to specify the cutoff type, using " + "the 'cutoff_type' property.") + .arg(cutoff_prop.source()), + CODELOC); + } + } - const auto cutoff_type = map["cutoff_type"]; + if (not requested_no_cutoff) { - this->setCutoffType(cutoff_type.source(), map); + const auto cutoff_type = map["cutoff_type"]; + + if (cutoff_type.hasSource() and cutoff_type.source() != "cutoff_type") + { + this->setCutoffType(cutoff_type.source(), map); + } } const auto ff_prop = map["forcefield"]; @@ -459,6 +474,14 @@ void ForceFieldInfo::setCutoffType(QString cutoff_type) void ForceFieldInfo::setCutoffType(QString s_cutoff_type, const PropertyMap &map) { + if (s_cutoff_type == "auto") + { + if (this->space().isPeriodic()) + s_cutoff_type = "PME"; + else + s_cutoff_type = "RF"; + } + auto cutoff_type = string_to_cutoff_type(s_cutoff_type); if (cutoff_type == NO_CUTOFF) diff --git a/doc/source/acknowledgements.rst b/doc/source/acknowledgements.rst index 785750b24..d30b33f6e 100644 --- a/doc/source/acknowledgements.rst +++ b/doc/source/acknowledgements.rst @@ -156,8 +156,10 @@ OpenMM ------ :mod:`sire` links to `OpenMM `__ to perform accelerated -dynamics (e.g. as part of the ``somd`` program). This is licensed -under either the MIT or LGPL licenses, so compatible with the GPL. +dynamics (e.g. as part of the ``somd`` program), and also via an +interconversion layer to switch between sire and OpenMM system representations. +This is licensed under either the MIT or LGPL licenses, +so compatible with the GPL. Regress ------- @@ -400,6 +402,44 @@ and `here `__ library +for reading and writing PDBx/mmCIF files. This is used under the terms of +the LGPL3 license. Thanks to this project for providing such a useful +header-only C++ library that is easy to incorporate into other +programs, and that is so well documented and feature-rich. This is provided +via an integration that supports interconversion between sire and gemmi objects.
+ +PEGTL +----- + +`gemmi `__ uses the +`PEGTL `__ library +for parsing mmCIF files. This is used under the terms of the MIT +or Boost Software License (the version bundled with gemmi is MIT, +while the version on GitHub is BSL) + +pyboost11 +--------- + +Thanks to Yung-Yu's excellent +`blog post `__ +on how to interconvert between boost::python and pybind11, and for providing +`pyboost11.hpp `__ +as a header-only library to support interconversion. This was really useful +to let me mix pybind11-wrapped gemmi objects with boost::python-wrapped sire. +The code is used under the terms of the liberal open source license +given at the top of the header file. + +RDKit +----- + +:mod:`sire` provides an integration with the `RDKit `__ +package, supporting interconversion between the sire and RDKit +molecular representations. This is used under the terms of the BSD3 license. + Python Dependencies =================== diff --git a/doc/source/blogposts.rst b/doc/source/blogposts.rst index ab856d8c8..73a24ebf6 100644 --- a/doc/source/blogposts.rst +++ b/doc/source/blogposts.rst @@ -38,7 +38,7 @@ enables :mod:`sire` to perform GPU-accelerated molecular dynamics simulations. GPU-accelerated free energies ----------------------------- -* `GPU-accelerated free energies `__ +* `GPU-accelerated free energies `__ In this fourth post, written around the 2023.4 release, we show how the OpenMM integration has been extended to diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index ee0216c04..3b909ece3 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -12,8 +12,50 @@ Development was migrated into the `OpenBioSim `__ organisation on `GitHub `__. 
-`2023.4.2 `__ ------------------------------------------------------------------------------- +`2023.5.0 `__ - December 2023 +--------------------------------------------------------------------------------------------- + +* Added a new :mod:`sire.options` module that contains new + :class:`sire.options.Option` objects to represent configurable options. + These include documentation, and make it easier to validate and expose + possible values of configurable options. The API docs for + :class:`~sire.options.Option` show how to create your own Option type. + The unit tests in ``tests/options/test_options.py`` show how to use + the options. This is integrated into the sire/OpenMM layer. + +* Extended the ``.atom(s)``, ``.residue(s)``, ``.bond(s)`` and all other + indexing functions so that you can pass in an existing view or views as + the key. This lets you look up views in a container by other views, e.g. + ``mols.bond(mols.atoms()[0], mols.atoms()[1])`` would return the bond + between the first two atoms in the container ``mols``. Also added + an ``error_on_missing`` flag to the ``atoms``, ``residues``, ``bonds`` etc + functions, so that you get a ``KeyError`` exception if there is no match, + and ``error_on_missing`` is ``True``. For example, + ``mols.atoms("element C", error_on_missing=True)`` would raise an exception + if there are no carbon atoms in this container. This is default ``False`` + to keep existing behaviour, but we would recommend setting this to ``True`` + and would like to change the default in the future. + +* Added :func:`sire.convert` support for converting between :mod:`sire` + objects and `Gemmi `__ objects. This + has allowed us to support reading and writing of PDBx/mmCIF files. + We've updated :func:`sire.load` to automatically choose PDBx/mmCIF + files if gemmi-support is available. We've also added support for the + new-style PDB codes (e.g. "pdb_00003nss" instead of "3NSS").
Note that + this needs a custom Gemmi package build, where "shared libraries" are + turned on. This should be available from conda-forge in 2024, but for now, + you will need to clone the `Gemmi feedstock `__ + and build the conda package yourself. You will then need to recompile + sire from source. We will release 2023.5.1 as a conda package once + the conda-forge Gemmi package with shared library support is available. + +* Optimised the ``LambdaLever`` class so that it caches the forcefield parameters + calculated at different lambda values. This means that we don't have to + re-calculate the parameters at each lambda update step. This is a + significant speed-up for alchemical free energy simulations. + +`2023.4.2 `__ - December 2023 +--------------------------------------------------------------------------------------------- * Fixed use of ``QString::compare`` when comparing molecular properties during a water topology swap. @@ -50,7 +92,7 @@ organisation on `GitHub `__. to make it easier to unset mapped properties. `2023.4.1 `__ - October 2023 ---------------------------------------------------------------------------------------------- +-------------------------------------------------------------------------------------------- * Fixed regression introduced in 2023.4.0 that meant that removed the constraints from water molecules that had no internal bonds. These waters would blow up @@ -139,7 +181,7 @@ organisation on `GitHub `__. * Added support for alchemical restraints to the OpenMM dynamics layer. This lets you scale restraints as part of a λ-coordinate. This is - documented in the :doc:`tutorial `. Restraints can be named, meaning that you can scale different restraints at different stages and by different values across the λ-coordinate. @@ -442,7 +484,7 @@ organisation on `GitHub `__. * Added support for performing minimisation and molecular dynamics simulations based on the OpenMM integration. 
This is documented in full via both - :doc:`a tutorial ` and a + :doc:`a tutorial ` and a :doc:`detailed guide `. * Fixed the Amber PRMTOP `dihedral ring bug `__. diff --git a/doc/source/cheatsheet/openmm.rst b/doc/source/cheatsheet/openmm.rst index 40cb191ec..8443cf825 100644 --- a/doc/source/cheatsheet/openmm.rst +++ b/doc/source/cheatsheet/openmm.rst @@ -161,6 +161,9 @@ Available keys and allowable values are listed below. | timestep | Time between integration steps, e.g. | | | ``2 * sr.units.femtosecond`` | +------------------------------+----------------------------------------------------------+ +| tolerance | The tolerance to use for the PME calculation, e.g. | +| | ``0.0001`` | ++------------------------------+----------------------------------------------------------+ | use_dispersion_correction | Whether or not to use the dispersion correction to | | | deal with cutoff issues. This is very expensive. | +------------------------------+----------------------------------------------------------+ diff --git a/doc/source/tutorial/index_part02.rst b/doc/source/tutorial/index_part02.rst index 4ae88143a..06005f51d 100644 --- a/doc/source/tutorial/index_part02.rst +++ b/doc/source/tutorial/index_part02.rst @@ -19,13 +19,11 @@ First, let's load up an example protein, >>> import sire as sr >>> mols = sr.load("7SA1") -Downloading from 'https://files.rcsb.org/download/7SA1.pdb.gz'... -7SA1.pdb.gz -Unzipping './7SA1.pdb.gz'... ->>> mol = mols[0] ->>> print(mol) -Molecule( 7SA1:2 num_atoms=11728 num_residues=1518 ) ->>> mol.view() +Downloading from 'https://files.rcsb.org/download/7SA1.cif.gz'... +Unzipping './7SA1.cif.gz'... +>>> print(mols) +System( name=7SA1 num_molecules=26 num_residues=1518 num_atoms=11728 ) +>>> mols.view() .. image:: part02/images/7SA1.jpg :alt: Picture of 7SA1 viewed in NGLView @@ -35,17 +33,24 @@ Molecule( 7SA1:2 num_atoms=11728 num_residues=1518 ) :mod:`sire` automatically downloads and unpacks structures from the PDB. 
Just put in the PDB code as the argument to :func:`sire.load`. +.. note:: + + A new-format PDBx/mmCIF file will be downloaded if you have + `gemmi <https://gemmi.readthedocs.io>`__ installed. Otherwise, a legacy-format PDB file will be downloaded. + Molecules are constructed as atoms, which be can be (optionally) arranged into residues, chains and segments. We can get the number of each using ->>> print(f"The number of atoms is {mol.num_atoms()}") +>>> print(f"The number of atoms is {mols.num_atoms()}") The number of atoms is 11728 ->>> print(f"The number of residues is {mol.num_residues()}") +>>> print(f"The number of residues is {mols.num_residues()}") The number of residues is 1518 ->>> print(f"The number of chains is {mol.num_chains()}") +>>> print(f"The number of chains is {mols.num_chains()}") The number of chains is 4 ->>> print(f"The number of segments is {mol.num_segments()}") +>>> print(f"The number of segments is {mols.num_segments()}") The number of segments is 0 +>>> print(f"The number of molecules is {mols.num_molecules()}") +The number of molecules is 26 .. toctree:: :maxdepth: 1 diff --git a/doc/source/tutorial/part01/02_loading_a_molecule.rst b/doc/source/tutorial/part01/02_loading_a_molecule.rst index 14f6659c4..f59e2e5e6 100644 --- a/doc/source/tutorial/part01/02_loading_a_molecule.rst +++ b/doc/source/tutorial/part01/02_loading_a_molecule.rst @@ -15,10 +15,17 @@ PDB code as the argument to :func:`sire.load`. We will load structure >>> mols = sr.load("3NSS") >>> print(mols) -Downloading from 'https://files.rcsb.org/download/3NSS.pdb.gz'... -Unzipping './3NSS.pdb.gz'... +Downloading from 'https://files.rcsb.org/download/3NSS.cif.gz'... +Unzipping './3NSS.cif.gz'... System( name=3NSS num_molecules=1 num_residues=1679 num_atoms=6984 ) +.. note:: + + This will download a PDBx/mmCIF file from the RCSB website if + `gemmi `__ is installed. Otherwise, + it will download an (older format) PDB file. You can use either + traditional ("3NSS") or new-style ("pdb_00003nss") PDB codes.
+ If you are running in a Jupyter Notebook (or similar) you can view the molecule by calling the :func:`~sire.mol.SelectorMol.view` function, e.g. diff --git a/doc/source/tutorial/part01/07_supported_file_formats.rst b/doc/source/tutorial/part01/07_supported_file_formats.rst index cb23b5335..7ee3e1b22 100644 --- a/doc/source/tutorial/part01/07_supported_file_formats.rst +++ b/doc/source/tutorial/part01/07_supported_file_formats.rst @@ -9,67 +9,72 @@ You can print the list of supported formats using >>> print(sr.supported_formats()) - ## Parser DCD ## + ## Parser dcd ## Supports files: DCD DCD coordinate/velocity binary trajectory files based on charmm / namd / x-plor format. ################ - ## Parser Gro87 ## - Supports files: gro + ## Parser gro87 ## + Supports files: gro, g87 Gromacs Gro87 structure format files. ################## - ## Parser GroTop ## - Supports files: top + ## Parser grotop ## + Supports files: top, grotop, gtop Gromacs Topology format files. ################### - ## Parser MOL2 ## + ## Parser mol2 ## Supports files: mol2 Sybyl Mol2 format files. ################# - ## Parser PDB ## + ## Parser pdb ## Supports files: pdb Protein Data Bank (PDB) format files. ################ - ## Parser PRM7 ## + ## Parser pdbx ## + Supports files: pdbx, cif + Protein Data Bank PDBx/mmCIF format files. + ################# + + ## Parser prm7 ## Supports files: prm7, prm, top7, top, prmtop7, prmtop Amber topology/parameter format files supported from Amber 7 upwards. ################# - ## Parser PSF ## + ## Parser psf ## Supports files: psf Charmm PSF format files. ################ - ## Parser RST ## + ## Parser rst ## Supports files: rst, crd, trj, traj Amber coordinate/velocity binary (netcdf) restart/trajectory files supported since Amber 9, now default since Amber 16. ################ - ## Parser RST7 ## + ## Parser rst7 ## Supports files: rst7, rst, crd7, crd Amber coordinate/velocity text (ascii) restart files supported from Amber 7 upwards. 
################# - ## Parser SDF ## + ## Parser sdf ## Supports files: sdf, mol Structure Data File (SDF) format files. ################ - ## Parser TRAJ ## + ## Parser traj ## Supports files: traj, trj, crd Amber trajectory (ascii) coordinate or velocity files supported from Amber 7 upwards. ################# - ## Parser TRR ## + ## Parser trr ## Supports files: trr Gromacs TRR (XDR file) coordinate / velocity / force trajectory file ################ - ## Parser XTC ## + ## Parser xtc ## Supports files: xtc Gromacs XTC (XDR file) compressed coordinate trajectory file ################ @@ -82,5 +87,5 @@ is symmetrical. This means that :mod:`sire` can read in and write out the same amount of information from a file (i.e. it can always read what it writes). Another design principle is that information should not be lost. As much -as possible, :mod:`sire` will load and preserve all information it can -read from a molecular file. +as possible, :mod:`sire` will load and preserve all +molecular-level information it can read from a molecular file. diff --git a/doc/source/tutorial/part02/images/7SA1.jpg b/doc/source/tutorial/part02/images/7SA1.jpg index 54703c3c9..47fb3d44b 100644 Binary files a/doc/source/tutorial/part02/images/7SA1.jpg and b/doc/source/tutorial/part02/images/7SA1.jpg differ diff --git a/doc/source/tutorial/part05/01_convert.rst b/doc/source/tutorial/part05/01_convert.rst index 0d463699c..7565f8065 100644 --- a/doc/source/tutorial/part05/01_convert.rst +++ b/doc/source/tutorial/part05/01_convert.rst @@ -134,7 +134,7 @@ environment as :mod:`sire`, and whether or not :mod:`sire` was compiled with support for that package. >>> print(sr.convert.supported_formats()) -['biosimspace', 'openmm', 'rdkit', 'sire'] +['biosimspace', 'gemmi', 'openmm', 'rdkit', 'sire'] .. note:: @@ -219,7 +219,7 @@ environment as :mod:`sire`, and whether or not :mod:`sire` was compiled with support for that package. 
>>> print(sr.convert.supported_formats()) -['biosimspace', 'openmm', 'rdkit', 'sire'] +['biosimspace', 'gemmi', 'openmm', 'rdkit', 'sire'] .. note:: @@ -273,6 +273,131 @@ extracted from the Context. Please do get in touch with us if you would like to learn about these functions, and would like to contribute to coding a more complete OpenMM to sire converter. +Gemmi +----- + +`Gemmi `__ is a Python library +developed primarily for use in macromolecular crystallography (MX). +In particular it can be used to parse PDBx/mmCIF files, refinement +restraints, reflection data, 3D grid data and dealing with +crystallographic symmetry. This is useful for structural bioinformatics. + +The :func:`sire.convert.supported_formats` function lists the formats that +:mod:`sire.convert` supports for the current installation. This will +depend on whether or not you have the package installed in the same conda +environment as :mod:`sire`, and whether or not :mod:`sire` was compiled +with support for that package. + +>>> print(sr.convert.supported_formats()) +['biosimspace', 'gemmi', 'openmm', 'rdkit', 'sire'] + +.. note:: + + If ``gemmi`` isn't listed, then you should quit Python and install + it, e.g. using the command ``conda install -c conda-forge gemmi``. + If it still isn't listed then please raise an issue on the + `sire GitHub repository `__. + +:func:`sire.convert.to` can convert a :class:`~sire.system.System`, list +of molecules, or single molecule into a +`Gemmi Structure `__ +object. + +>>> mols = sr.load(sr.expand(sr.tutorial_url, "ala.crd", "ala.top")) +>>> gemmi_struct = sr.convert.to(mols, "gemmi") +>>> print(gemmi_struct) + +>>> print(gemmi_struct[0].get_all_residue_names()) +['ACE', 'ALA', 'NME', 'WAT'] + +Passing in a single molecule or subset of molecules will return a +`Gemmi Structure `__ +with just those molecules, e.g. 
+ +>>> gemmi_struct = sr.convert.to(mols[0], "gemmi") +>>> print(gemmi_struct) + +>>> print(gemmi_struct[0].get_all_residue_names()) +['ACE', 'ALA', 'NME'] + +You can convert a +`Gemmi Structure `__ +back to a :class:`~sire.system.System` object, e.g. + +>>> mols = sr.convert.to(gemmi_struct, "sire") +>>> print(mols) +System( name= num_molecules=1 num_residues=3 num_atoms=22 ) + +This conversion also preserves user-supplied System metadata, e.g. + +>>> mols.set_metadata("name", "alanine dipeptide") +>>> mols.set_metadata("residues", ["ACE", "ALA", "NME"]) +>>> mols.set_metadata("atoms", {"element": ["C", "N", "O"], +... "x_coords": [0.0, 1.0, 2.0], +... "y_coords": [3.0, 4.0, 5.0], +... "z_coords": [6.0, 7.0, 8.0]}) +>>> sr.save(mols, "test.pdbx") + +would add the following to the PDBx/mmCIF file: + +.. code-block:: none + + data_sire + loop_ + _atoms.element + _atoms.x_coords + _atoms.y_coords + _atoms.z_coords + C 0 3 6 + N 1 4 7 + O 2 5 8 + + _name "alanine dipeptide" + + loop_ + _residues.value + ACE + ALA + NME + +which could be recovered when loading the file... + +>>> mols = sr.load("test.pdbx") +>>> print(mols.metadata()) +Properties( + residues => [ ACE,ALA,NME ], + name => alanine dipeptide, + atoms => Properties( + element => SireBase::StringArrayProperty( size=3 +0: C +1: N +2: O +), + x_coords => SireBase::StringArrayProperty( size=3 +0: 0 +1: 1 +2: 2 +), + y_coords => SireBase::StringArrayProperty( size=3 +0: 3 +1: 4 +2: 5 +), + z_coords => SireBase::StringArrayProperty( size=3 +0: 6 +1: 7 +2: 8 +) +) +) + +.. note:: + + Note that metadata values loaded from a PDBx/mmCIF file are always + stored as strings. You may need to convert them to the appropriate + type for your application (e.g., here the coordinate values are + the strings "0", "1", "2" etc. rather than the numbers 0, 1, 2 etc.). + Anything to Anything -------------------- @@ -316,6 +441,22 @@ convert it to an OpenMM Context ready for minimisation or dynamics... 
>>> print(omm.getState().getTime()) 0.010000000000000002 ps +or you could load a PDBx file from Gemmi and convert a "MAN" molecule within it +into an RDKit structure... + +>>> import gemmi +>>> import sire as sr +>>> import rdkit +>>> from rdkit import Chem +>>> import urllib +>>> urllib.request.urlretrieve("https://files.rcsb.org/download/3NSS.cif.gz", +... filename="3NSS.cif.gz") +>>> struct = gemmi.read_structure("3NSS.cif.gz") +>>> mol = gemmi.Selection("(MAN)").copy_structure_selection(struct) +>>> rdkit_mol = sr.convert.to(mol, "rdkit") +>>> print(Chem.MolToSmiles(rdkit_mol)) +O=C=C1OC(OC2=C(=O)C(=C=O)O[C-]=C2[O-])=C(=O)=C(=O)C1=O + Supporting other formats ------------------------ diff --git a/doc/source/tutorial/part06/05_free_energy_perturbation.rst b/doc/source/tutorial/part06/05_free_energy_perturbation.rst index 92514ea53..643b5ca70 100644 --- a/doc/source/tutorial/part06/05_free_energy_perturbation.rst +++ b/doc/source/tutorial/part06/05_free_energy_perturbation.rst @@ -234,7 +234,7 @@ instead of ``energy_{lambda}.s3``). ... print(f"Simulating lambda={lambda_value:.2f}") ... # minimise the system at this lambda value ... min_mol = mol.minimisation(lambda_value=lambda_value, -... vacuum=True).run().commit() +... vacuum=True).run().commit(return_as_system=True) ... # create a dynamics object for the system ... d = min_mol.dynamics(timestep="1fs", temperature="25oC", ... lambda_value=lambda_value, @@ -267,6 +267,14 @@ instead of ``energy_{lambda}.s3``). and instead set the space to a :class:`~sire.space.Cartesian` space. This has the affect of simulating the molecules in vacuum. +.. note:: + + The option ``return_as_system`` tells the minimisation's ``commit`` + function to return the result as a :class:`~sire.system.System` object, + rather than a molecule. This way, ``min_mol`` is a :class:`~sire.system.System`, + and so you can access the energy trajectory via the + ``energy_trajectory()`` function. 
+ This should run more quickly than the simulation in water, e.g. about 15 seconds per window (at about 150 nanoseconds per day of sampling). diff --git a/doc/source/tutorial/part06/scripts/run_md.py b/doc/source/tutorial/part06/scripts/run_md.py index 63ed1891b..ab26f839b 100644 --- a/doc/source/tutorial/part06/scripts/run_md.py +++ b/doc/source/tutorial/part06/scripts/run_md.py @@ -25,9 +25,15 @@ for i, lambda_value in enumerate(lambda_values): print(f"Simulating lambda={lambda_value:.2f}") # minimise the system at this lambda value - min_mols = mols.minimisation(lambda_value=lambda_value, - constraint=constraint, - perturbable_constraint="none").run().commit() + min_mols = ( + mols.minimisation( + lambda_value=lambda_value, + constraint=constraint, + perturbable_constraint="none", + ) + .run() + .commit() + ) # create a dynamics object for the system d = min_mols.dynamics( @@ -47,9 +53,7 @@ print(d) # get the values of lambda for neighbouring windows - lambda_windows = lambda_values[ - max(i - 1, 0) : min(len(lambda_values), i + 2) - ] + lambda_windows = lambda_values[max(i - 1, 0) : min(len(lambda_values), i + 2)] # run the dynamics, saving the energy every 0.1 ps d.run( @@ -75,11 +79,14 @@ # off the perturbable constraint) min_mols = ( mols[0] - .minimisation(lambda_value=lambda_value, vacuum=True, - constraint=constraint, - perturbable_constraint="none") + .minimisation( + lambda_value=lambda_value, + vacuum=True, + constraint=constraint, + perturbable_constraint="none", + ) .run() - .commit() + .commit(return_as_system=True) ) # create a dynamics object for the system @@ -101,9 +108,7 @@ print(d) # get the values of lambda for neighbouring windows - lambda_windows = lambda_values[ - max(i - 1, 0) : min(len(lambda_values), i + 2) - ] + lambda_windows = lambda_values[max(i - 1, 0) : min(len(lambda_values), i + 2)] # run the dynamics, saving the energy every 0.1 ps d.run( diff --git a/recipes/sire/template.yaml b/recipes/sire/template.yaml index 
c820a7284..a2f6535b5 100644 --- a/recipes/sire/template.yaml +++ b/recipes/sire/template.yaml @@ -2,7 +2,7 @@ package: name: {{ name }} - version: {{ environ.get('GIT_DESCRIBE_TAG', 'PR') }} + version: {{ environ.get('GIT_DESCRIBE_TAG', 'PR').replace('-','') }} source: git_url: SIRE_REMOTE diff --git a/requirements_bss.txt b/requirements_bss.txt index 52744b173..fde780ff4 100644 --- a/requirements_bss.txt +++ b/requirements_bss.txt @@ -29,7 +29,8 @@ py3dmol pydot pygtail pyyaml -rdkit>=2023.0.0 +rdkit >=2023.0.0 +gemmi >=0.6.3,<0.7.0 # The below are packages that aren't available on all # platforms/OSs and so need to be conditionally included diff --git a/requirements_build.txt b/requirements_build.txt index dde339d83..c550ad6d2 100644 --- a/requirements_build.txt +++ b/requirements_build.txt @@ -13,5 +13,10 @@ sysroot_linux-64==2.17 ; sys_platform == "linux" # These packages are needed to compile # the SireRDKit plugin -rdkit>=2023.0.0 -rdkit-dev>=2023.0.0 +rdkit >=2023.0.0 +rdkit-dev >=2023.0.0 + +# These packages are needed to compile +# the SireGemmi plugin +gemmi >=0.6.3,<0.7.0 +pybind11 diff --git a/requirements_host.txt b/requirements_host.txt index 76f154ec1..3b9c2a2a3 100644 --- a/requirements_host.txt +++ b/requirements_host.txt @@ -12,3 +12,6 @@ qt-main rich tbb tbb-devel +gemmi >=0.6.3,<0.7.0 +rdkit >=2023.0.0 + diff --git a/requirements_test.txt b/requirements_test.txt index adb6147fd..55ed43d38 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,4 +1,5 @@ # Test requirements. 
These aren't needed by Sire, but if installed, will # enable test to run to validate advanced functionality -rdkit>=2023.0.0 +rdkit >=2023.0.0 +gemmi >=0.6.3,<0.7.0 diff --git a/src/sire/CMakeLists.txt b/src/sire/CMakeLists.txt index d56e2c42f..c152f4342 100644 --- a/src/sire/CMakeLists.txt +++ b/src/sire/CMakeLists.txt @@ -103,6 +103,7 @@ add_subdirectory (mm) add_subdirectory (mol) add_subdirectory (morph) add_subdirectory (move) +add_subdirectory (options) add_subdirectory (restraints) add_subdirectory (search) add_subdirectory (stream) diff --git a/src/sire/__init__.py b/src/sire/__init__.py index 391434ce8..02b97fb7a 100644 --- a/src/sire/__init__.py +++ b/src/sire/__init__.py @@ -697,6 +697,7 @@ def _convert(id): mol = _lazy_import.lazy_module("sire.mol") morph = _lazy_import.lazy_module("sire.morph") move = _lazy_import.lazy_module("sire.move") + options = _lazy_import.lazy_module("sire.options") qt = _lazy_import.lazy_module("sire.qt") restraints = _lazy_import.lazy_module("sire.restraints") search = _lazy_import.lazy_module("sire.search") diff --git a/src/sire/_load.py b/src/sire/_load.py index e94f33ad7..23a3a75af 100644 --- a/src/sire/_load.py +++ b/src/sire/_load.py @@ -17,9 +17,7 @@ class _tutorial_url: def __init__(self, value): self._value = value - self.__doc__ = ( - "The base URL for all molecule files used in the tutorial." - ) + self.__doc__ = "The base URL for all molecule files used in the tutorial." 
def __str__(self): return self._value @@ -81,9 +79,7 @@ def _get_gromacs_dir(): try: import urllib.request - urllib.request.urlretrieve( - f"{tutorial_url}/gromacs.tar.bz2", gromacs_tbz2 - ) + urllib.request.urlretrieve(f"{tutorial_url}/gromacs.tar.bz2", gromacs_tbz2) except Exception: # we cannot download - just give up return None @@ -194,13 +190,10 @@ def _resolve_path(path, directory=".", auto_unzip=True, silent=False): if os.path.isfile(filename): if not silent: print(f"Using cached download of '{path}'...") - return _resolve_path( - filename, directory=directory, silent=silent - ) + return _resolve_path(filename, directory=directory, silent=silent) else: raise IOError( - f"Cannot overwrite {filename} as it is an " - "existing directory!" + f"Cannot overwrite {filename} as it is an " "existing directory!" ) if not silent: @@ -242,12 +235,32 @@ def _resolve_path(path, directory=".", auto_unzip=True, silent=False): if is_code: code = path.lower() - # https://files.rcsb.org/download/4hhb.pdb.gz + + from .convert import supported_formats + + if "gemmi" in supported_formats(): + ext = "cif" + else: + ext = "pdb" + + # https://files.rcsb.org/download/4hhb.ext.gz return _resolve_path( - f"https://files.rcsb.org/download/{path}.pdb.gz", + f"https://files.rcsb.org/download/{path}.{ext}.gz", directory=directory, silent=silent, ) + + elif path.startswith("pdb_") and len(path) == 12: + # this is a new PDB ID code, of format pdb_00003nss + if not path.startswith("pdb_0000"): + raise IOError( + f"Unable to load {path} as we don't yet support new-style " + "PDB codes that cannot be converted into old-style codes." 
+ ) + + pdbid = path[8:].upper() + return _resolve_path(pdbid, directory=directory, silent=silent) + elif path.startswith("alphafold:"): # alphafold code code = path[10:] @@ -661,9 +674,7 @@ def load_test_files(files: _Union[_List[str], str], *args, map=None): cache_dir = os.path.join(d, "cache") files = expand(tutorial_url, files, suffix=".bz2") - return load( - files, directory=cache_dir, silent=True, show_warnings=False, map=map - ) + return load(files, directory=cache_dir, silent=True, show_warnings=False, map=map) def smiles( diff --git a/src/sire/base/__init__.py b/src/sire/base/__init__.py index d60d6f2c7..07cf795e5 100644 --- a/src/sire/base/__init__.py +++ b/src/sire/base/__init__.py @@ -1,10 +1,10 @@ -__all__ = ["create_map", "wrap", "PropertyMap", "ProgressBar"] +__all__ = ["create_map", "wrap", "PropertyMap", "ProgressBar", "Properties"] from ..legacy import Base as _Base from .. import use_new_api as _use_new_api -from ..legacy.Base import PropertyMap +from ..legacy.Base import PropertyMap, Properties from ._progressbar import ProgressBar @@ -122,6 +122,25 @@ def __propertymap_set(obj, key, value): PropertyMap.__orig__set = PropertyMap.set PropertyMap.set = __propertymap_set + def __propertymap_get_string(obj, key: str): + """ + Return the string value associated with the passed 'key' + + This returns 'key' if there is no value associated + """ + key = str(key) + if obj.specified(key): + val = obj[key] + + if val.has_value(): + return val.value().as_string() + else: + return val.source() + else: + return key + + PropertyMap.get_string = __propertymap_get_string + def create_map(values, extras=None): """Construct a PropertyMap from the diff --git a/src/sire/convert/__init__.py b/src/sire/convert/__init__.py index 1a5fbbb93..7b932d64d 100644 --- a/src/sire/convert/__init__.py +++ b/src/sire/convert/__init__.py @@ -1,8 +1,10 @@ __all__ = [ "biosimspace_to_sire", + "gemmi_to_sire", "openmm_to_sire", "rdkit_to_sire", "sire_to_biosimspace", + 
"sire_to_gemmi", "sire_to_rdkit", "sire_to_openmm", "supported_formats", @@ -23,7 +25,7 @@ def _to_selectormol(obj): if hasattr(obj, "molecules"): return obj.molecules() - elif type(obj) is list: + elif isinstance(obj, list): mols = [] for o in obj: @@ -64,14 +66,15 @@ def to(obj, format: str = "sire", map=None): return to_sire(obj, map=map) elif format == "rdkit": return to_rdkit(obj, map=map) + elif format == "gemmi": + return to_gemmi(obj, map=map) elif format == "biosimspace": return to_biosimspace(obj, map=map) elif format == "openmm": return to_openmm(obj, map=map) else: raise ValueError( - f"Cannot convert {obj} as the format '{format}' is " - "not recognised." + f"Cannot convert {obj} as the format '{format}' is " "not recognised." ) @@ -87,7 +90,7 @@ def to_sire(obj, map=None): # already a sire object? return obj - if type(obj) is not list: + if not isinstance(obj, list): obj = [obj] # create lists of objects of the same type (in the same order) @@ -106,9 +109,11 @@ def _to_type(o): return "rdkit" elif "openmm" in t: return "openmm" + elif "gemmi" in t: + return "gemmi" else: raise TypeError( - f"Cannot convert '{o}' as it is of unrecognised type {type(0)}" + f"Cannot convert '{o}' as it is of unrecognised type {type(o)}" ) # sort all the objects into lists of types (preserving the order) @@ -140,6 +145,9 @@ def _to_type(o): elif typ[0] == "openmm": c = openmm_to_sire(typ[1], map=map) + elif typ[0] == "gemmi": + c = gemmi_to_sire(typ[1], map=map) + else: raise TypeError(f"Unrecognised type {typ[0]}") @@ -181,6 +189,14 @@ def to_rdkit(obj, map=None): return sire_to_rdkit(to_sire(obj, map=map), map=map) +def to_gemmi(obj, map=None): + """ + Convert the passed object from its current object format to a + gemmi object format. 
+ """ + return sire_to_gemmi(to_sire(obj, map=map), map=map) + + def to_openmm(obj, map=None): """ Convert the passed object from its current object format to an @@ -196,7 +212,7 @@ def biosimspace_to_sire(obj, map=None): """ from ..system import System - if type(obj) is list: + if isinstance(obj, list): if len(obj) == 0: return None elif len(obj) == 1: @@ -308,7 +324,7 @@ def openmm_to_sire(obj, map=None): """ Convert the passed OpenMM.System to the sire equivalent """ - if type(obj) is not list: + if not isinstance(obj, list): obj = [obj] try: @@ -380,7 +396,7 @@ def rdkit_to_sire(obj, map=None): Convert the passed rdkit object (either a molecule or list of molecules) to the sire equivalent """ - if type(obj) is not list: + if not isinstance(obj, list): obj = [obj] try: @@ -432,3 +448,115 @@ def sire_to_rdkit(obj, map=None): return mols[0] else: return mols + + +def gemmi_to_sire(obj, map=None): + """ + Convert the passed gemmi Structure to the sire equivalent + """ + try: + from ..legacy.Convert import gemmi_to_sire as _gemmi_to_sire + except Exception: + raise ModuleNotFoundError( + "gemmi is not available. 
Please install via " + "'mamba install -c conda-forge gemmi'" + ) + + from ..base import create_map + + map = create_map(map) + + if not isinstance(obj, list): + obj = [obj] + + results = [] + + from ..system import System + + for o in obj: + s = System(_gemmi_to_sire(o, map=map)) + + if hasattr(o, "_sire_metadata"): + metadata = o._sire_metadata() + + if metadata is not None: + s.set_property("metadata", metadata) + + results.append(s) + + if len(results) == 1: + return results[0] + else: + return results + + +_gemmi_metadata = {} + + +def _find_sire_gemmi_metadata(obj): + """ + Find the sire metadata associated with the passed gemmi object + """ + global _gemmi_metadata + + if obj in _gemmi_metadata: + return _gemmi_metadata[obj] + else: + return None + + +def sire_to_gemmi(obj, map=None): + """ + Convert the passed sire object to a gemmi structure + """ + try: + from ..legacy.Convert import sire_to_gemmi as _sire_to_gemmi + except Exception: + raise ModuleNotFoundError( + "gemmi is not available. Please install via " + "'mamba install -c conda-forge gemmi'" + ) + + if not isinstance(obj, list): + obj = [obj] + + from ..system import System + from ..base import create_map + + map = create_map(map) + + result = [] + + for o in obj: + if System.is_system(o): + try: + o = o._system + except Exception: + pass + else: + s = System() + s.add(_to_selectormol(o)) + o = s._system + + if o.contains_property("metadata"): + metadata = o.property("metadata") + else: + metadata = None + + g = _sire_to_gemmi(o, map=map) + + if metadata is not None: + # add metadata to the gemmi structure. 
We have to use + # this convoluted way of doing it as gemmi doesn't + # allow us to add arbitrary metadata to the structure + # and the class is not extensible (can't be monkey patched) + global _gemmi_metadata + _gemmi_metadata[g] = metadata + g.__class__._sire_metadata = _find_sire_gemmi_metadata + + result.append(g) + + if len(result) == 1: + return result[0] + else: + return result diff --git a/src/sire/io/parser/__init__.py b/src/sire/io/parser/__init__.py index 8fd2e6a1c..38d1c7693 100644 --- a/src/sire/io/parser/__init__.py +++ b/src/sire/io/parser/__init__.py @@ -4,6 +4,7 @@ "GTOP", "MOL2", "PDB", + "PDBx", "PRM", "PSF", "RST", @@ -21,6 +22,7 @@ GTOP = _IO.GroTop MOL2 = _IO.Mol2 PDB = _IO.PDB2 +PDBx = _IO.PDBx PRM = _IO.AmberPrm PSF = _IO.CharmmPSF RST = _IO.AmberRst diff --git a/src/sire/mol/__init__.py b/src/sire/mol/__init__.py index b1c781edb..a6420383f 100644 --- a/src/sire/mol/__init__.py +++ b/src/sire/mol/__init__.py @@ -535,6 +535,61 @@ def __fixed__getitem__(obj, key): return obj.dihedrals(key, auto_reduce=True) elif ImproperID in type(key).mro(): return obj.impropers(key, auto_reduce=True) + elif Atom in type(key).mro(): + atoms = obj.atoms(key, auto_reduce=True) + + if __is_selector_class(atoms) and len(atoms) == 1: + return atoms[0] + else: + return atoms + elif Residue in type(key).mro(): + res = obj.residues(key, auto_reduce=True) + + if __is_selector_class(res) and len(res) == 1: + return res[0] + else: + return res + elif Chain in type(key).mro(): + chains = obj.chains(key, auto_reduce=True) + + if __is_selector_class(chains) and len(chains) == 1: + return chains[0] + else: + return chains + elif Segment in type(key).mro(): + segs = obj.segments(key, auto_reduce=True) + + if __is_selector_class(segs) and len(segs) == 1: + return segs[0] + else: + return segs + elif Molecule in type(key).mro(): + mols = obj.molecules(key, auto_reduce=True) + + if __is_selector_class(mols) and len(mols) == 1: + return mols[0] + else: + return mols + elif 
__is_selector_class(key): + if len(key) == 0: + raise KeyError("Nothing matched the search.") + elif len(key) == 1: + return __fixed__getitem__(obj, key[0]) + + T = type(key[0]) + + if Atom in T.mro(): + return obj.atoms(key, auto_reduce=True) + elif Residue in T.mro(): + return obj.residues(key, auto_reduce=True) + elif Chain in T.mro(): + return obj.chains(key, auto_reduce=True) + elif Segment in T.mro(): + return obj.segments(key, auto_reduce=True) + elif Molecule in T.mro(): + return obj.molecules(key, auto_reduce=True) + else: + raise TypeError(f"You cannot search using an index of type {T}") if __is_selector_class(obj): return obj.__orig__getitem__(key) @@ -544,23 +599,62 @@ def __fixed__getitem__(obj, key): return obj.atoms(key, auto_reduce=True) -def __fixed__atoms__(obj, idx=None, auto_reduce=False, map=None): +def __idx_to_atoms(obj, idx, map): + if hasattr(idx, "atoms"): + atoms = obj.atoms() + return atoms[atoms.find(idx.atoms())] + else: + return obj.atoms(idx, map=map) + + +def __fixed__atoms__( + obj, idx=None, auto_reduce=False, error_on_missing: bool = False, map=None +): from ..base import create_map if idx is None: result = obj.__orig__atoms() elif type(idx) is range: result = obj.__orig__atoms(list(idx), map=create_map(map)) + elif hasattr(idx, "atoms"): + atoms = obj.atoms() + idxs = atoms.find(idx.atoms()) + return atoms.__orig__atoms(idxs, map=create_map(map)) else: result = obj.__orig__atoms(idx, map=create_map(map)) if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + raise KeyError("There is no matching atom in this view.") else: return result -def __fixed__bonds__(obj, idx=None, idx1=None, auto_reduce=False, map=None): +def __fixed__atom__(obj, idx=None, map=None): + if isinstance(idx, int): + return obj.__orig__atom(idx) + + atoms = __fixed__atoms__(obj, idx, auto_reduce=False, map=map) + + if len(atoms) == 0: + raise KeyError("There is no matching atom in this view.") + elif len(atoms) > 
1: + raise KeyError( + f"More than one atom matches. Number of matches is {len(atoms)}." + ) + + return atoms[0] + + +def __fixed__bonds__( + obj, + idx=None, + idx1=None, + auto_reduce=False, + error_on_missing: bool = False, + map=None, +): if idx is None and idx1 is not None: idx = idx1 idx1 = None @@ -568,6 +662,8 @@ def __fixed__bonds__(obj, idx=None, idx1=None, auto_reduce=False, map=None): from . import MoleculeView from ..base import create_map + map = create_map(map) + if issubclass(type(obj), MoleculeView): # this is a single-molecule view from ..mm import SelectorBond @@ -575,7 +671,7 @@ def __fixed__bonds__(obj, idx=None, idx1=None, auto_reduce=False, map=None): C = SelectorBond def _fromBondID(obj, bondid): - return SelectorBond(obj, bondid, map=create_map(map)) + return SelectorBond(obj, bondid, map=map) else: # this is a multi-molecule container @@ -584,31 +680,41 @@ def _fromBondID(obj, bondid): C = SelectorMBond def _fromBondID(obj, bondid): - return SelectorMBond( - obj.to_select_result(), bondid, map=create_map(map) - ) + return SelectorMBond(obj.to_select_result(), bondid, map=map) if idx is None: try: - result = C(obj, map=create_map(map)) + result = C(obj, map=map) except Exception: - result = C(obj.to_select_result(), map=create_map(map)) + result = C(obj.to_select_result(), map=map) elif idx1 is None: if BondID in type(idx).mro(): result = _fromBondID(obj, idx) else: - result = C(obj.atoms(idx, map=create_map(map))) + result = C(__idx_to_atoms(obj, idx, map), map=map) else: - result = C(obj.atoms(idx), obj.atoms(idx1), map=create_map(map)) + result = C( + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + map=map, + ) if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + raise KeyError("There is no matching bond in this view.") else: return result def __fixed__angles__( - obj, idx=None, idx1=None, idx2=None, auto_reduce=False, map=None + obj, + idx=None, + idx1=None, + 
idx2=None, + auto_reduce=False, + error_on_missing: bool = False, + map=None, ): if idx1 is None and idx2 is not None: idx1 = idx2 @@ -621,6 +727,8 @@ def __fixed__angles__( from . import MoleculeView from ..base import create_map + map = create_map(map) + if issubclass(type(obj), MoleculeView): # this is a single-molecule view from ..mm import SelectorAngle @@ -628,7 +736,7 @@ def __fixed__angles__( C = SelectorAngle def _fromAngleID(obj, angid): - return SelectorAngle(obj, angid, map=create_map(map)) + return SelectorAngle(obj, angid, map=map) else: # this is a multi-molecule container @@ -637,38 +745,49 @@ def _fromAngleID(obj, angid): C = SelectorMAngle def _fromAngleID(obj, angid): - return SelectorMAngle( - obj.to_select_result(), angid, map=create_map(map) - ) + return SelectorMAngle(obj.to_select_result(), angid, map=map) if idx is None: try: - result = C(obj, map=create_map(map)) + result = C(obj, map=map) except Exception: - result = C(obj.to_select_result(), map=create_map(map)) + result = C(obj.to_select_result(), map=map) elif idx1 is None: if AngleID in type(idx).mro(): result = _fromAngleID(obj, idx) else: - result = C(obj.atoms(idx, map=create_map(map))) + result = C(__idx_to_atoms(obj, idx, map), map=map) elif idx2 is None: - result = C(obj.atoms(idx), obj.atoms(idx1), map=create_map(map)) + result = C( + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + map=map, + ) else: result = C( - obj.atoms(idx), - obj.atoms(idx1), - obj.atoms(idx2), - map=create_map(map), + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + __idx_to_atoms(obj, idx2, map), + map=map, ) if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + raise KeyError("There is no matching angle in this view.") else: return result def __fixed__dihedrals__( - obj, idx=None, idx1=None, idx2=None, idx3=None, auto_reduce=False, map=None + obj, + idx=None, + idx1=None, + idx2=None, + idx3=None, + auto_reduce=False, 
+ error_on_missing: bool = False, + map=None, ): if idx2 is None and idx3 is not None: idx2 = idx3 @@ -685,6 +804,8 @@ def __fixed__dihedrals__( from . import MoleculeView from ..base import create_map + map = create_map(map) + if issubclass(type(obj), MoleculeView): # this is a single-molecule view from ..mm import SelectorDihedral @@ -692,7 +813,7 @@ def __fixed__dihedrals__( C = SelectorDihedral def _fromDihedralID(obj, dihid): - return SelectorDihedral(obj, dihid, map=create_map(map)) + return SelectorDihedral(obj, dihid, map=map) else: # this is a multi-molecule container @@ -701,46 +822,57 @@ def _fromDihedralID(obj, dihid): C = SelectorMDihedral def _fromDihedralID(obj, dihid): - return SelectorMDihedral( - obj.to_select_result(), dihid, map=create_map(map) - ) + return SelectorMDihedral(obj.to_select_result(), dihid, map=map) if idx is None: try: result = C(obj, map=create_map(map)) except Exception: - result = C(obj.to_select_result(), map=create_map(map)) + result = C(obj.to_select_result(), map=map) elif idx1 is None: if DihedralID in type(idx).mro(): result = _fromDihedralID(obj, idx) else: - result = C(obj.atoms(idx), map=create_map(map)) + result = C(__idx_to_atoms(obj, idx, map), map=map) elif idx2 is None: - result = C(obj.atoms(idx), obj.atoms(idx1), map=create_map(map)) + result = C( + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + map=map, + ) elif idx3 is None: result = C( - obj.atoms(idx), - obj.atoms(idx1), - obj.atoms(idx2), - map=create_map(map), + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + __idx_to_atoms(obj, idx2, map), + map=map, ) else: result = C( - obj.atoms(idx), - obj.atoms(idx1), - obj.atoms(idx2), - obj.atoms(idx3), - map=create_map(map), + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + __idx_to_atoms(obj, idx2, map), + __idx_to_atoms(obj, idx3, map), + map=map, ) if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + 
raise KeyError("There is no matching dihedral in this view.") else: return result def __fixed__impropers__( - obj, idx=None, idx1=None, idx2=None, idx3=None, auto_reduce=False, map=None + obj, + idx=None, + idx1=None, + idx2=None, + idx3=None, + auto_reduce=False, + error_on_missing: bool = False, + map=None, ): if idx2 is None and idx3 is not None: idx2 = idx3 @@ -757,6 +889,8 @@ def __fixed__impropers__( from . import MoleculeView from ..base import create_map + map = create_map(map) + if issubclass(type(obj), MoleculeView): # this is a single-molecule view from ..mm import SelectorImproper @@ -764,7 +898,7 @@ def __fixed__impropers__( C = SelectorImproper def _fromImproperID(obj, impid): - return SelectorImproper(obj, impid, map=create_map(map)) + return SelectorImproper(obj, impid, map=map) else: # this is a multi-molecule container @@ -773,40 +907,44 @@ def _fromImproperID(obj, impid): C = SelectorMImproper def _fromImproperID(obj, impid): - return SelectorMImproper( - obj.to_select_result(), impid, map=create_map(map) - ) + return SelectorMImproper(obj.to_select_result(), impid, map=map) if idx is None: try: - result = C(obj, map=create_map(map)) + result = C(obj, map=map) except Exception: - result = C(obj.to_select_result(), map=create_map(map)) + result = C(obj.to_select_result(), map=map) elif idx1 is None: if ImproperID in type(idx).mro(): result = _fromImproperID(obj, idx) else: - result = C(obj.atoms(idx), map=create_map(map)) + result = C(__idx_to_atoms(obj, idx, map), map=map) elif idx2 is None: - result = C(obj.atoms(idx), obj.atoms(idx1), map=create_map(map)) + result = C( + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + map=map, + ) elif idx3 is None: result = C( - obj.atoms(idx), - obj.atoms(idx1), - obj.atoms(idx2), - map=create_map(map), + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + __idx_to_atoms(obj, idx2, map), + map=map, ) else: result = C( - obj.atoms(idx), - obj.atoms(idx1), - obj.atoms(idx2), - 
obj.atoms(idx3), - map=create_map(map), + __idx_to_atoms(obj, idx, map), + __idx_to_atoms(obj, idx1, map), + __idx_to_atoms(obj, idx2, map), + __idx_to_atoms(obj, idx3, map), + map=map, ) if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + raise KeyError("There is no matching improper in this view.") else: return result @@ -875,45 +1013,67 @@ def __fixed__improper__( return impropers[0] -def __fixed__residues__(obj, idx=None, auto_reduce=False, map=None): +def __fixed__residues__( + obj, idx=None, auto_reduce=False, error_on_missing: bool = False, map=None +): from ..base import create_map if idx is None: result = obj.__orig__residues() elif type(idx) is range: result = obj.__orig__residues(list(idx), map=create_map(map)) + elif hasattr(idx, "residues"): + residues = obj.residues() + idxs = residues.find(idx.residues()) + return residues.__orig__residues(idxs, map=create_map(map)) else: result = obj.__orig__residues(idx, map=create_map(map)) if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + raise KeyError("There is no matching residue in this view.") else: return result -def __fixed__chains__(obj, idx=None, auto_reduce=False, map=None): +def __fixed__chains__( + obj, idx=None, auto_reduce=False, error_on_missing: bool = False, map=None +): from ..base import create_map if idx is None: result = obj.__orig__chains() elif type(idx) is range: result = obj.__orig__chains(list(idx), map=create_map(map)) + elif hasattr(idx, "chains"): + chains = obj.chains() + idxs = chains.find(idx.chains()) + return chains.__orig__chains(idxs, map=create_map(map)) else: result = obj.__orig__chains(idx, map=create_map(map)) if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + raise KeyError("There is no matching chain in this view.") else: return result -def __fixed__segments__(obj, idx=None, auto_reduce=False, map=None): +def 
__fixed__segments__( + obj, idx=None, auto_reduce=False, error_on_missing: bool = False, map=None +): from ..base import create_map if idx is None: result = obj.__orig__segments() elif type(idx) is range: result = obj.__orig__segments(list(idx), map=create_map(map)) + elif hasattr(idx, "segments"): + segments = obj.segments() + idxs = segments.find(idx.segments()) + return segments.__orig__segments(idxs, map=create_map(map)) else: from ..base import create_map @@ -921,22 +1081,32 @@ def __fixed__segments__(obj, idx=None, auto_reduce=False, map=None): if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + raise KeyError("There is no matching segment in this view.") else: return result -def __fixed__molecules__(obj, idx=None, auto_reduce=False, map=None): +def __fixed__molecules__( + obj, idx=None, auto_reduce=False, error_on_missing: bool = False, map=None +): from ..base import create_map if idx is None: result = obj.__orig__molecules() elif type(idx) is range: result = obj.__orig__molecules(list(idx), map=create_map(map)) + elif hasattr(idx, "molecules"): + molecules = obj.molecules() + idxs = molecules.find(idx.molecules()) + return molecules.__orig__molecules(idxs, map=create_map(map)) else: result = obj.__orig__molecules(idx, map=create_map(map)) if auto_reduce and len(result) == 1: return result[0] + elif error_on_missing and len(result) == 0: + raise KeyError("There is no matching molecule in this view.") else: return result @@ -945,6 +1115,9 @@ def __fix_getitem(C): if not hasattr(C, "__orig__getitem__"): C.__orig__getitem__ = C.__getitem__ + if not hasattr(C, "__orig_atom"): + C.__orig__atom = C.atom + if not hasattr(C, "__orig__atoms"): C.__orig__atoms = C.atoms @@ -958,6 +1131,7 @@ def __fix_getitem(C): C.__orig__segments = C.segments C.__getitem__ = __fixed__getitem__ + C.atom = __fixed__atom__ C.atoms = __fixed__atoms__ C.residues = __fixed__residues__ C.chains = __fixed__chains__ @@ -1411,6 +1585,7 @@ def 
_dynamics( constraint=None, perturbable_constraint=None, include_constrained_energies: bool = True, + integrator=None, schedule=None, lambda_value=None, swap_end_states=None, @@ -1486,6 +1661,13 @@ def _dynamics( energy of the constrained degrees of freedom are not included in the total energy, and their forces are not evaluated. + integrator: str + The type of integrator to use, e.g. `langevin`, `verlet` etc. + See https://sire.openbiosim.org/cheatsheet/openmm.html#choosing-options + for the full list of options. This will be automatically + set to `langevin_middle` (NVT/NPT) or `verlet` (NVE) depending + on the ensemble if this is not set (or is set to `auto`) + schedule: sire.cas.LambdaSchedule The schedule used to control how perturbable forcefield parameters should be morphed as a function of lambda. If this is not set @@ -1613,14 +1795,7 @@ def _dynamics( cutoff = 7.5 * angstrom if cutoff_type is None and not map.specified("cutoff_type"): - try: - if view.property(map["space"]).is_periodic(): - cutoff_type = "PME" - else: - cutoff_type = "RF" - except Exception: - # no space, use RF - cutoff_type = "RF" + cutoff_type = "auto" if timestep is None and not map.specified("timestep"): from ..units import femtosecond @@ -1646,27 +1821,7 @@ def _dynamics( map.set("save_velocities", save_velocities) if constraint is None and not map.specified("constraint"): - from ..units import femtosecond - - if timestep is None: - # it must be in the map - timestep = map["timestep"].value() - - if timestep > 4 * femtosecond: - # need constraint on everything - constraint = "bonds-h-angles" - - elif timestep > 2 * femtosecond: - # need constraint on everything - constraint = "h-bonds-h-angles" - - elif timestep > 1 * femtosecond: - # need it just on H bonds and angles - constraint = "h-bonds" - - else: - # can get away with no constraints - constraint = "none" + constraint = "auto" if perturbable_constraint is not None: perturbable_constraint = str(perturbable_constraint).lower() 
@@ -1691,6 +1846,9 @@ def _dynamics( if platform is not None: map.set("platform", str(platform)) + if integrator is not None: + map.set("integrator", str(integrator)) + return Dynamics( view, cutoff=cutoff, @@ -1878,14 +2036,7 @@ def _minimisation( cutoff = 7.5 * angstrom if cutoff_type is None and not map.specified("cutoff_type"): - try: - if view.property(map["space"]).is_periodic(): - cutoff_type = "PME" - else: - cutoff_type = "RF" - except Exception: - # no space, use RF - cutoff_type = "RF" + cutoff_type = "auto" if device is not None: map.set("device", str(device)) diff --git a/src/sire/options/CMakeLists.txt b/src/sire/options/CMakeLists.txt new file mode 100644 index 000000000..b1fe86e40 --- /dev/null +++ b/src/sire/options/CMakeLists.txt @@ -0,0 +1,15 @@ +######################################## +# +# sire.options +# +######################################## + +# Add your script to this list +set ( SCRIPTS + __init__.py + _option.py + _dynamics_options.py + ) + +# installation +install( FILES ${SCRIPTS} DESTINATION ${SIRE_PYTHON}/sire/options ) diff --git a/src/sire/options/__init__.py b/src/sire/options/__init__.py new file mode 100644 index 000000000..b0f3b52be --- /dev/null +++ b/src/sire/options/__init__.py @@ -0,0 +1,18 @@ +__all__ = [ + "Option", + "Integrator", + "Constraint", + "PerturbableConstraint", + "Cutoff", + "Platform", +] + +from ._option import Option + +from ._dynamics_options import ( + Integrator, + Constraint, + Cutoff, + PerturbableConstraint, + Platform, +) diff --git a/src/sire/options/_dynamics_options.py b/src/sire/options/_dynamics_options.py new file mode 100644 index 000000000..ef63efcf2 --- /dev/null +++ b/src/sire/options/_dynamics_options.py @@ -0,0 +1,128 @@ +__all__ = [ + "Integrator", + "Constraint", + "PerturbableConstraint", + "Cutoff", + "Platform", +] + +from ._option import Option as _Option + + +class Integrator(_Option): + """ + All of the supported options for the integrator + """ + + AUTO = "auto", "Choose the 
integrator automatically" + VERLET = "verlet", "Use the Verlet integrator" + LEAPFROG = "leapfrog", "Use the Leapfrog integrator" + LANGEVIN = "langevin", "Use the Langevin integrator" + LANGEVIN_MIDDLE = ( + "langevin_middle", + "Use the middle scheme Langevin integrator", + ) + NOSE_HOOVER = "nose_hoover", "Use the Nose-Hoover integrator" + BROWNIAN = "brownian", "Use the Brownian integrator" + ANDERSEN = ( + "andersen", + "Use the Verlet integrator with an Andersen thermostat", + ) + + @staticmethod + def create(option: str): + return _Option._create(Integrator, option) + + @staticmethod + def options(include_docs: bool = False): + return _Option._options(Integrator, include_docs=include_docs) + + +class Constraint(_Option): + """ + All of the supported constraint options + """ + + NONE = "none", "Do not use constraints" + AUTO = "auto", "Choose the constraints automatically" + HBONDS = "h_bonds", "Constrain bonds involving hydrogens" + BONDS = "bonds", "Constrain all bonds" + HBONDS_HANGLES = ( + "h_bonds_h_angles", + "Constrain bonds and angles involving hydrogens", + ) + BOND_HANGLES = ( + "bonds_h_angles", + "Constrain all bonds, and angles involving hydrogens", + ) + + @staticmethod + def create(option: str): + return _Option._create(Constraint, option) + + @staticmethod + def options(include_docs: bool = False): + return _Option._options(Constraint, include_docs=include_docs) + + +PerturbableConstraint = Constraint + + +class Cutoff(_Option): + """ + All of the support cutoff options + """ + + NONE = "none", "Do not use a cutoff" + AUTO = "auto", "Choose the cutoff automatically" + RF = "rf", "Use a reaction field cutoff" + PME = "pme", "Use a Particle Mesh Ewald cutoff" + EWALD = "ewald", "Use an Ewald cutoff" + + @staticmethod + def canonicalise(option: str): + """ + Convert the passed option string to the canonical form + """ + option = _Option.canonicalise(option) + + if option == "reaction_field" or option == "reaction field": + return "rf" + elif ( + 
option == "particle_mesh_ewald" or option == "particle mesh ewald" + ): + return "pme" + elif option == "no_cutoff" or option == "no cutoff": + return "none" + else: + return option + + @staticmethod + def create(option: str): + return _Option._create(Cutoff, option) + + @staticmethod + def options(include_docs: bool = False): + return _Option._options(Cutoff, include_docs=include_docs) + + +class Platform(_Option): + """ + All of the supported platforms + """ + + AUTO = "auto", "Choose the platform automatically" + CPU = "cpu", "Run on the CPU" + CUDA = "cuda", "Run on the GPU using CUDA (nVidia)" + OPENCL = "opencl", "Run on the GPU using OpenCL (all GPUs)" + METAL = "metal", "Run on the GPU using Metal (Apple)" + HIP = "hip", "Run on the GPU using HIP (AMD)" + REFERENCE = "reference", "Run on CPU using the reference implementation" + + @staticmethod + def create(option: str): + return _Option._create(Platform, option) + + @staticmethod + def options(include_docs: bool = False): + return _Option._options(Platform, include_docs=include_docs) diff --git a/src/sire/options/_option.py b/src/sire/options/_option.py new file mode 100644 index 000000000..b2ab02fb4 --- /dev/null +++ b/src/sire/options/_option.py @@ -0,0 +1,110 @@ +__all__ = ["Option"] + +from enum import Enum as _Enum + + +class Option(str, _Enum): + """ + Base class of all of the Option objects. These are + effectively StrEnum objects that have additional + functions to support case-insentive assigment + and querying. + + It is very easy to create a new type of Option. + + Example + ------- + + .. 
code-block:: python + + class MyOption(Option): + ""Docstring for MyOption"" + + A = "a", "Option A" + B = "b", "Option B" + C = "c", "Option C" + + @staticmethod + def create(option: str): + return Option._create(MyOption, option) + + @staticmethod + def options(include_docs: bool = False): + return Option._options(MyOption, include_docs=include_docs) + + The resulting class can then be used as follows: + + .. code-block:: python + + assert MyOption.A == "a" + + assert MyOption.create("a") == MyOption.A + assert MyOption.create("a") == "a" + + assert MyOption.options() == ["a", "b", "c"] + assert MyOption.options(include_docs=True) == [ + ("a", "Option A"), + ("b", "Option B"), + ("c", "Option C"), + ] + """ + + def __new__(cls, value, doc=None): + self = super().__new__(cls, value) + self._value_ = value + + if doc is not None: + self.__doc__ = doc + + return self + + def __str__(self) -> str: + return str(self.value) + + def __eq__(self, other) -> bool: + return str(self) == str(other) + + @staticmethod + def canonicalise(option: str) -> str: + """ + Return the canonical version of the passed option string. + This is a lower case string with no extra whitespace (none + at beginning or end, with repeated whitespace removed), + and where hyphens are replaced by underscores + """ + option = str(option) + option = option.replace("-", "_") + option = option.lower().lstrip().rstrip() + option = " ".join(option.split()) + return option + + @staticmethod + def _create(CLS, option): + """ + Create an option from the passed string. Returns + an option of type CLS + """ + if type(option) is CLS: + return option + else: + # Clean the option up, lowercasing, removing whitespace etc. + option = CLS.canonicalise(option) + + try: + return CLS(option) + except ValueError: + raise ValueError( + f"Invalid option '{option}'. 
Available options are: " + + ", ".join(Option._options(CLS)) + ) + + @staticmethod + def _options(CLS, include_docs: bool = False): + """ + Return the list of strings representing all + of the available options + """ + if include_docs: + return [(x.value, x.__doc__) for x in CLS] + else: + return [x.value for x in CLS] diff --git a/src/sire/system/_system.py b/src/sire/system/_system.py index 5e4c8dfd8..50d866831 100644 --- a/src/sire/system/_system.py +++ b/src/sire/system/_system.py @@ -550,6 +550,13 @@ def dynamics(self, *args, **kwargs): is useful if you just want to run standard molecular dynamics of the reference or perturbed states. + integrator: str + The type of integrator to use, e.g. `langevin`, `verlet` etc. + See https://sire.openbiosim.org/cheatsheet/openmm.html#choosing-options + for the full list of options. This will be automatically + set to `langevin_middle` (NVT/NPT) or `verlet` (NVE) depending + on the ensemble if this is not set (or is set to `auto`) + temperature: temperature The temperature at which to run the simulation. A microcanonical (NVE) simulation will be run if you don't @@ -814,9 +821,7 @@ def energy_trajectory( # we need to create this trajectory from ..maths import EnergyTrajectory - self._system.set_property( - traj_propname.source(), EnergyTrajectory() - ) + self._system.set_property(traj_propname.source(), EnergyTrajectory()) traj = self._system.property(traj_propname) @@ -852,8 +857,7 @@ def set_energy_trajectory(self, trajectory, map=None): if trajectory.what() != "SireMaths::EnergyTrajectory": raise TypeError( - f"You cannot set a {type(trajectory)} as an " - "energy trajectory!" + f"You cannot set a {type(trajectory)} as an " "energy trajectory!" 
) self._system.set_property(traj_propname.source(), trajectory) @@ -947,6 +951,73 @@ def shared_properties(self): """ return self._system.shared_properties() + def set_metadata(self, key, value): + """ + Set the metadata for this System so that the metadata associated + with the key 'key' is equal to 'value' + """ + from ..base import Properties, wrap + + if self._system.contains_property("metadata"): + metadata = self._system.property("metadata") + + if not isinstance(metadata, Properties): + metadata = Properties() + + else: + metadata = Properties() + + metadata.set_property(str(key), wrap(value)) + + self._system.set_property("metadata", metadata) + + def has_metadata(self, key): + """ + Return whether or not this System has metadata associated + with the requested key + """ + if self._system.contains_property("metadata"): + return self._system.property("metadata").has_property(str(key)) + else: + return False + + def metadata(self, key: str = None): + """ + Return the metadata associated with the passed 'key', or + all metadata if 'key' is None. 
This returns None if there + is no metadata associated with this key, or no metadata + has been set + """ + if self._system.contains_property("metadata"): + if key is None: + return self._system.property("metadata") + + try: + value = self._system.property("metadata").property(str(key)) + except Exception: + return None + + if hasattr(value, "is_integer"): + if value.is_integer(): + return value.as_integer() + + if hasattr(value, "value"): + return value.value() + + return value + else: + return None + + def metadata_keys(self): + """ + Return the keys of all metadata set in this System + This returns an empty list if no metadata has been set + """ + if self._system.contains_property("metadata"): + return self._system.property("metadata").property_keys() + else: + return [] + def cursor(self): """ Return a sire.mol.Cursor that can be used to edit diff --git a/tests/conftest.py b/tests/conftest.py index 043e55466..2335dd95a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -144,6 +144,39 @@ def pentane_cyclopentane(): return sr.load_test_files("pentane_cyclopentane.bss") +@pytest.fixture(scope="session") +def pdb_3nss(): + return sr.load_test_files("3NSS.pdb") + + +@pytest.fixture(scope="session") +def pdbx_3nss(): + if "gemmi" in sr.convert.supported_formats(): + return sr.load_test_files("3NSS.cif") + else: + return None + + +@pytest.fixture(scope="session") +def testfile_cache_dir(): + import os + + d = os.path.abspath(os.path.curdir) + + if d.endswith("tests"): + # we are running in the tests directory, so cache downloads here + cache_dir = os.path.join(d, "cache") + else: + d2 = os.path.split(d)[0] + if d2.endswith("tests"): + # we are a subdirectory of the parent directory + cache_dir = os.path.join(d2, "cache") + else: + cache_dir = os.path.join(d, "cache") + + return cache_dir + + @pytest.fixture(scope="session") def neopentane_methane(): return sr.load_test_files("neo_meth_scratch.bss") diff --git a/tests/convert/test_gemmi.py 
b/tests/convert/test_gemmi.py new file mode 100644 index 000000000..c4da5827f --- /dev/null +++ b/tests/convert/test_gemmi.py @@ -0,0 +1,136 @@ +import sire as sr + +import pytest + + +def _assert_equal(v0, v1, tol): + assert v0.x() == pytest.approx(v1.x(), tol) + assert v0.y() == pytest.approx(v1.y(), tol) + assert v0.z() == pytest.approx(v1.z(), tol) + + +@pytest.mark.skipif( + "gemmi" not in sr.convert.supported_formats(), reason="gemmi not available" +) +def test_gemmi(testfile_cache_dir, pdbx_3nss): + mols = pdbx_3nss + + import os + + cif_file = os.path.join(testfile_cache_dir, "3NSS.cif") + + import gemmi + + s = gemmi.read_structure(cif_file) + + mols2 = sr.convert.to(s, "sire") + + assert mols.num_molecules() == mols2.num_molecules() + assert mols.num_atoms() == mols2.num_atoms() + assert mols.num_residues() == mols2.num_residues() + assert mols.num_chains() == mols2.num_chains() + + s2 = sr.convert.to(mols, "gemmi") + + assert len(s) == len(s2) + assert len(list(s[0].all())) == len(list(s2[0].all())) + + r0 = s[0].get_all_residue_names() + r1 = s2[0].get_all_residue_names() + + assert len(r0) == len(r1) + + for r in r0: + assert r in r1 + + +@pytest.mark.skipif( + "gemmi" not in sr.convert.supported_formats(), reason="gemmi not available" +) +def test_gemmi_roundtrip(tmpdir, pdbx_3nss): + mols = pdbx_3nss.clone() + + mols.set_metadata("cat", "meow") + mols.set_metadata("dog", ["growl", "bark", "woof"]) + mols.set_metadata( + "vehicle", + { + "car": ["mercedes", "ferrari", "jaguar"], + "bike": ["yamaha", "harley", "honda"], + "numbers": [2, 4, 6], + }, + ) + + g = sr.convert.to(mols, "gemmi") + + mols2 = sr.convert.to(g, "sire") + + assert mols.num_molecules() == mols2.num_molecules() + assert mols.num_atoms() == mols2.num_atoms() + assert mols.num_residues() == mols2.num_residues() + + assert mols.metadata() == mols2.metadata() + + d = tmpdir.mkdir("test_gemmi_roundtrip") + + f = sr.save(mols, d.join("test.pdbx")) + + if isinstance(f, list): + f = f[0] + 
+ mols3 = sr.load(f) + + assert mols.num_molecules() == mols3.num_molecules() + assert mols.num_atoms() == mols3.num_atoms() + assert mols.num_residues() == mols3.num_residues() + + m = mols3.metadata() + + assert m["cat"] == "meow" + assert m["dog"] == ["growl", "bark", "woof"] + + v = m["vehicle"] + + assert v["car"].as_array() == ["mercedes", "ferrari", "jaguar"] + assert v["bike"].as_array() == ["yamaha", "harley", "honda"] + assert v["numbers"].as_array() == ["2", "4", "6"] + + +@pytest.mark.skipif( + "gemmi" not in sr.convert.supported_formats(), reason="gemmi not available" +) +def test_gemmi_complex_metadata(tmpdir, ala_mols): + mols = ala_mols.clone() + + mols.set_metadata("name", "alanine dipeptide") + mols.set_metadata("residues", ["ACE", "ALA", "NME"]) + mols.set_metadata( + "atoms", + { + "element": ["C", "N", "O"], + "x_coords": [0.0, 1.0, 2.0], + "y_coords": [3.0, 4.0, 5.0], + "z_coords": [6.0, 7.0, 8.0], + }, + ) + + d = tmpdir.mkdir("test_gemmi_complex_metadata") + + f = sr.save(mols, d.join("test.pdbx")) + + if isinstance(f, list): + f = f[0] + + mols2 = sr.load(f) + + m = mols2.metadata() + + assert m["name"] == "alanine dipeptide" + assert m["residues"] == ["ACE", "ALA", "NME"] + + a = m["atoms"] + + assert a["element"].as_array() == ["C", "N", "O"] + assert a["x_coords"].as_array() == ["0", "1", "2"] + assert a["y_coords"].as_array() == ["3", "4", "5"] + assert a["z_coords"].as_array() == ["6", "7", "8"] diff --git a/tests/convert/test_openmm.py b/tests/convert/test_openmm.py index ed04ac302..82d3bca1c 100644 --- a/tests/convert/test_openmm.py +++ b/tests/convert/test_openmm.py @@ -2,6 +2,61 @@ import pytest +@pytest.mark.skipif( + "openmm" not in sr.convert.supported_formats(), + reason="openmm support is not available", +) +def test_openmm_single_energy_neopentane(neopentane_methane): + mol = neopentane_methane[0] + + # this function will extract the lambda0 or lambda1 end state + def get_end_state(mol, state, remove_state): + c = 
mol.cursor() + for key in c.keys(): + if key.endswith(state): + c[key.removesuffix(state)] = c[key] + del c[key] + elif key.endswith(remove_state): + del c[key] + + c["is_perturbable"] = False + return c.commit() + + mol0 = get_end_state(mol, "0", "1") + mol1 = get_end_state(mol, "1", "0") + + map = { + "space": sr.vol.Cartesian(), + "platform": "Reference", + "constraint": "none", + "ignore_perturbations": True, + } + + omm0 = sr.convert.to(mol0, "openmm", map=map) + + state0 = omm0.getState(getEnergy=True) + + energy0 = state0.getPotentialEnergy() + + energy0 = energy0.value_in_unit(energy0.unit) + + assert mol0.energy(map=map).to(sr.units.kJ_per_mol) == pytest.approx( + energy0, abs=0.1 + ) + + omm1 = sr.convert.to(mol1, "openmm", map=map) + + state1 = omm1.getState(getEnergy=True) + + energy1 = state1.getPotentialEnergy() + + energy1 = energy1.value_in_unit(energy1.unit) + + assert mol1.energy(map=map).to(sr.units.kJ_per_mol) == pytest.approx( + energy1, abs=0.1 + ) + + @pytest.mark.skipif( "openmm" not in sr.convert.supported_formats(), reason="openmm support is not available", @@ -39,9 +94,7 @@ def test_openmm_single_energy(kigaki_mols): energy = energy.value_in_unit(energy.unit) # these won't be exactly the same - this is 5227 +/- 0.1 kJ mol-1 - assert mol.energy(map=map).to(sr.units.kJ_per_mol) == pytest.approx( - energy, abs=0.1 - ) + assert mol.energy(map=map).to(sr.units.kJ_per_mol) == pytest.approx(energy, abs=0.1) @pytest.mark.skipif( @@ -294,9 +347,9 @@ def test_openmm_ignore_constrained(ala_mols): def test_openmm_no_zero_sigmas(zero_lj_mols): mols = zero_lj_mols - omm = sr.convert.to(mols, "openmm", - map={"constraint": "h-bonds", - "platform": "Reference"}) + omm = sr.convert.to( + mols, "openmm", map={"constraint": "h-bonds", "platform": "Reference"} + ) from openmm import XmlSerializer @@ -316,17 +369,21 @@ def test_openmm_skipped_constrained_bonds(zero_lj_mols): omm1 = sr.convert.to( mols, "openmm", - map={"constraint": "h-bonds", - 
"include_constrained_energies": True, - "platform": "Reference"}, + map={ + "constraint": "h-bonds", + "include_constrained_energies": True, + "platform": "Reference", + }, ) omm2 = sr.convert.to( mols, "openmm", - map={"constraint": "h-bonds", - "include_constrained_energies": False, - "platform": "Reference"}, + map={ + "constraint": "h-bonds", + "include_constrained_energies": False, + "platform": "Reference", + }, ) nrg1 = omm1.get_potential_energy().to(sr.units.kcal_per_mol) diff --git a/tests/convert/test_openmm_lambda.py b/tests/convert/test_openmm_lambda.py index 6e56c1de4..7fb7428fc 100644 --- a/tests/convert/test_openmm_lambda.py +++ b/tests/convert/test_openmm_lambda.py @@ -161,14 +161,6 @@ def test_openmm_scale_lambda_dichloroethane(ethane_12dichloroethane): _run_test(ethane_12dichloroethane.clone(), False) -@pytest.mark.skipif( - "openmm" not in sr.convert.supported_formats(), - reason="openmm support is not available", -) -def test_openmm_scale_lambda_neopentane(neopentane_methane): - _run_test(neopentane_methane.clone(), False) - - @pytest.mark.skipif( "openmm" not in sr.convert.supported_formats(), reason="openmm support is not available", diff --git a/tests/convert/test_openmm_minimise.py b/tests/convert/test_openmm_minimise.py index 28f62c716..cab4a4247 100644 --- a/tests/convert/test_openmm_minimise.py +++ b/tests/convert/test_openmm_minimise.py @@ -98,6 +98,6 @@ def test_openmm_minimise_vacuum(kigaki_mols): mols.add_shared_property("space", sr.vol.Cartesian()) - mols = mols.minimisation(platform="cpu").run(10).commit() + mols = mols.minimisation(platform="cpu", vacuum=True).run(10).commit() assert not mols.property("space").is_periodic() diff --git a/tests/io/test_pdbx.py b/tests/io/test_pdbx.py new file mode 100644 index 000000000..5d30f19bb --- /dev/null +++ b/tests/io/test_pdbx.py @@ -0,0 +1,57 @@ +import sire as sr + +import pytest + + +def _assert_equal(v0, v1, tol): + assert v0.x() == pytest.approx(v1.x(), tol) + assert v0.y() == 
pytest.approx(v1.y(), tol) + assert v0.z() == pytest.approx(v1.z(), tol) + + +@pytest.mark.skipif( + "gemmi" not in sr.convert.supported_formats(), reason="gemmi not available" +) +def test_pdbx(tmpdir, ala_mols): + mols = ala_mols + + dir = tmpdir.mkdir("test_pdbx") + + f = sr.save(mols, dir.join("test"), format=["pdbx"]) + + mols2 = sr.load(f) + + assert mols.num_molecules() == mols2.num_molecules() + assert mols.num_atoms() == mols2.num_atoms() + + assert len(mols.bonds()) == len(mols2.bonds()) + + for atom0, atom1 in zip(mols.atoms(), mols2.atoms()): + assert atom0.name() == atom1.name() + assert atom0.number() == atom1.number() + assert atom0.element() == atom1.element() + assert atom0.residue().name() == atom1.residue().name() + assert atom0.residue().number() == atom1.residue().number() + _assert_equal(atom0.coordinates(), atom1.coordinates(), 1e-6) + + +@pytest.mark.skipif( + "gemmi" not in sr.convert.supported_formats(), reason="gemmi not available" +) +def test_pdbx_pdb(pdb_3nss, pdbx_3nss): + mols = pdb_3nss + mols2 = pdbx_3nss + + assert mols.num_atoms() == mols2.num_atoms() + + # can only test the atoms in the protein as pdb is weird after that + nats = mols2[0].num_atoms() + + for atom1, atom2 in zip(mols.atoms()[0:100], mols2.atoms()[0:nats]): + assert atom1.name() == atom2.name() + assert atom1.number() == atom2.number() + assert atom1.element() == atom2.element() + assert atom1.residue().name() == atom2.residue().name() + assert atom1.residue().number() == atom2.residue().number() + assert atom1.chain().name() == atom2.chain().name() + _assert_equal(atom1.coordinates(), atom2.coordinates(), 1e-3) diff --git a/tests/mm/test_index_bonds.py b/tests/mm/test_index_bonds.py index 73b9a0152..795283525 100644 --- a/tests/mm/test_index_bonds.py +++ b/tests/mm/test_index_bonds.py @@ -1,9 +1,23 @@ import pytest +import sire as sr + def _assert_same_bonds(b0, b1): assert len(b0) == len(b1) + different = False + + # do the quick check assuming they are in the 
same order + for bond0, bond1 in zip(b0, b1): + if bond0 != bond1: + different = True + break + + if not different: + return + + # do the slower check if the order is different for bond0 in b0: same = False @@ -33,14 +47,20 @@ def test_index_bonds(ala_mols): assert len(mol["bonds with atoms in resnum 1"]) == 6 _assert_same_bonds(mol["bonds in resnum 1"], mol["resnum 1"].bonds()) - _assert_same_bonds(mol["bonds with (atoms in resnum 1)"], mol.bonds("resnum 1")) - _assert_same_bonds(mol["bonds with atoms in resnum 1"], mol.bonds("resnum 1")) + _assert_same_bonds( + mol["bonds with (atoms in resnum 1)"], mol.bonds("resnum 1") + ) + _assert_same_bonds( + mol["bonds with atoms in resnum 1"], mol.bonds("resnum 1") + ) # this is a single bond assert mol["bonds to resnum 1"] == mol.bond("resnum 1", "resnum 2") # still just a single bond - assert mol["bonds from resnum 1 to resnum 2"] == mol.bond("resnum 1", "resnum 2") + assert mol["bonds from resnum 1 to resnum 2"] == mol.bond( + "resnum 1", "resnum 2" + ) cx = mol["bonds to element C"] ccx = mol["bonds with element C"] @@ -58,8 +78,6 @@ def test_index_bonds(ala_mols): def test_index_mols_bonds(ala_mols): mols = ala_mols - import sire as sr - bnds = mols["bonds from element O to element H"] assert len(bnds) == 2 * (mols.num_molecules() - 1) @@ -81,10 +99,48 @@ def test_index_mols_bonds(ala_mols): assert bond.atom1().name().value() in ["O", "H1"] # mols[1:] are the water molecules - assert mols["bonds from element O to element H"].mass().value() == pytest.approx( - (mols[1:]["element O"].mass() + mols[1:]["element H"].mass()).value(), 0.0001 + assert mols[ + "bonds from element O to element H" + ].mass().value() == pytest.approx( + (mols[1:]["element O"].mass() + mols[1:]["element H"].mass()).value(), + 0.0001, + ) + + +def test_index_bonds_by_atom(ala_mols): + mols = ala_mols + + bnds0 = mols.bonds(mols["element O"], mols["element H"]) + + bnds1 = mols["bonds from element O to element H"] + + _assert_same_bonds(bnds0, 
bnds1) + + bnd0 = mols.bond(mols.atoms()[0], mols.atoms()[1]) + bnd1 = mols[0].bond("atomidx 0", "atomidx 1") + + assert bnd0 == bnd1 + + bnd0 = mols[0].bond(mols[0][0], mols[0][1]) + + assert bnd0 == bnd1 + + with pytest.raises(KeyError): + mols["bonds from atomidx 0 to atomidx 5"] + + assert len(mols.bonds(mols.atoms()[0], mols.atoms()[5])) == 0 + assert ( + len( + mols.bonds( + mols.atoms()[0], mols.atoms()[5], error_on_missing=False + ) + ) + == 0 ) + with pytest.raises(KeyError): + mols.bonds(mols.atoms()[0], mols.atoms()[5], error_on_missing=True) + if __name__ == "__main__": test_index_bonds() diff --git a/tests/mm/test_index_internals.py b/tests/mm/test_index_internals.py new file mode 100644 index 000000000..6400981a8 --- /dev/null +++ b/tests/mm/test_index_internals.py @@ -0,0 +1,71 @@ +import pytest + +import sire as sr + + +def test_index_angles(ala_mols): + mols = ala_mols[1:10] + + angs0 = mols.angles("element H", "element O", "element H") + + assert len(angs0) == len(mols) + + angs1 = mols.angles( + mols["element H"], mols["element O"], mols["element H"] + ) + + assert len(angs0) == len(angs1) + + for a0, a1 in zip(angs0, angs1): + assert a0 == a1 + + angs0 = mols.angles(mols.atoms()[0], mols.atoms()[1], mols.atoms()[2]) + angs1 = mols[0].angles("atomidx 0", "atomidx 1", "atomidx 2") + + assert len(angs0) == len(angs1) == 1 + + assert angs0[0] == angs1[0] + + +def test_index_dihedrals(ala_mols): + mol = ala_mols[0] + + dihs0 = mol.dihedrals("element H", "element C", "element N", "element H") + + assert len(dihs0) == 4 + + for dih in dihs0: + assert len(dih["element H"]) == 2 + assert len(dih.atoms("element C")) == 1 + assert len(dih.atoms("element N")) == 1 + + dihs1 = mol.dihedrals( + mol["element H"], mol["element C"], mol["element N"], mol["element H"] + ) + + assert len(dihs0) == len(dihs1) + + for d0, d1 in zip(dihs0, dihs1): + assert d0 == d1 + + +def test_index_impropers(ala_mols): + mol = ala_mols[0] + + imps0 = mol.impropers("element C", 
"element C", "element N", "element O") + + assert len(imps0) == 2 + + for imp in imps0: + assert len(imp["element C"]) == 2 + assert len(imp.atoms("element N")) == 1 + assert len(imp.atoms("element O")) == 1 + + imps1 = mol.impropers( + mol["element C"], mol["element C"], mol["element N"], mol["element O"] + ) + + assert len(imps0) == len(imps1) + + for i0, i1 in zip(imps0, imps1): + assert i0 == i1 diff --git a/tests/options/test_options.py b/tests/options/test_options.py new file mode 100644 index 000000000..17f46f0d2 --- /dev/null +++ b/tests/options/test_options.py @@ -0,0 +1,64 @@ +import pytest + +import sire as sr + + +def test_options(): + class TestOptions(sr.options.Option): + A = "a", "Option A" + B = "b", "Option B" + C = "c", "Option C" + + @staticmethod + def create(option: str): + return sr.options.Option._create(TestOptions, option) + + @staticmethod + def options(include_docs: bool = False): + return sr.options.Option._options( + TestOptions, include_docs=include_docs + ) + + assert TestOptions.A == "a" + assert TestOptions.B == "b" + assert TestOptions.C == "c" + + assert TestOptions.A.value == "a" + assert TestOptions.B.value == "b" + assert TestOptions.C.value == "c" + + assert TestOptions.A.__doc__ == "Option A" + assert TestOptions.B.__doc__ == "Option B" + assert TestOptions.C.__doc__ == "Option C" + + assert TestOptions.create("a") == TestOptions.A + assert TestOptions.create("b") == TestOptions.B + assert TestOptions.create("c") == TestOptions.C + + assert TestOptions.options() == ["a", "b", "c"] + assert TestOptions.options(include_docs=True) == [ + ("a", "Option A"), + ("b", "Option B"), + ("c", "Option C"), + ] + + +@pytest.mark.parametrize( + "cls, option, expected", + [ + (sr.options.Platform, "CPU", "cpu"), + ( + sr.options.Constraint, + " H-bonds-h-ANGLES ", + "h_bonds_h_angles", + ), + (sr.options.PerturbableConstraint, "None", "none"), + (sr.options.Cutoff, "Particle Mesh Ewald", "pme"), + (sr.options.Cutoff, "REACTION FIELD", 
"rf"), + (sr.options.Cutoff, " No CutoFF ", "none"), + (sr.options.Integrator, "Langevin", "langevin"), + (sr.options.Integrator, "Langevin-Middle", "langevin_middle"), + ], +) +def test_dynamics_options(cls, option, expected): + assert cls.create(option) == expected diff --git a/version.txt b/version.txt index b48a0e5e3..42ae83e93 100644 --- a/version.txt +++ b/version.txt @@ -1,2 +1 @@ -2023.4.2 - +2023.5.0 diff --git a/wrapper/Convert/CMakeLists.txt b/wrapper/Convert/CMakeLists.txt index 48e6627b2..ebfcd148c 100644 --- a/wrapper/Convert/CMakeLists.txt +++ b/wrapper/Convert/CMakeLists.txt @@ -6,6 +6,7 @@ add_subdirectory (SireRDKit) add_subdirectory (SireOpenMM) +add_subdirectory (SireGemmi) # installation set( INSTALLDIR ${SIRE_PYTHON}/sire/legacy/Convert ) diff --git a/wrapper/Convert/SireGemmi/CMakeLists.txt b/wrapper/Convert/SireGemmi/CMakeLists.txt new file mode 100644 index 000000000..c1e572a64 --- /dev/null +++ b/wrapper/Convert/SireGemmi/CMakeLists.txt @@ -0,0 +1,103 @@ +########################################## +# +# CMake file for Python wrapper:_SireGemmi +# +########################################## + +# Find gemmi and pybind11 libraries +find_package(gemmi) +find_package(pybind11) + +if (${gemmi_FOUND}) + # We can only link against the dynamic library - static doesn't work + get_target_property(GEMMI_LIBRARY gemmi::gemmi_cpp LOCATION) + + # Test if we can link to Gemmi on Windows + if (WIN32) + # test if GEMMI_LIBRARY ends with .lib + if (${GEMMI_LIBRARY} MATCHES "\\.lib$") + message( STATUS "GEMMI_LIBRARY is a static library." ) + message( STATUS "We can only link against the dynamic library." ) + message( STATUS "Disabling gemmi PDBx/mmCIF support.") + set(gemmi_FOUND FALSE) + endif() + else() + # test if GEMMI_LIBRARY ends with .a + if (${GEMMI_LIBRARY} MATCHES "\\.a$") + message( STATUS "GEMMI_LIBRARY is a static library." ) + message( STATUS "We can only link against the dynamic library." 
) + message( STATUS "Disabling gemmi PDBx/mmCIF support.") + set(gemmi_FOUND FALSE) + endif() + endif() +endif() + +if (${gemmi_FOUND} AND ${pybind11_FOUND}) + # Only compile and install if we have gemmi + message( STATUS "Compiling SireGemmi converter" ) + + get_target_property(GEMMI_LIBRARY gemmi::gemmi_cpp LOCATION) + + message( STATUS "GEMMI LIBRARY ${GEMMI_LIBRARY}" ) + message( STATUS "PYBIND11 INCLUDE ${pybind11_INCLUDE_DIR}") + message( STATUS "PYBIND11 LIBRARY ${pybind11_LIBRARIES}" ) + message( STATUS "ZLIB LIBRARIES ${ZLIB_LIBRARIES}" ) + + # Third Party dependencies of this module + include_directories( ${PYTHON_INCLUDE_DIR} + ${pybind11_INCLUDE_DIR} + ${gemmi_INCLUDE_DIR} ) + + # Sire include paths + include_directories( BEFORE ${SIRE_INCLUDE_DIR} ) + + # Other python wrapping directories + include_directories(${CMAKE_SOURCE_DIR}) + + # Define the sources in SireGemmi + set ( SIREGEMMI_SOURCES + + _SireGemmi.main.cpp + + sire_gemmi.cpp + + ) + + # Create the library that holds all of the class wrappers + add_library (SireGemmi ${SIREGEMMI_SOURCES}) + + set_target_properties(SireGemmi + PROPERTIES + VERSION ${SIRE_VERSION} + SOVERSION ${SIRE_VERSION_MAJOR} + OUTPUT_NAME "SireGemmi" + PREFIX "_" + SUFFIX ${PYTHON_MODULE_EXTENSION} + ) + + target_link_libraries(SireGemmi + SirePython + SIRE_SireIO + SIRE_SireMM + SIRE_SireMol + SIRE_SireBase + SIRE_SireStream + SIRE_SireError + ${GEMMI_LIBRARY} + ${pybind11_LIBRARIES} + ) + + include( LimitSirePythonExportSymbols ) + export_this_symbol_only( "init_SireGemmi" "_init_SireGemmi" ) + + # installation + set( INSTALLDIR ${SIRE_PYTHON}/sire/legacy/Convert ) + + install( TARGETS SireGemmi + LIBRARY DESTINATION ${INSTALLDIR} + ARCHIVE DESTINATION ${INSTALLDIR} + RUNTIME DESTINATION ${INSTALLDIR} + ) +else() + message( WARNING "gemmi not found, so we cannot compile the converter." 
) +endif() diff --git a/wrapper/Convert/SireGemmi/_SireGemmi.main.cpp b/wrapper/Convert/SireGemmi/_SireGemmi.main.cpp new file mode 100644 index 000000000..7c9857c14 --- /dev/null +++ b/wrapper/Convert/SireGemmi/_SireGemmi.main.cpp @@ -0,0 +1,32 @@ + +// (C) Christopher Woods, GPL >= 3 License + +#include "boost/python.hpp" +#include "boost/python/converter/registry.hpp" + +#include "sire_gemmi.h" + +#include "Helpers/pyboost11.hpp" + +namespace bp = boost::python; + +using namespace SireGemmi; + +BOOST_PYTHON_MODULE(_SireGemmi) +{ + bp::def("sire_to_gemmi", + &sire_to_gemmi, + (bp::arg("mols"), bp::arg("map")), + "Convert sire system to a gemmi structure"); + + bp::def("gemmi_to_sire", + &gemmi_to_sire, + (bp::arg("mols"), bp::arg("map")), + "Convert a gemmi Structure to a sire system"); + + bp::def("_register_pdbx_loader", + ®ister_pdbx_loader, + "Internal function called once used to register PDBx support"); + + pyboost11::converter(); +} diff --git a/wrapper/Convert/SireGemmi/sire_gemmi.cpp b/wrapper/Convert/SireGemmi/sire_gemmi.cpp new file mode 100644 index 000000000..c67e9f9a5 --- /dev/null +++ b/wrapper/Convert/SireGemmi/sire_gemmi.cpp @@ -0,0 +1,1274 @@ + +#include "sire_gemmi.h" + +#include "gemmi/cif.hpp" +#include "gemmi/modify.hpp" +#include "gemmi/polyheur.hpp" +#include "gemmi/to_cif.hpp" +#include "gemmi/to_mmcif.hpp" + +#include "SireIO/pdbx.h" + +#include "SireMol/core.h" +#include "SireMol/moleditor.h" +#include "SireMol/element.h" + +#include "SireMol/atomproperty.hpp" +#include "SireMol/atomelements.h" +#include "SireMol/atomcoords.h" +#include "SireMol/atomcharges.h" +#include "SireMol/connectivity.h" +#include "SireMol/bondhunter.h" + +#include "SireMol/iswater.h" + +#include "SireBase/propertylist.h" +#include "SireBase/stringproperty.h" + +#include "SireUnits/units.h" + +#include "SireError/errors.h" + +#include +#include +#include + +namespace cif = gemmi::cif; + +namespace SireGemmi +{ + template + void set_prop(T &atom, const QString 
&key, const V &value, + const SireBase::PropertyMap &map) + { + const auto name = map[key]; + + if (name.hasSource()) + atom.setProperty(name.source(), value); + } + + SireMaths::Vector from_vec(const gemmi::Position &pos) + { + return SireMaths::Vector(pos.x, pos.y, pos.z); + } + + void populate_atom(SireMol::AtomStructureEditor &atm, + const gemmi::Atom &atom, + const QString &is_hetatm, + const SireBase::PropertyMap &map) + { + // the charge is a signed-char + double chg = int(atom.charge); + + set_prop(atm, "element", SireMol::Element(atom.element.atomic_number()), map); + set_prop(atm, "formal_charge", SireUnits::Dimension::Charge(chg), map); + set_prop(atm, "occupancy", double(atom.occ), map); + set_prop(atm, "beta_factor", double(atom.b_iso), map); + set_prop(atm, "is_het", is_hetatm, map); + set_prop(atm, "alt_loc", QString(atom.altloc), map); + set_prop(atm, "coordinates", from_vec(atom.pos), map); + } + + void parse_polymer(const gemmi::Entity &entity, SireMol::MoleculeGroup &mols, + const QHash &subchains, + const gemmi::Structure &structure, + QHash &serial_to_molnum, + const QString &molname, + const SireBase::PropertyMap &map) + { + // parse this into a single polymer comprised of multiple chains + auto mol = SireMol::MolStructureEditor(SireMol::Molecule().edit()); + mol.renumber(); + mol.rename(SireMol::MolName(molname)); + + auto cg0 = mol.add(SireMol::CGName("0")); + + QHash segments; + + const auto molnum = cg0.molecule().number(); + + int cg_num = 0; + + for (const auto &subchain : entity.subchains) + { + auto model_id = subchains.value(QString::fromStdString(subchain), 0); + + auto subchain_residues = structure.models[model_id].get_subchain(subchain); + + if (subchain_residues.empty()) + { + continue; + } + + auto chain = mol.add(SireMol::ChainName(QString::fromStdString(subchain.c_str()))); + + for (const auto &residue : subchain_residues) + { + auto cg = cg0; + + if (cg_num > 0) + { + cg = mol.add(SireMol::CGName(QString::number(cg_num))); + } 
+ + cg_num += 1; + + int resnum = cg_num; + + if (residue.seqid.num.has_value()) + resnum = residue.seqid.num.value; + + auto res = chain.add(SireMol::ResName(QString::fromStdString(residue.name))); + res.renumber(SireMol::ResNum(resnum)); + + if (residue.seqid.has_icode()) + set_prop(res, "insert_code", QString(residue.seqid.icode), map); + + SireMol::SegStructureEditor seg; + bool residue_in_segment = false; + + if (not residue.segment.empty()) + { + // this residue belongs in a segment + auto segname = QString::fromStdString(residue.segment); + + if (not segments.contains(segname)) + { + // create a new segment + auto seg = cg.molecule().add(SireMol::SegName(segname)); + segments.insert(segname, seg); + } + + seg = segments[segname]; + residue_in_segment = true; + } + + QString is_hetatm("False"); + + if (residue.het_flag == 'H') + is_hetatm = "True"; + + for (const auto &atom : residue.atoms) + { + auto atm = cg.add(SireMol::AtomNum(atom.serial)); + atm.reparent(res.index()); + atm.rename(SireMol::AtomName(QString::fromStdString(atom.name))); + populate_atom(atm, atom, is_hetatm, map); + + serial_to_molnum.insert(atom.serial, molnum); + + if (residue_in_segment) + atm.reparent(seg.index()); + } + } + } + + mols.add(mol.commit()); + } + + void parse_molecules(const gemmi::Entity &entity, SireMol::MoleculeGroup &mols, + const QHash &subchains, + const gemmi::Structure &structure, + QHash &serial_to_molnum, + const SireBase::PropertyMap &map) + { + // parse each chain into a separate molecule + + for (const auto &subchain : entity.subchains) + { + auto model_id = subchains.value(QString::fromStdString(subchain), 0); + + auto mol = SireMol::MolStructureEditor(SireMol::Molecule().edit()); + mol.renumber(); + + auto cg0 = mol.add(SireMol::CGName("0")); + + const auto molnum = mol.number(); + + QHash segments; + + int cg_num = 0; + + auto chain = mol.add(SireMol::ChainName(QString::fromStdString(subchain.c_str()))); + + for (const auto &residue : 
structure.models[model_id].get_subchain(subchain)) + { + auto cg = cg0; + + if (cg_num == 0) + { + // first residue - use the residue name as the atom name + mol.rename(SireMol::MolName(QString::fromStdString(residue.name))); + } + else + { + cg = mol.add(SireMol::CGName(QString::number(cg_num))); + } + + cg_num += 1; + + int resnum = cg_num; + + if (residue.seqid.num.has_value()) + resnum = residue.seqid.num.value; + + auto res = mol.add(SireMol::ResNum(resnum)); + res.rename(SireMol::ResName(QString::fromStdString(residue.name))); + res.reparent(chain.name()); + + if (residue.seqid.has_icode()) + set_prop(res, "insert_code", QString(residue.seqid.icode), map); + + SireMol::SegStructureEditor seg; + bool residue_in_segment = false; + + if (not residue.segment.empty()) + { + // this residue belongs in a segment + auto segname = QString::fromStdString(residue.segment); + + if (not segments.contains(segname)) + { + // create a new segment + auto seg = mol.add(SireMol::SegName(segname)); + segments.insert(segname, seg); + } + + seg = segments[segname]; + residue_in_segment = true; + } + + QString is_hetatm("False"); + + if (residue.het_flag == 'H') + is_hetatm = "True"; + + for (const auto &atom : residue.atoms) + { + auto atm = cg.add(SireMol::AtomNum(atom.serial)); + atm.reparent(res.index()); + atm.rename(SireMol::AtomName(QString::fromStdString(atom.name))); + populate_atom(atm, atom, is_hetatm, map); + + serial_to_molnum.insert(atom.serial, molnum); + + if (residue_in_segment) + atm.reparent(seg.index()); + } + } + + auto m = mol.commit(); + mols.add(m); + } + } + + void parse_waters(const gemmi::Entity &entity, SireMol::MoleculeGroup &mols, + const QHash &subchains, + const gemmi::Structure &structure, + QHash &serial_to_molnum, + const SireBase::PropertyMap &map) + { + // parse each residue in each chain into a separate water molecule + + for (const auto &subchain : entity.subchains) + { + auto model_id = subchains.value(QString::fromStdString(subchain), 0); + 
+ int num_waters = 0; + + for (const auto &residue : structure.models[model_id].get_subchain(subchain)) + { + num_waters += 1; + + auto mol = SireMol::MolStructureEditor(SireMol::Molecule().edit()); + mol.renumber(); + mol.rename(SireMol::MolName("WAT")); + auto chain = mol.add(SireMol::ChainName(QString::fromStdString(subchain.c_str()))); + + auto cg = mol.add(SireMol::CGName("0")); + + const auto molnum = cg.molecule().number(); + + int resnum = num_waters; + + if (residue.seqid.num.has_value()) + resnum = residue.seqid.num.value; + + auto res = mol.add(SireMol::ResNum(resnum)); + res.rename(SireMol::ResName(QString::fromStdString(residue.name))); + res.reparent(chain.name()); + + if (residue.seqid.has_icode()) + set_prop(res, "insert_code", QString(residue.seqid.icode), map); + + QString is_hetatm("False"); + + if (residue.het_flag == 'H') + is_hetatm = "True"; + + for (const auto &atom : residue.atoms) + { + auto atm = cg.add(SireMol::AtomNum(atom.serial)); + atm.reparent(res.index()); + atm.rename(SireMol::AtomName(QString::fromStdString(atom.name))); + populate_atom(atm, atom, is_hetatm, map); + + serial_to_molnum.insert(atom.serial, molnum); + } + + auto m = mol.commit().edit(); + m.setProperty(map["is_water"], SireBase::BooleanProperty(true)); + + mols.add(m.commit()); + } + } + } + + SireSystem::System gemmi_to_sire(const gemmi::Structure &orig_structure, + const SireBase::PropertyMap &map) + { + auto mols = SireMol::MoleculeGroup("all"); + + gemmi::Structure structure = orig_structure; + + structure.merge_chain_parts(); + + gemmi::ensure_entities(structure); + + // create a dictionary of to locate which model contains which subchain + QHash subchains; + + for (int i = 0; i < structure.models.size(); ++i) + { + for (auto subchain : structure.models[i].subchains()) + { + auto id = QString::fromStdString(subchain.subchain_id()); + + if (not subchains.contains(id)) + subchains.insert(id, i); + } + } + + QHash serial_to_molnum; + + // this function doesn't 
appear to be in the gemmi API? + // structure.standardize_crystal_frame(); + + auto structure_name = QString::fromStdString(structure.name); + + QStringList polynames; + + for (const auto &entity : structure.entities) + { + if (entity.entity_type == gemmi::EntityType::Polymer) + { + polynames.append(structure_name); + } + } + + if (polynames.size() > 1) + { + int npolys = 0; + + for (const auto &entity : structure.entities) + { + if (entity.entity_type == gemmi::EntityType::Polymer) + { + polynames[npolys] = QString("[%1]-%2") + .arg(QString::fromStdString(entity.name)) + .arg(polynames[npolys]); + npolys += 1; + } + } + } + + int npolys = 0; + + for (const auto &entity : structure.entities) + { + switch (entity.entity_type) + { + case gemmi::EntityType::Polymer: + parse_polymer(entity, mols, subchains, structure, + serial_to_molnum, polynames[npolys], map); + npolys += 1; + break; + case gemmi::EntityType::NonPolymer: + parse_molecules(entity, mols, subchains, structure, + serial_to_molnum, map); + break; + case gemmi::EntityType::Branched: + parse_molecules(entity, mols, subchains, structure, + serial_to_molnum, map); + break; + case gemmi::EntityType::Water: + parse_waters(entity, mols, subchains, structure, + serial_to_molnum, map); + break; + default: + parse_molecules(entity, mols, subchains, structure, + serial_to_molnum, map); + } + } + + // now need to handle any connections + const auto connectivity_property = map["connectivity"]; + + if (connectivity_property.hasSource()) + { + bool auto_connect = true; + + if (map.specified("auto_connect")) + { + auto_connect = map["auto_connect"].value().asABoolean(); + } + + QHash connectivities; + + for (const auto &connection : structure.connections) + { + const auto model_num = subchains.value(QString::fromStdString(connection.partner1.chain_name), 0); + + // the connection must be between two atoms in the same model + const auto &atom1 = structure.models[model_num].find_cra(connection.partner1); + const auto 
&atom2 = structure.models[model_num].find_cra(connection.partner2); + + if (atom1.atom == 0 or atom2.atom == 0) + { + // one of the atoms is missing + continue; + } + + // get the atom numbers of both atoms + const auto serial1 = atom1.atom->serial; + const auto serial2 = atom2.atom->serial; + + // find the atoms - the serial is the AtomNum and should be unique + const auto molnum1 = serial_to_molnum.value(serial1, SireMol::MolNum(0)); + const auto molnum2 = serial_to_molnum.value(serial2, SireMol::MolNum(0)); + + if (molnum1 == SireMol::MolNum(0) or molnum1 != molnum2) + { + // bonds between molecules? Or something else wrong? + continue; + } + + // get the connectivity for this molecule + if (not connectivities.contains(molnum1)) + { + const auto mol = mols.molecule(molnum1).molecule(); + + // create a new connectivity + if (auto_connect) + { + try + { + // if we are auto-connecting, then we need to auto-generate + // the connectivity now, so that the CONECT records are added + // to this. If we don't, then the later stage of code won't + // want to overwrite the connectivity + auto hunter = SireMol::CovalentBondHunter(); + connectivities.insert(molnum1, hunter(mol).edit()); + } + catch (...) 
+ { + connectivities.insert(molnum1, SireMol::ConnectivityEditor(mol)); + } + } + else + { + connectivities.insert(molnum1, SireMol::ConnectivityEditor(mol)); + } + } + + auto &connectivity = connectivities[molnum1]; + connectivity.connect(SireMol::AtomNum(serial1), SireMol::AtomNum(serial2)); + } + + for (auto it = connectivities.constBegin(); it != connectivities.constEnd(); ++it) + { + auto connectivity = it.value().commit(); + + auto molnum = it.key(); + + auto mol = mols.molecule(molnum).molecule().edit(); + + mol.setProperty(connectivity_property, connectivity); + + mols.update(mol.commit()); + } + } + + SireSystem::System system; + system.setName(structure_name); + + system.add(mols); + + if (map.specified("metadata")) + { + auto metadata = map["metadata"].value().asA(); + system.setProperty("metadata", metadata); + } + + return system; + } + + bool populate_atom(gemmi::Atom &gemmi_atom, + const SireMol::Atom &atom, + const SireBase::PropertyMap &map) + { + gemmi_atom.name = atom.name().value().toStdString(); + gemmi_atom.serial = atom.number().value(); + + auto coords = atom.property(map["coordinates"]); + gemmi_atom.pos = gemmi::Position(coords.x(), coords.y(), coords.z()); + + try + { + auto element = atom.property(map["element"]); + gemmi_atom.element = gemmi::Element(element.nProtons()); + } + catch (...) + { + } + + try + { + auto chg = atom.property(map["formal_charge"]).to(SireUnits::mod_electron); + gemmi_atom.charge = int(chg); + } + catch (...) + { + } + + try + { + auto occ = atom.property(map["occupancy"]); + gemmi_atom.occ = occ; + } + catch (...) + { + } + + try + { + auto b_iso = atom.property(map["beta_factor"]); + gemmi_atom.b_iso = b_iso; + } + catch (...) + { + } + + bool is_hetatm = false; + + try + { + const auto het_text = atom.property(map["is_het"]); // own name: reusing 'is_hetatm' here shadowed the bool that is returned + + if (het_text == "True") + is_hetatm = true; + } + catch (...) 
+ { + } + + try + { + auto alt_loc = atom.property(map["alt_loc"]); + + if (not alt_loc.isEmpty()) + gemmi_atom.altloc = alt_loc.toStdString()[0]; + } + catch (...) + { + } + + return is_hetatm; + } + + void convert_polymer(int molid, + const SireMol::Molecule &mol, gemmi::Chain &chain, + QHash &chains, + const SireBase::PropertyMap &map) + { + const auto residues = mol.residues(); + + const auto entity_id = QString::number(molid).toStdString(); + + for (int i = 0; i < residues.count(); ++i) + { + const auto residue = residues(i); + + gemmi::Residue gemmi_residue; + gemmi_residue.entity_type = gemmi::EntityType::Polymer; + gemmi_residue.entity_id = entity_id; + + gemmi_residue.name = residue.name().value().toStdString(); + gemmi_residue.seqid.num = residue.number().value(); + + const auto atoms = residue.atoms(); + + bool is_hetatm_residue = false; + + QString seg; + + for (int j = 0; j < atoms.count(); ++j) + { + const auto atom = atoms(j); + + gemmi::Atom gemmi_atom; + auto is_hetatm = populate_atom(gemmi_atom, atom, map); + + is_hetatm_residue = is_hetatm_residue or is_hetatm; + + gemmi_residue.atoms.push_back(gemmi_atom); + + if (atom.isWithinSegment()) + seg = atom.segment().name().value(); + } + + if (not seg.isEmpty()) + gemmi_residue.segment = seg.toStdString(); + + if (is_hetatm_residue) + gemmi_residue.het_flag = 'H'; + else + gemmi_residue.het_flag = 'A'; + + if (residue.isWithinChain()) + { + gemmi_residue.subchain = residue.chain().name().value().toStdString(); + chains.find(residue.chain().name().value()).value().residues.push_back(gemmi_residue); + } + else + { + chain.residues.push_back(gemmi_residue); + } + } + } + + void convert_molecule(int molid, + const SireMol::Molecule &mol, gemmi::Chain &chain, + QHash &chains, + const SireBase::PropertyMap &map) + { + const auto residues = mol.residues(); + + const auto entity_id = QString::number(molid).toStdString(); + + for (int i = 0; i < residues.count(); ++i) + { + const auto residue = residues(i); + 
+ gemmi::Residue gemmi_residue; + gemmi_residue.entity_type = gemmi::EntityType::NonPolymer; + gemmi_residue.entity_id = entity_id; + + gemmi_residue.name = residue.name().value().toStdString(); + gemmi_residue.seqid.num = residue.number().value(); + + const auto atoms = residue.atoms(); + + bool is_hetatm_residue = false; + + QString seg; + + for (int j = 0; j < atoms.count(); ++j) + { + const auto atom = atoms(j); + + gemmi::Atom gemmi_atom; + auto is_hetatm = populate_atom(gemmi_atom, atom, map); + + is_hetatm_residue = is_hetatm_residue or is_hetatm; + + if (atom.isWithinSegment()) + seg = atom.segment().name().value(); + + gemmi_residue.atoms.push_back(gemmi_atom); + } + + if (not seg.isEmpty()) + gemmi_residue.segment = seg.toStdString(); + + if (is_hetatm_residue) + gemmi_residue.het_flag = 'H'; + else + gemmi_residue.het_flag = 'A'; + + if (residue.isWithinChain()) + { + gemmi_residue.subchain = residue.chain().name().value().toStdString(); + chains.find(residue.chain().name().value()).value().residues.push_back(gemmi_residue); + } + else + { + chain.residues.push_back(gemmi_residue); + } + } + } + + void convert_water(int molid, + const SireMol::Molecule &mol, gemmi::Chain &chain, + QHash &chains, + const SireBase::PropertyMap &map) + { + gemmi::Residue residue; + residue.entity_type = gemmi::EntityType::Water; + residue.entity_id = QString::number(molid).toStdString(); + + const auto atoms = mol.atoms(); + + if (atoms.isEmpty()) + return; + + auto first_res = atoms(0).residue(); + residue.name = first_res.name().value().toStdString(); + residue.seqid.num = first_res.number().value(); + + bool is_hetatm_residue = false; + + for (int i = 0; i < atoms.count(); ++i) + { + const auto atom = atoms(i); + + gemmi::Atom gemmi_atom; + auto is_hetatm = populate_atom(gemmi_atom, atom, map); + + is_hetatm_residue = is_hetatm_residue or is_hetatm; + + residue.atoms.push_back(gemmi_atom); + } + + if (is_hetatm_residue) + residue.het_flag = 'H'; + else + residue.het_flag 
= 'A'; + + if (first_res.isWithinChain()) + { + residue.subchain = first_res.chain().name().value().toStdString(); + chains.find(first_res.chain().name().value()).value().residues.push_back(residue); + } + else + { + chain.residues.push_back(residue); + } + } + + gemmi::Structure sire_to_gemmi(const SireSystem::System &system, + const SireBase::PropertyMap &map) + { + if (system.nAtoms() == 0) + return gemmi::Structure(); + + const auto mols = SireMol::SelectorMol(system); + + gemmi::Structure structure; + gemmi::Model model(system.name().value().toStdString()); + + auto name = system.name().value().toStdString(); + + if (name.empty()) + { + // try to find a name from the molecules + for (const auto &mol : mols) + { + name = mol.name().value().toStdString(); + + if (not name.empty()) + break; + } + + if (name.empty()) + name = "sire_system"; + } + + structure.name = name; + + // create a real chain for each named chain in the molecules + QHash chains; + + for (const auto &mol : mols) + { + if (mol.nChains() > 0) + { + const auto c = mol.chains(); + + for (int i = 0; i < c.count(); ++i) + { + auto name = c(i).name().value(); + + if (not chains.contains(name)) + chains.insert(name, gemmi::Chain(name.toStdString())); + } + } + } + + // also create a single chain for all other molecules + int chain_num = 1; + auto chain_name = QString::number(chain_num).toStdString(); + + while (true) + { + if (not chains.contains(QString::number(chain_num))) + { + break; + } + + chain_num += 1; + chain_name = QString::number(chain_num).toStdString(); + } + + auto chain = gemmi::Chain(chain_name); + + QHash molnum_to_chain; + + int molid = 0; + + for (const auto &mol : mols) + { + molid += 1; + + if (SireMol::is_water(mol, map)) + { + convert_water(molid, mol, chain, chains, map); + } + else if (mol.nAtoms() == 1) + { + if (mol.atoms()(0).property(map["element"]) == SireMol::Element(8)) + { + // single oxygen is a water without hydrogens + convert_water(molid, mol, chain, chains, map); 
+ } + else + { + // convert as a normal molecule + convert_molecule(molid, mol, chain, chains, map); + } + } + else if (mol.nResidues() > 1) + { + convert_polymer(molid, mol, chain, chains, map); + } + else + { + // convert as a normal molecule + convert_molecule(molid, mol, chain, chains, map); + } + } + + auto chain_names = chains.keys(); + chain_names.sort(); + + for (const auto &name : chain_names) + { + auto &chain = chains.find(name).value(); + + if (not chain.residues.empty()) + model.chains.push_back(chain); + } + + if (not chain.residues.empty()) + model.chains.push_back(chain); + + structure.models.push_back(model); + + structure.renumber_models(); + gemmi::setup_entities(structure); + gemmi::assign_serial_numbers(structure, true); + + // now we have done this, we need to add in all of the bonds + const auto connectivity_property = map["connectivity"]; + + for (const auto &mol : mols) + { + try + { + const auto atoms = mol.atoms(); + const int nats = atoms.count(); + + std::vector chain_names; + + const auto residues = mol.residues(); + + for (int i = 0; i < residues.count(); ++i) + { + const auto res = residues(i); + + if (res.isWithinChain()) + { + auto name = res.chain().name().value().toStdString(); + chain_names.push_back(name); + } + else + { + chain_names.push_back(chain_name); + } + } + + const auto &connectivity = mol.property(connectivity_property).asA(); + + for (int i = 0; i < nats - 1; ++i) + { + const auto idx0 = SireMol::AtomIdx(i); + const auto atom0 = atoms(i); + + auto connections = connectivity.connectionsTo(idx0); + + if (connections.isEmpty()) + continue; + + gemmi::AtomAddress addr0; + + auto res0 = atom0.residue(); + + addr0.chain_name = chain_names[res0.index().value()]; + addr0.res_id.name = res0.name().value().toStdString(); + addr0.res_id.seqid.num = res0.number().value(); + addr0.atom_name = atom0.name().value().toStdString(); + + auto cra0 = model.find_cra(addr0, true); + + for (const auto &idx1 : connections) + { + if 
(idx1.value() <= i) + continue; + + const auto atom1 = atoms(idx1.value()); + const auto res1 = atom1.residue(); + + gemmi::AtomAddress addr1; + + addr1.chain_name = chain_names[res1.index().value()]; + addr1.res_id.name = res1.name().value().toStdString(); + addr1.res_id.seqid.num = res1.number().value(); + addr1.atom_name = atom1.name().value().toStdString(); + + auto cra1 = model.find_cra(addr1, true); + + gemmi::Connection connection; + connection.partner1 = addr0; + connection.partner2 = addr1; + + structure.connections.push_back(connection); + } + } + } + catch (...) + { + // no connectivity for this molecule + continue; + } + } + + return structure; + } + + QString _string_to_property(const std::string &s) + { + QString q = QString::fromStdString(s); + + if (q.startsWith("\"") and q.endsWith("\"")) + q = q.mid(1, q.size() - 2); + else if (q.startsWith("'") and q.endsWith("'")) + q = q.mid(1, q.size() - 2); + + return q; + } + + SireSystem::System pdbx_reader_function(const QStringList &lines, + const SireBase::PropertyMap &map) + { + // assemble all of the line into a single string + auto input_string = lines.join("\n").toStdString(); + + auto doc = cif::Document(cif::read_string(input_string)); + + input_string.clear(); + + int structure_block = -1; + + // mmCIF files for deposition may have more than one block: + // coordinates in the first block and restraints in the others. + for (size_t i = 0; i < doc.blocks.size(); ++i) + { + if (doc.blocks[i].has_tag("_atom_site.id")) + { + if (structure_block != -1) + throw SireError::unsupported( + QObject::tr("2+ blocks are ok if only the first one has coordinates. 
" + "_atom_site in block #%1 : %2") + .arg(i + 1) + .arg(QString::fromStdString(doc.source)), + CODELOC); + + structure_block = i; + } + } + + if (structure_block == -1) + // just use the first block + structure_block = 0; + + auto structure = gemmi::make_structure_from_block(doc.blocks.at(structure_block)); + + // check for any metadata - if there is, then add it to the map + auto *block = doc.find_block("sire"); + SireBase::Properties metadata; + + if (block != nullptr) + { + for (const auto &item : block->items) + { + switch (item.type) + { + case cif::ItemType::Pair: + { + const auto &p = item.pair; + + QString tag = QString::fromStdString(p[0]); + QString value = _string_to_property(p[1]); + + if (tag.startsWith("_")) + tag = tag.mid(1); + + metadata.setProperty(tag, SireBase::StringProperty(value)); + break; + } + case cif::ItemType::Loop: + { + const auto &l = item.loop; + + QStringList tags; + + for (const auto &tag : l.tags) + { + tags.append(QString::fromStdString(tag)); + } + + QStringList values; + + for (const auto &value : l.values) + { + values.append(_string_to_property(value)); + } + + if (tags.size() == 1 and tags[0].endsWith(".value")) + { + // this is an array of values + auto array = SireBase::PropertyList(); + + for (const auto &value : values) + { + array.append(SireBase::StringProperty(value)); + } + + auto tag = tags[0]; + + if (tag.startsWith("_")) + tag = tag.mid(1); + + // remove the .value on the end + tag = tag.left(tag.size() - 6); + + metadata.setProperty(tag, array); + } + else + { + // this is a set of property values + auto props = SireBase::Properties(); + + if (tags.size() == values.size()) + { + // one value per key + for (int i = 0; i < tags.size(); ++i) + { + auto subtag = tags[i].split(".").mid(1).join("."); + props.setProperty(subtag, SireBase::StringProperty(values[i])); + } + } + else + { + // multiple values per key + for (int i = 0; i < tags.size(); ++i) + { + auto subtag = tags[i].split(".").mid(1).join("."); + + auto 
subvals = SireBase::StringArrayProperty(); + + for (int j = i; j < values.size(); j += tags.size()) + { + subvals.append(values[j]); + } + + props.setProperty(subtag, subvals); + } + } + + auto tag = tags[0].split(".").at(0); + + if (tag.startsWith("_")) + tag = tag.mid(1); + + metadata.setProperty(tag, props); + } + + break; + } + } + } + } + + auto m = map; + + if (not metadata.isEmpty()) + { + m.set("metadata", metadata); + } + + return gemmi_to_sire(structure, m); + } + + std::string _property_to_string(const SireBase::Property &p) + { + QString s; + + try + { + s = p.asAString(); + } + catch (...) + { + s = p.toString(); + } + + // make sure we put any strings that contain spaces into quotes + if (s.contains(" ")) + s = QString("\"%1\"").arg(s); + + return s.toStdString(); + } + + QStringList pdbx_writer_function(const SireSystem::System &system, + const SireBase::PropertyMap &map) + { + auto structure = sire_to_gemmi(system, map); + + auto doc = gemmi::make_mmcif_document(structure); + + if (system.containsProperty(map["metadata"])) + { + auto &block = doc.add_new_block("sire"); + + auto metadata = system.property(map["metadata"]).asA(); + + auto keys = metadata.propertyKeys(); + keys.sort(); + + for (const auto &key : keys) + { + const auto &value = metadata.property(key); + + if (value.isA()) + { + const auto &props2 = value.asA(); + auto keys2 = props2.propertyKeys(); + keys2.sort(); + + std::vector tags; + + int nrows = 0; + + for (const auto &key2 : keys2) + { + tags.push_back(QString(".%1").arg(key2.simplified().replace(" ", "_")).toStdString()); + + const auto &value2 = props2.property(key2); + + if (value2.isAnArray()) + { + nrows = std::max(nrows, value2.asAnArray().count()); + } + } + + auto &loop = block.init_loop(QString("_%1").arg(key.simplified().replace(" ", "_")).toStdString(), tags); + + for (int i = 0; i < nrows; ++i) + { + std::vector values; + + for (const auto &key2 : keys2) + { + const auto &value2 = props2.property(key2); + + if 
(value2.isAnArray()) + { + const auto &array2 = value2.asAnArray(); + + if (i < array2.count()) + { + values.push_back(_property_to_string(array2[i])); + } + else + { + values.push_back("\"\""); + } + } + else + { + values.push_back(_property_to_string(value2)); + } + } + + loop.add_row(values); + } + } + else if (value.isAnArray()) + { + auto tag = QString("_%1").arg(key.simplified().replace(" ", "_")); + + auto array = value.asAnArray(); + + if (array.count() == 1) + { + block.set_pair(tag.toStdString(), _property_to_string(array[0])); + } + else + { + auto &loop = block.init_loop(tag.toStdString(), {".value"}); + + for (int i = 0; i < array.size(); ++i) + { + loop.add_row({_property_to_string(array[i])}); + } + } + } + else + { + auto tag = QString("_%1").arg(key.simplified().replace(" ", "_")); + + doc.blocks[0].set_pair(tag.toStdString(), _property_to_string(value)); + } + } + } + + std::stringstream stream; + + gemmi::cif::write_cif_to_stream(stream, doc); + + stream.flush(); + + auto lines = QString::fromStdString(stream.str()).split("\n"); + + return lines; + } + + void register_pdbx_loader() + { + SireIO::PDBxReaderFunction reader_function(&pdbx_reader_function); + SireIO::PDBxWriterFunction writer_function(&pdbx_writer_function); + + SireIO::register_pdbx_loader_functions(writer_function, reader_function); + } +} diff --git a/wrapper/Convert/SireGemmi/sire_gemmi.h b/wrapper/Convert/SireGemmi/sire_gemmi.h new file mode 100644 index 000000000..99888b775 --- /dev/null +++ b/wrapper/Convert/SireGemmi/sire_gemmi.h @@ -0,0 +1,24 @@ +#ifndef SIRE_GEMMI_H +#define SIRE_GEMMI_H + +#include "gemmi/mmcif.hpp" + +#include "SireSystem/system.h" + +#include "SireBase/propertymap.h" + +#include + +namespace SireGemmi +{ + + SireSystem::System gemmi_to_sire(const gemmi::Structure &structure, + const SireBase::PropertyMap &map); + + gemmi::Structure sire_to_gemmi(const SireSystem::System &system, + const SireBase::PropertyMap &map); + + void register_pdbx_loader(); +} + 
+#endif diff --git a/wrapper/Convert/SireOpenMM/CMakeLists.txt b/wrapper/Convert/SireOpenMM/CMakeLists.txt index bee36a1f6..ffde94ed2 100644 --- a/wrapper/Convert/SireOpenMM/CMakeLists.txt +++ b/wrapper/Convert/SireOpenMM/CMakeLists.txt @@ -24,6 +24,25 @@ if (${SIRE_USE_OPENMM}) # Other python wrapping directories include_directories(${CMAKE_SOURCE_DIR}) + # Check to see if we have support for updating some parameters in context + include(CheckCXXSourceCompiles) + check_cxx_source_compiles( "#include + int main() { + OpenMM::CustomNonbondedForce *force; + OpenMM::Context *context; + force->updateSomeParametersInContext(0, 0, *context); + return 0; + }" + SIREOPENMM_HAS_UPDATESOMEPARAMETERSINCONTEXT ) + + if ( ${SIREOPENMM_HAS_UPDATESOMEPARAMETERSINCONTEXT} ) + message( STATUS "OpenMM has support for updating some parameters in context") + add_definitions("-DSIRE_HAS_UPDATE_SOME_IN_CONTEXT") + else() + message( STATUS "OpenMM does not have support for updating some parameters in context") + message( STATUS "The free energy code will be a little slower.") + endif() + # Define the sources in SireOpenMM set ( SIREOPENMM_SOURCES diff --git a/wrapper/Convert/SireOpenMM/_sommcontext.py b/wrapper/Convert/SireOpenMM/_sommcontext.py index 5e27ed254..6af12732e 100644 --- a/wrapper/Convert/SireOpenMM/_sommcontext.py +++ b/wrapper/Convert/SireOpenMM/_sommcontext.py @@ -66,13 +66,14 @@ def __init__( # place the coordinates and velocities into the context _set_openmm_coordinates_and_velocities(self, metadata) - self._lambda_value = self._lambda_lever.set_lambda( - self, lambda_value - ) + self._lambda_value = self._lambda_lever.set_lambda(self, lambda_value) + + self._map = map else: self._atom_index = None self._lambda_lever = None self._lambda_value = 0.0 + self._map = None def __str__(self): p = self.getPlatform() @@ -88,6 +89,24 @@ def __str__(self): def __repr__(self): return self.__str__() + def get_constraint(self): + """ + Return the constraint applied to the system + """ 
+ if self._map.specified("constraint"): + return self._map["constraint"].source() + else: + return None + + def get_perturbable_constraint(self): + """ + Return the perturbable constraint applied to the system + """ + if self._map.specified("perturbable_constraint"): + return self._map["perturbable_constraint"].source() + else: + return None + def get_platform(self): """ Return the platform used for this simulation @@ -235,10 +254,7 @@ def get_potential_energy(self, to_sire_units: bool = True): import openmm from ...units import kcal_per_mol - return ( - nrg.value_in_unit(openmm.unit.kilocalorie_per_mole) - * kcal_per_mol - ) + return nrg.value_in_unit(openmm.unit.kilocalorie_per_mole) * kcal_per_mol else: return nrg diff --git a/wrapper/Convert/SireOpenMM/lambdalever.cpp b/wrapper/Convert/SireOpenMM/lambdalever.cpp index 0f014f58f..a3cfccff6 100644 --- a/wrapper/Convert/SireOpenMM/lambdalever.cpp +++ b/wrapper/Convert/SireOpenMM/lambdalever.cpp @@ -35,6 +35,125 @@ using namespace SireOpenMM; using namespace SireCAS; +////// +////// Implementation of MolLambdaCache +////// + +MolLambdaCache::MolLambdaCache() : lam_val(0) +{ +} + +MolLambdaCache::MolLambdaCache(double lam) : lam_val(lam) +{ +} + +MolLambdaCache::MolLambdaCache(const MolLambdaCache &other) + : lam_val(other.lam_val), cache(other.cache) +{ +} + +MolLambdaCache::~MolLambdaCache() +{ +} + +MolLambdaCache &MolLambdaCache::operator=(const MolLambdaCache &other) +{ + if (this != &other) + { + lam_val = other.lam_val; + cache = other.cache; + } + + return *this; +} + +const QVector &MolLambdaCache::morph(const LambdaSchedule &schedule, + const QString &key, + const QVector &initial, + const QVector &final) const +{ + auto nonconst_this = const_cast(this); + + QReadLocker lkr(&(nonconst_this->lock)); + + auto it = cache.constFind(key); + + if (it != cache.constEnd()) + return it.value(); + + lkr.unlock(); + + QWriteLocker wkr(&(nonconst_this->lock)); + + // check that someone didn't beat us to create the 
values + it = cache.constFind(key); + + if (it != cache.constEnd()) + return it.value(); + + // create the values + nonconst_this->cache.insert(key, schedule.morph(key, initial, final, lam_val)); + + return cache.constFind(key).value(); +} + +////// +////// Implementation of LeverCache +////// + +LeverCache::LeverCache() +{ +} + +LeverCache::LeverCache(const LeverCache &other) : cache(other.cache) +{ +} + +LeverCache::~LeverCache() +{ +} + +LeverCache &LeverCache::operator=(const LeverCache &other) +{ + if (this != &other) + { + cache = other.cache; + } + + return *this; +} + +const MolLambdaCache &LeverCache::get(int molidx, double lam_val) const +{ + auto nonconst_this = const_cast(this); + + if (not this->cache.contains(molidx)) + { + nonconst_this->cache.insert(molidx, QHash()); + } + + auto &mol_cache = nonconst_this->cache.find(molidx).value(); + + auto it = mol_cache.constFind(lam_val); + + if (it == mol_cache.constEnd()) + { + // need to create a new cache for this lambda value + it = mol_cache.insert(lam_val, MolLambdaCache(lam_val)); + } + + return it.value(); +} + +void LeverCache::clear() +{ + cache.clear(); +} + +////// +////// Implementation of LambdaLever +////// + LambdaLever::LambdaLever() : SireBase::ConcreteProperty() { } @@ -46,7 +165,8 @@ LambdaLever::LambdaLever(const LambdaLever &other) lambda_schedule(other.lambda_schedule), perturbable_mols(other.perturbable_mols), start_indicies(other.start_indicies), - perturbable_maps(other.perturbable_maps) + perturbable_maps(other.perturbable_maps), + lambda_cache(other.lambda_cache) { } @@ -64,6 +184,7 @@ LambdaLever &LambdaLever::operator=(const LambdaLever &other) perturbable_mols = other.perturbable_mols; start_indicies = other.start_indicies; perturbable_maps = other.perturbable_maps; + lambda_cache = other.lambda_cache; Property::operator=(other); } @@ -108,6 +229,7 @@ bool LambdaLever::hasLever(const QString &lever_name) void LambdaLever::addLever(const QString &lever_name) { 
this->lambda_schedule.addLever(lever_name); + this->lambda_cache.clear(); } /** Get the index of the force called 'name'. Returns -1 if @@ -253,84 +375,90 @@ double LambdaLever::setLambda(OpenMM::Context &context, std::vector custom_params = {0.0, 0.0, 0.0, 0.0}; + // record the range of indicies of the atoms which change + int start_change_atom = -1; + int end_change_atom = -1; + // change the parameters for all of the perturbable molecules for (int i = 0; i < this->perturbable_mols.count(); ++i) { const auto &perturbable_mol = this->perturbable_mols[i]; const auto &start_idxs = this->start_indicies[i]; + const auto &cache = this->lambda_cache.get(i, lambda_value); + // calculate the new parameters for this lambda value - const auto morphed_charges = this->lambda_schedule.morph( + const auto morphed_charges = cache.morph( + this->lambda_schedule, "charge", perturbable_mol.getCharges0(), - perturbable_mol.getCharges1(), - lambda_value); + perturbable_mol.getCharges1()); - const auto morphed_sigmas = this->lambda_schedule.morph( + const auto morphed_sigmas = cache.morph( + this->lambda_schedule, "sigma", perturbable_mol.getSigmas0(), - perturbable_mol.getSigmas1(), - lambda_value); + perturbable_mol.getSigmas1()); - const auto morphed_epsilons = this->lambda_schedule.morph( + const auto morphed_epsilons = cache.morph( + this->lambda_schedule, "epsilon", perturbable_mol.getEpsilons0(), - perturbable_mol.getEpsilons1(), - lambda_value); + perturbable_mol.getEpsilons1()); - const auto morphed_alphas = this->lambda_schedule.morph( + const auto morphed_alphas = cache.morph( + this->lambda_schedule, "alpha", perturbable_mol.getAlphas0(), - perturbable_mol.getAlphas1(), - lambda_value); + perturbable_mol.getAlphas1()); - const auto morphed_bond_k = this->lambda_schedule.morph( + const auto morphed_bond_k = cache.morph( + this->lambda_schedule, "bond_k", perturbable_mol.getBondKs0(), - perturbable_mol.getBondKs1(), - lambda_value); + perturbable_mol.getBondKs1()); - const 
auto morphed_bond_length = this->lambda_schedule.morph( + const auto morphed_bond_length = cache.morph( + this->lambda_schedule, "bond_length", perturbable_mol.getBondLengths0(), - perturbable_mol.getBondLengths1(), - lambda_value); + perturbable_mol.getBondLengths1()); - const auto morphed_angle_k = this->lambda_schedule.morph( + const auto morphed_angle_k = cache.morph( + this->lambda_schedule, "angle_k", perturbable_mol.getAngleKs0(), - perturbable_mol.getAngleKs1(), - lambda_value); + perturbable_mol.getAngleKs1()); - const auto morphed_angle_size = this->lambda_schedule.morph( + const auto morphed_angle_size = cache.morph( + this->lambda_schedule, "angle_size", perturbable_mol.getAngleSizes0(), - perturbable_mol.getAngleSizes1(), - lambda_value); + perturbable_mol.getAngleSizes1()); - const auto morphed_torsion_phase = this->lambda_schedule.morph( + const auto morphed_torsion_phase = cache.morph( + this->lambda_schedule, "torsion_phase", perturbable_mol.getTorsionPhases0(), - perturbable_mol.getTorsionPhases1(), - lambda_value); + perturbable_mol.getTorsionPhases1()); - const auto morphed_torsion_k = this->lambda_schedule.morph( + const auto morphed_torsion_k = cache.morph( + this->lambda_schedule, "torsion_k", perturbable_mol.getTorsionKs0(), - perturbable_mol.getTorsionKs1(), - lambda_value); + perturbable_mol.getTorsionKs1()); - const auto morphed_charge_scale = this->lambda_schedule.morph( + const auto morphed_charge_scale = cache.morph( + this->lambda_schedule, "charge_scale", perturbable_mol.getChargeScales0(), - perturbable_mol.getChargeScales1(), - lambda_value); + perturbable_mol.getChargeScales1()); - const auto morphed_lj_scale = this->lambda_schedule.morph( + const auto morphed_lj_scale = cache.morph( + this->lambda_schedule, "lj_scale", perturbable_mol.getLJScales0(), - perturbable_mol.getLJScales1(), - lambda_value); + perturbable_mol.getLJScales1()); // now update the forcefields int start_index = start_idxs.value("clj", -1); @@ -339,6 +467,16 
@@ double LambdaLever::setLambda(OpenMM::Context &context, { const int nparams = morphed_charges.count(); + if (start_change_atom == -1) + { + start_change_atom = start_index; + end_change_atom = start_index + nparams; + } + else if (start_index >= end_change_atom) + { + end_change_atom = start_index + nparams; + } + if (have_ghost_atoms) { for (int j = 0; j < nparams; ++j) @@ -532,17 +670,22 @@ double LambdaLever::setLambda(OpenMM::Context &context, cljff->updateParametersInContext(context); if (ghost_ghostff) +#ifdef SIRE_HAS_UPDATE_SOME_IN_CONTEXT + ghost_ghostff->updateSomeParametersInContext(start_change_atom, end_change_atom - start_change_atom, context); +#else ghost_ghostff->updateParametersInContext(context); +#endif if (ghost_nonghostff) +#ifdef SIRE_HAS_UPDATE_SOME_IN_CONTEXT + ghost_nonghostff->updateSomeParametersInContext(start_change_atom, end_change_atom - start_change_atom, context); +#else ghost_nonghostff->updateParametersInContext(context); +#endif if (ghost_14ff) ghost_14ff->updateParametersInContext(context); - // in OpenMM 8.1beta updating the bond parameters past lambda=0.25 - // causes a "All Forces must have identical exclusions" error, - // when running minimisation without h-bond constraints... 
if (bondff) bondff->updateParametersInContext(context); @@ -767,6 +910,7 @@ int LambdaLever::addPerturbableMolecule(const OpenMMMolecule &molecule, this->perturbable_mols.append(PerturbableOpenMMMolecule(molecule)); this->start_indicies.append(starts); this->perturbable_maps.insert(molecule.number, molecule.perturtable_map); + this->lambda_cache.clear(); return this->perturbable_mols.count() - 1; } @@ -805,4 +949,6 @@ void LambdaLever::setSchedule(const LambdaSchedule &sched) { lambda_schedule.addLever(lever); } + + this->lambda_cache.clear(); } diff --git a/wrapper/Convert/SireOpenMM/lambdalever.h b/wrapper/Convert/SireOpenMM/lambdalever.h index ee158ce25..f934c7d4a 100644 --- a/wrapper/Convert/SireOpenMM/lambdalever.h +++ b/wrapper/Convert/SireOpenMM/lambdalever.h @@ -33,10 +33,52 @@ #include "SireCAS/lambdaschedule.h" +#include + +#include + SIRE_BEGIN_HEADER namespace SireOpenMM { + class MolLambdaCache + { + public: + MolLambdaCache(); + MolLambdaCache(double lam_val); + MolLambdaCache(const MolLambdaCache &other); + ~MolLambdaCache(); + + MolLambdaCache &operator=(const MolLambdaCache &other); + + const QVector &morph(const SireCAS::LambdaSchedule &schedule, + const QString &key, + const QVector &initial, + const QVector &final) const; + + private: + QHash> cache; + QReadWriteLock lock; + double lam_val; + }; + + class LeverCache + { + public: + LeverCache(); + LeverCache(const LeverCache &other); + ~LeverCache(); + + LeverCache &operator=(const LeverCache &other); + + const MolLambdaCache &get(int molidx, double lam_val) const; + + void clear(); + + private: + QHash> cache; + }; + /** This is a lever that is used to change the parameters in an OpenMM * context according to a lambda value. This is actually a collection * of levers, each of which is controlled by the main lever. 
@@ -117,6 +159,9 @@ namespace SireOpenMM /** All of the property maps for the perturbable molecules */ QHash perturbable_maps; + + /** Cache of the parameters for different lambda values */ + LeverCache lambda_cache; }; #ifndef SIRE_SKIP_INLINE_FUNCTION diff --git a/wrapper/Convert/SireOpenMM/sire_to_openmm_system.cpp b/wrapper/Convert/SireOpenMM/sire_to_openmm_system.cpp index a7c7f47e4..92348aed1 100644 --- a/wrapper/Convert/SireOpenMM/sire_to_openmm_system.cpp +++ b/wrapper/Convert/SireOpenMM/sire_to_openmm_system.cpp @@ -479,6 +479,10 @@ void _set_clj_cutoff(OpenMM::NonbondedForce &cljff, const auto cutoff = ffinfo.cutoff().to(SireUnits::nanometers); cljff.setCutoffDistance(cutoff); } + else + { + cljff.setNonbondedMethod(OpenMM::NonbondedForce::NoCutoff); + } } /** Set the periodic space box vectors in the system, returning diff --git a/wrapper/Convert/__init__.py b/wrapper/Convert/__init__.py index 88d4c7158..e49865b76 100644 --- a/wrapper/Convert/__init__.py +++ b/wrapper/Convert/__init__.py @@ -11,6 +11,8 @@ "openmm_extract_coordinates", "openmm_extract_coordinates_and_velocities", "openmm_extract_space", + "sire_to_gemmi", + "gemmi_to_sire", "supported_formats", ] @@ -129,6 +131,7 @@ def sire_to_openmm(mols, map): f"'{timestep}'" ) + timestep_in_fs = timestep.to(femtosecond) timestep = timestep.to(picosecond) * openmm.unit.picosecond ensemble = Ensemble(map=map) @@ -170,36 +173,21 @@ def sire_to_openmm(mols, map): use_andersen = False temperature = None - if integrator is None: - if ensemble.is_nve(): - integrator = openmm.VerletIntegrator(timestep) - else: - integrator = openmm.LangevinMiddleIntegrator( - ensemble.temperature().to(kelvin) * openmm.unit.kelvin, - friction, - timestep, - ) - - temperature = ( - ensemble.temperature().to(kelvin) * openmm.unit.kelvin - ) + if isinstance(integrator, str): + from ...options import Integrator - elif type(integrator) is str: - integrator = integrator.lower() + integrator = Integrator.create(integrator) if 
integrator == "verlet" or integrator == "leapfrog": if not ensemble.is_nve(): raise ValueError( - "You cannot use a verlet integrator with the " - f"{ensemble}" + "You cannot use a verlet integrator with the " f"{ensemble}" ) integrator = openmm.VerletIntegrator(timestep) - else: - temperature = ( - ensemble.temperature().to(kelvin) * openmm.unit.kelvin - ) + elif integrator != "auto": + temperature = ensemble.temperature().to(kelvin) * openmm.unit.kelvin if ensemble.is_nve(): raise ValueError( @@ -236,12 +224,57 @@ def sire_to_openmm(mols, map): else: raise ValueError(f"Unrecognised integrator {integrator}") + if integrator is None: + if ensemble.is_nve(): + integrator = openmm.VerletIntegrator(timestep) + else: + integrator = openmm.LangevinMiddleIntegrator( + ensemble.temperature().to(kelvin) * openmm.unit.kelvin, + friction, + timestep, + ) + + temperature = ensemble.temperature().to(kelvin) * openmm.unit.kelvin elif openmm.Integrator not in type(integrator).mro(): raise TypeError( f"Cannot cast the integrator {integrator} to the correct " "type. 
It should be a string or an openmm.Integrator object" ) + if map.specified("constraint"): + from ...options import Constraint + + constraint = Constraint.create(map.get_string("constraint")) + + if constraint == "auto": + # choose the constraint based on the timestep + if timestep_in_fs > 4: + # need constraint on everything + constraint = "bonds" + + elif timestep_in_fs > 1: + # need it just on H bonds and angles + constraint = "h-bonds" + + else: + # can get away with no constraints + constraint = "none" + + map.set("constraint", constraint) + + if map.specified("perturbable_constraint"): + from ...options import PerturbableConstraint + + constraint = PerturbableConstraint.create( + map.get_string("perturbable_constraint") + ) + + if constraint == "auto": + # we don't apply the constraint to perturbable molecules + constraint = "none" + + map.set("perturbable_constraint", constraint) + # Next, convert the sire system to an openmm system # system must be an openmm.System() or else we will crash! @@ -267,31 +300,34 @@ def sire_to_openmm(mols, map): platform = None if map.specified("platform"): - desired_platform = map["platform"].source() + from ...options import Platform - platform = None - platforms = [] + desired_platform = Platform.create(map.get_string("platform")) - for i in range(0, openmm.Platform.getNumPlatforms()): - p = openmm.Platform.getPlatform(i) + # only look for the desired platform if it is not "auto" + if desired_platform != "auto": + platforms = [] - if (p.getName().lower() == desired_platform.lower()) or ( - p.getName() == "HIP" - and desired_platform.lower() == "metal" - ): - platform = p - break - else: - platforms.append(p.getName()) + for i in range(0, openmm.Platform.getNumPlatforms()): + p = openmm.Platform.getPlatform(i) - if platform is None: - platforms = ", ".join(platforms) - raise ValueError( - f"Cannot create the openmm platform {desired_platform} " - "as this is not supported by this installation of " - f"openmm. 
Available platforms are [{platforms}]" - ) - else: + if (p.getName().lower() == desired_platform.lower()) or ( + p.getName() == "HIP" and desired_platform.lower() == "metal" + ): + platform = p + break + else: + platforms.append(p.getName().lower()) + + if platform is None: + platforms = ", ".join(platforms) + raise ValueError( + f"Cannot create the openmm platform {desired_platform} " + "as this is not supported by this installation of " + f"openmm. Available platforms are [{platforms}]" + ) + + if platform is None: # just find the fastest platform - this will be "metal" if that # is available and we are on Mac, or CUDA if CUDA works, # or OpenCL if OpenCL works, or CPU if nothing is left... @@ -333,7 +369,7 @@ def sire_to_openmm(mols, map): supported_properties = platform.getPropertyNames() if "Precision" in supported_properties and map.specified("precision"): - precision = map["precision"].source() + precision = map.get_string("precision") platform.setPropertyDefaultValue("Precision", precision) if "Threads" in supported_properties and map.specified("threads"): @@ -375,9 +411,7 @@ def sire_to_openmm(mols, map): return context - def openmm_extract_coordinates( - state, mols, perturbable_maps=None, map=None - ): + def openmm_extract_coordinates(state, mols, perturbable_maps=None, map=None): from ...base import create_map map = create_map(map) @@ -446,6 +480,36 @@ def minimise_openmm_context(*args, **kwargs): _no_openmm() +try: + from ._SireGemmi import sire_to_gemmi, gemmi_to_sire, _register_pdbx_loader + + # make sure we have also import gemmi so that we + # have the gemmi objects registered with python + import gemmi as _gemmi # noqa: F401 + + _has_gemmi = True + _register_pdbx_loader() +except Exception as e: + _gemmi_import_error = e + + # Gemmi support is not available + def _no_gemmi(): + print(_gemmi_import_error) + raise ModuleNotFoundError( + "Unable to convert to/from Gemmi as it is not installed. 
" + "Please install using `mamba install -c conda-forge gemmi` " + "and then re-run this script." + ) + + _has_gemmi = False + + def sire_to_gemmi(*args, **kwargs): + _no_gemmi() + + def gemmi_to_sire(*args, **kwargs): + _no_gemmi() + + def supported_formats(): """Return all of the formats supported by this installation""" f = ["sire"] @@ -456,6 +520,9 @@ def supported_formats(): if _has_rdkit: f.append("rdkit") + if _has_gemmi: + f.append("gemmi") + import sys # BioSimSpace needs to have already been loaded diff --git a/wrapper/Helpers/pyboost11.hpp b/wrapper/Helpers/pyboost11.hpp new file mode 100644 index 000000000..e173366f2 --- /dev/null +++ b/wrapper/Helpers/pyboost11.hpp @@ -0,0 +1,207 @@ +/* + * Copyright (c) 2021, Yung-Yu Chen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * - Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * - Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#include + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#pragma GCC diagnostic ignored "-Wunused-parameter" +#include +#pragma GCC diagnostic pop + +namespace pyboost11 +{ + + // Pybind11 cast by using boost.python. + template + struct caster + { + + caster(pybind11::handle src) + : obj(boost::python::handle<>(boost::python::borrowed(src.ptr()))), ext(obj) + { + } + + bool check() const { return ext.check(); } + + // From-Python conversion. + operator T() { return ext(); } + T operator()() { return ext(); } + + // To-Python conversion. + static pybind11::handle to_python(T &src) + { + namespace bpy = boost::python; + return bpy::incref(bpy::object(src).ptr()); + } + + boost::python::object obj; + boost::python::extract ext; + }; + +} // end namespace pyboost11 + +namespace pybind11 +{ + + namespace detail + { + + template + struct pyboost11_type_caster + { + + // Expanded from PYBIND11_TYPE_CASTER. 
+ protected: + type value; + + public: + template >::value, int> = 0> + static handle cast(T_ *src, return_value_policy policy, handle parent) + { + if (!src) + return none().release(); + if (policy == return_value_policy::take_ownership) + { + auto h = cast(std::move(*src), policy, parent); + delete src; + return h; + } + else + { + return cast(*src, policy, parent); + } + } + operator type *() { return &value; } + operator type &() { return value; } + operator type &&() && { return std::move(value); } + template + using cast_op_type = pybind11::detail::movable_cast_op_type; + + // Boilerplate. + bool load(handle src, bool) + { + if (!src) + { + return false; + } + pyboost11::caster ext(src); + if (!ext.check()) + { + return false; + } + value = ext(); + return true; + } + static handle cast(type *src, return_value_policy /* policy */, handle /* parent */) + { + return pyboost11::caster::to_python(src); + } + static handle cast(type src, return_value_policy /* policy */, handle /* parent */) + { + return pyboost11::caster::to_python(src); + } + }; + +#define PYBOOST11_TYPE_CASTER(type, py_name) \ + template <> \ + struct type_caster : public pyboost11_type_caster \ + { \ + static constexpr auto name = py_name; \ + } + + } // end namespace detail + +} // end namespace pybind11 + +namespace pyboost11 +{ + + // Boost.python convert by using pybind11. + template + struct converter + { + + public: + converter() { init(); } + + void init() + { + static bool initialized = false; + if (!initialized) + { + namespace bpy = boost::python; + // From-Python conversion. + bpy::converter::registry::push_back( + &convertible, &construct, bpy::type_id()); + // To-Python conversion. + bpy::to_python_converter(); + + initialized = true; + } + } + + // From-Python convertibility. 
+ static void *convertible(PyObject *objptr) + { + namespace pyb = pybind11; + try + { + pyb::handle(objptr).cast(); + return objptr; + } + catch (pyb::cast_error const &) + { + return nullptr; + } + } + + // From-Python conversion. + static void construct( + PyObject *objptr, boost::python::converter::rvalue_from_python_stage1_data *data) + { + namespace pyb = pybind11; + void *storage = reinterpret_cast< + boost::python::converter::rvalue_from_python_storage *>(data) + ->storage.bytes; + new (storage) T(pyb::handle(objptr).cast()); + data->convertible = storage; + } + + // To-Python conversion. + static PyObject *convert(T const &t) + { + return pybind11::cast(t).inc_ref().ptr(); + } + }; + +} // end namespace pyboost11 + +// vim: set ff=unix fenc=utf8 et sw=4 ts=4 sts=4: \ No newline at end of file diff --git a/wrapper/IO/CMakeAutogenFile.txt b/wrapper/IO/CMakeAutogenFile.txt index ef4a7028c..4695ffe6b 100644 --- a/wrapper/IO/CMakeAutogenFile.txt +++ b/wrapper/IO/CMakeAutogenFile.txt @@ -1,44 +1,45 @@ # WARNING - AUTOGENERATED FILE - CONTENTS WILL BE OVERWRITTEN! 
set ( PYPP_SOURCES - Amber.pypp.cpp + PerturbationsTemplate.pypp.cpp + ProtoMSParameters.pypp.cpp AmberPrm.pypp.cpp - AmberRst.pypp.cpp - AmberRst7.pypp.cpp - AmberTraj.pypp.cpp - BrokenParser.pypp.cpp + PDB2.pypp.cpp + SDF.pypp.cpp + TinkerParameters.pypp.cpp CharmmPSF.pypp.cpp - Cube.pypp.cpp - DCD.pypp.cpp - FileTrajectoryParser.pypp.cpp - FlexibilityLibrary.pypp.cpp - FlexibilityTemplate.pypp.cpp Gro87.pypp.cpp - GroAtom.pypp.cpp - GroMolType.pypp.cpp + _IO_free_functions.pypp.cpp + PDB.pypp.cpp + IOBase.pypp.cpp + PDBx.pypp.cpp + FlexibilityTemplate.pypp.cpp GroSystem.pypp.cpp + DCD.pypp.cpp + AmberRst.pypp.cpp + PDBParameters.pypp.cpp GroTop.pypp.cpp - IOBase.pypp.cpp - IOParametersBase.pypp.cpp - Mol2.pypp.cpp - MoleculeParser.pypp.cpp + ZmatrixMaker.pypp.cpp NullIO.pypp.cpp NullParser.pypp.cpp - PDB.pypp.cpp - PDB2.pypp.cpp - PDBParameters.pypp.cpp - PerturbationsLibrary.pypp.cpp - PerturbationsTemplate.pypp.cpp - ProtoMS.pypp.cpp - ProtoMSParameters.pypp.cpp - SDF.pypp.cpp - Supplementary.pypp.cpp TRR.pypp.cpp + Supplementary.pypp.cpp + IOParametersBase.pypp.cpp + Amber.pypp.cpp + GroMolType.pypp.cpp Tinker.pypp.cpp - TinkerParameters.pypp.cpp + GroAtom.pypp.cpp + AmberRst7.pypp.cpp TrajectoryMonitor.pypp.cpp + PerturbationsLibrary.pypp.cpp + Cube.pypp.cpp + Mol2.pypp.cpp + FileTrajectoryParser.pypp.cpp + MoleculeParser.pypp.cpp + ProtoMS.pypp.cpp + FlexibilityLibrary.pypp.cpp + AmberTraj.pypp.cpp + BrokenParser.pypp.cpp XTC.pypp.cpp - ZmatrixMaker.pypp.cpp - _IO_free_functions.pypp.cpp SireIO_containers.cpp SireIO_properties.cpp SireIO_registrars.cpp diff --git a/wrapper/IO/PDBx.pypp.cpp b/wrapper/IO/PDBx.pypp.cpp new file mode 100644 index 000000000..75c67ae48 --- /dev/null +++ b/wrapper/IO/PDBx.pypp.cpp @@ -0,0 +1,276 @@ +// This file has been generated by Py++. 
+ +// (C) Christopher Woods, GPL >= 3 License + +#include "boost/python.hpp" +#include "PDBx.pypp.hpp" + +namespace bp = boost::python; + +#include "SireBase/parallel.h" + +#include "SireBase/stringproperty.h" + +#include "SireError/errors.h" + +#include "SireIO/errors.h" + +#include "SireIO/pdbx.h" + +#include "SireMol/atomcharges.h" + +#include "SireMol/atomcoords.h" + +#include "SireMol/atomelements.h" + +#include "SireMol/core.h" + +#include "SireMol/molecule.h" + +#include "SireMol/moleditor.h" + +#include "SireMol/trajectory.h" + +#include "SireStream/datastream.h" + +#include "SireStream/shareddatastream.h" + +#include "SireSystem/system.h" + +#include "SireUnits/units.h" + +#include "pdbx.h" + +#include "pdbx.h" + +SireIO::PDBx __copy__(const SireIO::PDBx &other){ return SireIO::PDBx(other); } + +#include "Qt/qdatastream.hpp" + +#include "Helpers/str.hpp" + +#include "Helpers/release_gil_policy.hpp" + +void register_PDBx_class(){ + + { //::SireIO::PDBx + typedef bp::class_< SireIO::PDBx, bp::bases< SireIO::MoleculeParser, SireBase::Property > > PDBx_exposer_t; + PDBx_exposer_t PDBx_exposer = PDBx_exposer_t( "PDBx", "This class holds a parser for reading and writing PDBxmmcif files", bp::init< >("Constructor") ); + bp::scope PDBx_scope( PDBx_exposer ); + PDBx_exposer.def( bp::init< QString const &, bp::optional< SireBase::PropertyMap const & > >(( bp::arg("filename"), bp::arg("map")=SireBase::PropertyMap() ), "Construct to read in the data from the file called filename. The\npassed property map can be used to pass extra parameters to control\nthe parsing") ); + PDBx_exposer.def( bp::init< QStringList const &, bp::optional< SireBase::PropertyMap const & > >(( bp::arg("lines"), bp::arg("map")=SireBase::PropertyMap() ), "Construct to read in the data from the passed text lines. 
The\npassed property map can be used to pass extra parameters to control\nthe parsing") ); + PDBx_exposer.def( bp::init< SireSystem::System const &, bp::optional< SireBase::PropertyMap const & > >(( bp::arg("system"), bp::arg("map")=SireBase::PropertyMap() ), "Construct this parser by extracting all necessary information from the\npassed SireSystem::System, looking for the properties that are specified\nin the passed property map") ); + PDBx_exposer.def( bp::init< SireIO::PDBx const & >(( bp::arg("other") ), "Copy constructor") ); + { //::SireIO::PDBx::construct + + typedef ::SireIO::MoleculeParserPtr ( ::SireIO::PDBx::*construct_function_type)( ::QString const &,::SireBase::PropertyMap const & ) const; + construct_function_type construct_function_value( &::SireIO::PDBx::construct ); + + PDBx_exposer.def( + "construct" + , construct_function_value + , ( bp::arg("filename"), bp::arg("map") ) + , bp::release_gil_policy() + , "Return the parser that has been constructed by reading in the passed\nfile using the passed properties" ); + + } + { //::SireIO::PDBx::construct + + typedef ::SireIO::MoleculeParserPtr ( ::SireIO::PDBx::*construct_function_type)( ::QStringList const &,::SireBase::PropertyMap const & ) const; + construct_function_type construct_function_value( &::SireIO::PDBx::construct ); + + PDBx_exposer.def( + "construct" + , construct_function_value + , ( bp::arg("lines"), bp::arg("map") ) + , bp::release_gil_policy() + , "Return the parser that has been constructed by reading in the passed\ntext lines using the passed properties" ); + + } + { //::SireIO::PDBx::construct + + typedef ::SireIO::MoleculeParserPtr ( ::SireIO::PDBx::*construct_function_type)( ::SireSystem::System const &,::SireBase::PropertyMap const & ) const; + construct_function_type construct_function_value( &::SireIO::PDBx::construct ); + + PDBx_exposer.def( + "construct" + , construct_function_value + , ( bp::arg("system"), bp::arg("map") ) + , bp::release_gil_policy() + , "Return the parser 
that has been constructed by extract all necessary\ndata from the passed SireSystem::System using the specified properties" ); + + } + { //::SireIO::PDBx::formatDescription + + typedef ::QString ( ::SireIO::PDBx::*formatDescription_function_type)( ) const; + formatDescription_function_type formatDescription_function_value( &::SireIO::PDBx::formatDescription ); + + PDBx_exposer.def( + "formatDescription" + , formatDescription_function_value + , bp::release_gil_policy() + , "Return a description of the file format" ); + + } + { //::SireIO::PDBx::formatName + + typedef ::QString ( ::SireIO::PDBx::*formatName_function_type)( ) const; + formatName_function_type formatName_function_value( &::SireIO::PDBx::formatName ); + + PDBx_exposer.def( + "formatName" + , formatName_function_value + , bp::release_gil_policy() + , "Return the format name that is used to identify this file format within Sire" ); + + } + { //::SireIO::PDBx::formatSuffix + + typedef ::QStringList ( ::SireIO::PDBx::*formatSuffix_function_type)( ) const; + formatSuffix_function_type formatSuffix_function_value( &::SireIO::PDBx::formatSuffix ); + + PDBx_exposer.def( + "formatSuffix" + , formatSuffix_function_value + , bp::release_gil_policy() + , "Return the suffixes that these files are normally associated with" ); + + } + { //::SireIO::PDBx::getFrame + + typedef ::SireMol::Frame ( ::SireIO::PDBx::*getFrame_function_type)( int ) const; + getFrame_function_type getFrame_function_value( &::SireIO::PDBx::getFrame ); + + PDBx_exposer.def( + "getFrame" + , getFrame_function_value + , ( bp::arg("i") ) + , bp::release_gil_policy() + , "" ); + + } + { //::SireIO::PDBx::isFrame + + typedef bool ( ::SireIO::PDBx::*isFrame_function_type)( ) const; + isFrame_function_type isFrame_function_value( &::SireIO::PDBx::isFrame ); + + PDBx_exposer.def( + "isFrame" + , isFrame_function_value + , bp::release_gil_policy() + , "" ); + + } + { //::SireIO::PDBx::isTopology + + typedef bool ( 
::SireIO::PDBx::*isTopology_function_type)( ) const; + isTopology_function_type isTopology_function_value( &::SireIO::PDBx::isTopology ); + + PDBx_exposer.def( + "isTopology" + , isTopology_function_value + , bp::release_gil_policy() + , "" ); + + } + { //::SireIO::PDBx::nAtoms + + typedef int ( ::SireIO::PDBx::*nAtoms_function_type)( ) const; + nAtoms_function_type nAtoms_function_value( &::SireIO::PDBx::nAtoms ); + + PDBx_exposer.def( + "nAtoms" + , nAtoms_function_value + , bp::release_gil_policy() + , "Return the total number of atoms in all molecules." ); + + } + { //::SireIO::PDBx::nFrames + + typedef int ( ::SireIO::PDBx::*nFrames_function_type)( ) const; + nFrames_function_type nFrames_function_value( &::SireIO::PDBx::nFrames ); + + PDBx_exposer.def( + "nFrames" + , nFrames_function_value + , bp::release_gil_policy() + , "" ); + + } + PDBx_exposer.def( bp::self != bp::self ); + { //::SireIO::PDBx::operator= + + typedef ::SireIO::PDBx & ( ::SireIO::PDBx::*assign_function_type)( ::SireIO::PDBx const & ) ; + assign_function_type assign_function_value( &::SireIO::PDBx::operator= ); + + PDBx_exposer.def( + "assign" + , assign_function_value + , ( bp::arg("other") ) + , bp::return_self< >() + , "" ); + + } + PDBx_exposer.def( bp::self == bp::self ); + { //::SireIO::PDBx::toLines + + typedef ::QVector< QString > ( ::SireIO::PDBx::*toLines_function_type)( ) const; + toLines_function_type toLines_function_value( &::SireIO::PDBx::toLines ); + + PDBx_exposer.def( + "toLines" + , toLines_function_value + , bp::release_gil_policy() + , "Convert the the parsed data to a collection of PDBx record lines." 
); + + } + { //::SireIO::PDBx::toString + + typedef ::QString ( ::SireIO::PDBx::*toString_function_type)( ) const; + toString_function_type toString_function_value( &::SireIO::PDBx::toString ); + + PDBx_exposer.def( + "toString" + , toString_function_value + , bp::release_gil_policy() + , "Return a string representation of this parser" ); + + } + { //::SireIO::PDBx::typeName + + typedef char const * ( *typeName_function_type )( ); + typeName_function_type typeName_function_value( &::SireIO::PDBx::typeName ); + + PDBx_exposer.def( + "typeName" + , typeName_function_value + , bp::release_gil_policy() + , "Return the C++ name for this class" ); + + } + { //::SireIO::PDBx::what + + typedef char const * ( ::SireIO::PDBx::*what_function_type)( ) const; + what_function_type what_function_value( &::SireIO::PDBx::what ); + + PDBx_exposer.def( + "what" + , what_function_value + , bp::release_gil_policy() + , "Return the C++ name for this class" ); + + } + PDBx_exposer.staticmethod( "typeName" ); + PDBx_exposer.def( "__copy__", &__copy__); + PDBx_exposer.def( "__deepcopy__", &__copy__); + PDBx_exposer.def( "clone", &__copy__); + PDBx_exposer.def( "__rlshift__", &__rlshift__QDataStream< ::SireIO::PDBx >, + bp::return_internal_reference<1, bp::with_custodian_and_ward<1,2> >() ); + PDBx_exposer.def( "__rrshift__", &__rrshift__QDataStream< ::SireIO::PDBx >, + bp::return_internal_reference<1, bp::with_custodian_and_ward<1,2> >() ); + PDBx_exposer.def_pickle(sire_pickle_suite< ::SireIO::PDBx >()); + PDBx_exposer.def( "__str__", &__str__< ::SireIO::PDBx > ); + PDBx_exposer.def( "__repr__", &__str__< ::SireIO::PDBx > ); + } + +} diff --git a/wrapper/IO/PDBx.pypp.hpp b/wrapper/IO/PDBx.pypp.hpp new file mode 100644 index 000000000..9472a87ae --- /dev/null +++ b/wrapper/IO/PDBx.pypp.hpp @@ -0,0 +1,10 @@ +// This file has been generated by Py++. 
+ +// (C) Christopher Woods, GPL >= 3 License + +#ifndef PDBx_hpp__pyplusplus_wrapper +#define PDBx_hpp__pyplusplus_wrapper + +void register_PDBx_class(); + +#endif//PDBx_hpp__pyplusplus_wrapper diff --git a/wrapper/IO/SireIO_registrars.cpp b/wrapper/IO/SireIO_registrars.cpp index b80000e3c..4e90f4026 100644 --- a/wrapper/IO/SireIO_registrars.cpp +++ b/wrapper/IO/SireIO_registrars.cpp @@ -3,77 +3,79 @@ #include "SireIO_registrars.h" +#include "iobase.h" #include "amber.h" -#include "amberprm.h" -#include "amberrst.h" -#include "amberrst7.h" -#include "ambertraj.h" -#include "charmmpsf.h" -#include "dcd.h" -#include "filetrajectory.h" #include "filetrajectoryparser.h" #include "flexibilitylibrary.h" -#include "gro87.h" +#include "trr.h" #include "grotop.h" -#include "iobase.h" +#include "amberrst7.h" #include "mol2.h" -#include "moleculeparser.h" -#include "pdb.h" -#include "pdb2.h" -#include "perturbationslibrary.h" #include "protoms.h" -#include "sdf.h" -#include "supplementary.h" -#include "tinker.h" +#include "pdb2.h" +#include "moleculeparser.h" +#include "amberrst.h" #include "trajectorymonitor.h" -#include "trr.h" +#include "supplementary.h" #include "xtc.h" +#include "filetrajectory.h" +#include "gro87.h" +#include "charmmpsf.h" +#include "perturbationslibrary.h" +#include "dcd.h" +#include "amberprm.h" +#include "tinker.h" #include "zmatrixmaker.h" +#include "ambertraj.h" +#include "sdf.h" +#include "pdb.h" +#include "pdbx.h" #include "Helpers/objectregistry.hpp" void register_SireIO_objects() { + ObjectRegistry::registerConverterFor< SireIO::NullIO >(); ObjectRegistry::registerConverterFor< SireIO::Amber >(); - ObjectRegistry::registerConverterFor< SireIO::AmberPrm >(); - ObjectRegistry::registerConverterFor< SireIO::AmberRst >(); - ObjectRegistry::registerConverterFor< SireIO::AmberRst7 >(); - ObjectRegistry::registerConverterFor< SireIO::AmberTraj >(); - ObjectRegistry::registerConverterFor< SireIO::PSFAtom >(); - ObjectRegistry::registerConverterFor< 
SireIO::CharmmParam >(); - ObjectRegistry::registerConverterFor< SireIO::CharmmPSF >(); - ObjectRegistry::registerConverterFor< SireIO::DCD >(); - ObjectRegistry::registerConverterFor< SireIO::FileTrajectory >(); ObjectRegistry::registerConverterFor< SireIO::FileTrajectoryParser >(); ObjectRegistry::registerConverterFor< SireIO::FlexibilityLibrary >(); ObjectRegistry::registerConverterFor< SireIO::FlexibilityTemplate >(); - ObjectRegistry::registerConverterFor< SireIO::Gro87 >(); + ObjectRegistry::registerConverterFor< SireIO::TRR >(); ObjectRegistry::registerConverterFor< SireIO::GroTop >(); ObjectRegistry::registerConverterFor< SireIO::GroMolType >(); ObjectRegistry::registerConverterFor< SireIO::GroAtom >(); ObjectRegistry::registerConverterFor< SireIO::GroSystem >(); - ObjectRegistry::registerConverterFor< SireIO::NullIO >(); + ObjectRegistry::registerConverterFor< SireIO::AmberRst7 >(); ObjectRegistry::registerConverterFor< SireIO::Mol2Atom >(); ObjectRegistry::registerConverterFor< SireIO::Mol2Bond >(); ObjectRegistry::registerConverterFor< SireIO::Mol2Molecule >(); ObjectRegistry::registerConverterFor< SireIO::Mol2Substructure >(); ObjectRegistry::registerConverterFor< SireIO::Mol2 >(); - ObjectRegistry::registerConverterFor< SireIO::NullParser >(); - ObjectRegistry::registerConverterFor< SireIO::BrokenParser >(); - ObjectRegistry::registerConverterFor< SireIO::PDB >(); + ObjectRegistry::registerConverterFor< SireIO::ProtoMS >(); ObjectRegistry::registerConverterFor< SireIO::PDBAtom >(); ObjectRegistry::registerConverterFor< SireIO::PDB2 >(); + ObjectRegistry::registerConverterFor< SireIO::NullParser >(); + ObjectRegistry::registerConverterFor< SireIO::BrokenParser >(); + ObjectRegistry::registerConverterFor< SireIO::AmberRst >(); + ObjectRegistry::registerConverterFor< SireIO::TrajectoryMonitor >(); + ObjectRegistry::registerConverterFor< SireIO::Supplementary >(); + ObjectRegistry::registerConverterFor< SireIO::XTC >(); + 
ObjectRegistry::registerConverterFor< SireIO::FileTrajectory >(); + ObjectRegistry::registerConverterFor< SireIO::Gro87 >(); + ObjectRegistry::registerConverterFor< SireIO::PSFAtom >(); + ObjectRegistry::registerConverterFor< SireIO::CharmmParam >(); + ObjectRegistry::registerConverterFor< SireIO::CharmmPSF >(); ObjectRegistry::registerConverterFor< SireIO::PerturbationsLibrary >(); ObjectRegistry::registerConverterFor< SireIO::PerturbationsTemplate >(); - ObjectRegistry::registerConverterFor< SireIO::ProtoMS >(); - ObjectRegistry::registerConverterFor< SireIO::SDF >(); - ObjectRegistry::registerConverterFor< SireIO::Supplementary >(); + ObjectRegistry::registerConverterFor< SireIO::DCD >(); + ObjectRegistry::registerConverterFor< SireIO::AmberPrm >(); ObjectRegistry::registerConverterFor< SireIO::Tinker >(); - ObjectRegistry::registerConverterFor< SireIO::TrajectoryMonitor >(); - ObjectRegistry::registerConverterFor< SireIO::TRR >(); - ObjectRegistry::registerConverterFor< SireIO::XTC >(); ObjectRegistry::registerConverterFor< SireIO::ZmatrixMaker >(); + ObjectRegistry::registerConverterFor< SireIO::AmberTraj >(); + ObjectRegistry::registerConverterFor< SireIO::SDF >(); + ObjectRegistry::registerConverterFor< SireIO::PDB >(); + ObjectRegistry::registerConverterFor< SireIO::PDBx >(); } diff --git a/wrapper/IO/_IO.main.cpp b/wrapper/IO/_IO.main.cpp index 496ff9adc..b68fc28e6 100644 --- a/wrapper/IO/_IO.main.cpp +++ b/wrapper/IO/_IO.main.cpp @@ -57,6 +57,8 @@ #include "PDBParameters.pypp.hpp" +#include "PDBx.pypp.hpp" + #include "PerturbationsLibrary.pypp.hpp" #include "PerturbationsTemplate.pypp.hpp" @@ -150,6 +152,8 @@ BOOST_PYTHON_MODULE(_IO){ register_PDBParameters_class(); + register_PDBx_class(); + register_PerturbationsLibrary_class(); register_PerturbationsTemplate_class(); diff --git a/wrapper/IO/_IO_load.cpp b/wrapper/IO/_IO_load.cpp index 963f1b5b0..8cd5513db 100644 --- a/wrapper/IO/_IO_load.cpp +++ b/wrapper/IO/_IO_load.cpp @@ -40,6 +40,13 @@ System 
load_molecules(const QStringList &files, { auto gil = SireBase::release_gil(); + bool auto_connect = true; + + if (map.specified("auto_connect")) + { + auto_connect = map["auto_connect"].value().asABoolean(); + } + try { if (files.count() == 1) @@ -121,7 +128,7 @@ System load_molecules(const QStringList &files, } } - if (not mol.hasProperty("connectivity")) + if (auto_connect and not mol.hasProperty("connectivity")) { try { diff --git a/wrapper/IO/active_headers.h b/wrapper/IO/active_headers.h index f61ba997f..130070ce1 100644 --- a/wrapper/IO/active_headers.h +++ b/wrapper/IO/active_headers.h @@ -23,6 +23,7 @@ #include "moleculeparser.h" #include "pdb.h" #include "pdb2.h" +#include "pdbx.h" #include "perturbationslibrary.h" #include "protoms.h" #include "sdf.h"