Skip to content

Commit

Permalink
Write a preliminary implementation for MOFkey
Browse files Browse the repository at this point in the history
Export a format derived from InChIKey to provide a compact, interoperable hash in addition to the SMILES-based MOFid. Because the organic portion is based on InChIKey, users can search for the building blocks in specialized and even general-purpose search engines.

Note: invoking the InChI code may add substantially more warning/error messages to the log file, particularly for metal-containing linkers and cases when the valence is buggy (e.g. ZIFs and cases related to issue #8).  I still need to write some more diagnostic output and discuss a few decision points, such as the `DEFAULT_MOFKEY_TOPOLOGY`.
  • Loading branch information
bbucior committed Feb 18, 2019
1 parent f8e6f36 commit 16494a5
Show file tree
Hide file tree
Showing 3 changed files with 108 additions and 9 deletions.
87 changes: 85 additions & 2 deletions src/deconstructor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <openbabel/generic.h>
#include <openbabel/obconversion.h>
#include <openbabel/obiter.h>
#include <openbabel/elements.h>


namespace OpenBabel
Expand All @@ -40,15 +41,21 @@ std::string writeFragments(std::vector<OBMol> fragments, OBConversion obconv) {
}


std::string getSMILES(OBMol fragment, OBConversion obconv) {
// Prints SMILES based on OBConversion parameters
std::string exportNormalizedMol(OBMol fragment, OBConversion obconv) {
	// Serializes a fragment in whatever output format obconv is configured for.
	// The conversion runs on a private copy whose bonds have been reset and
	// which has been passed through unwrapFragmentMol, so the caller's
	// molecule is never modified.
	OBMol normalized(fragment);
	OBMol* const target = &normalized;
	resetBonds(target);
	unwrapFragmentMol(target);
	std::string exported = obconv.WriteString(target);
	return exported;
}


std::string getSMILES(OBMol fragment, OBConversion obconv) {
	// Returns the fragment as a string (SMILES when obconv is set up for it).
	// All normalization and writing is delegated to exportNormalizedMol.
	std::string smiles = exportNormalizedMol(fragment, obconv);
	return smiles;
}



Deconstructor::Deconstructor(OBMol* orig_mof) : simplified_net(orig_mof) {
//Deconstructor::Deconstructor(OBMol* orig_mof) {
Expand Down Expand Up @@ -455,6 +462,82 @@ void MOFidDeconstructor::PostSimplification() {
}


std::string MOFidDeconstructor::GetMOFkey(const std::string &topology) {
	// Builds the MOFkey for the deconstructed MOF and returns it as a string.
	//
	// Layout (fields joined by MOFKEY_SEP):
	//   "MOFkey" , "v" + MOFKEY_VERSION , [topology] , metals , linker key(s)...
	//   - topology: written only when the argument is non-empty.
	//   - metals:   element symbols of the unique metals found in the node and
	//               node-bridge atoms, sorted by atomic number and joined by
	//               MOFKEY_METAL_DELIM; MOFKEY_NO_METALS if there are none.
	//   - linkers:  first 25 characters of each unique linker InChIKey (the
	//               protonation flag is dropped), sorted alphabetically;
	//               MOFKEY_NO_LINKERS if no key could be generated.
	//
	// This method is implemented in MOFidDeconstructor instead of the others, because the
	// organic building blocks must be intact (e.g. including carboxylates) to properly
	// calculate the MOFkey.
	std::stringstream mofkey;
	mofkey << "MOFkey" << MOFKEY_SEP << "v" << MOFKEY_VERSION;  // MOFkey format signature

	if (!topology.empty()) {
		mofkey << MOFKEY_SEP << topology;
	}

	// Get unique metal atoms from the nodes
	std::vector<int> unique_elements;
	VirtualMol full_node_export = simplified_net.GetAtomsOfRole("node");
	full_node_export.AddVirtualMol(simplified_net.GetAtomsOfRole("node bridge"));
	full_node_export = simplified_net.PseudoToOrig(full_node_export);
	AtomSet node_set = full_node_export.GetAtoms();
	for (AtomSet::iterator it=node_set.begin(); it!=node_set.end(); ++it) {
		int it_element = (*it)->GetAtomicNum();
		if (isMetal(*it) && !inVector<int>(it_element, unique_elements)) {
			unique_elements.push_back(it_element);
		}
	}

	// The metal field is always introduced by exactly one field separator.
	mofkey << MOFKEY_SEP;
	if (unique_elements.empty()) {
		mofkey << MOFKEY_NO_METALS;
	} else {
		std::sort(unique_elements.begin(), unique_elements.end());  // sort by atomic number
		for (std::vector<int>::iterator element=unique_elements.begin(); element!=unique_elements.end(); ++element) {
			// Every element after the first is preceded by the metal delimiter.
			// (Previously a never-cleared flag caused MOFKEY_SEP to be written
			// before each element, so multiple metals looked like separate fields.)
			if (element != unique_elements.begin()) {
				mofkey << MOFKEY_METAL_DELIM;
			}
			mofkey << OBElements::GetSymbol(*element);
		}
	}

	// Then, write unique InChIKeys (sans protonation state)
	OBConversion ikey;
	ikey.SetOutFormat("inchikey");
	ikey.AddOption("X", OBConversion::OUTOPTIONS, "SNon");  // ignoring stereochemistry, at least for now
	ikey.AddOption("w");  // reduce verbosity about InChI behavior:
	// 'Omitted undefined stereo', 'Charges were rearranged', 'Proton(s) added/removed', 'Metal was disconnected'
	// See https://openbabel.org/docs/dev/FileFormats/InChI_format.html for more information.
	std::vector<std::string> unique_ikeys;

	// A well-formed result is the 27-character InChIKey followed by a newline.
	const std::string ob_newline = "\n";
	const std::string::size_type expected_length = 27 + ob_newline.size();

	VirtualMol linker_export = simplified_net.GetAtomsOfRole("linker");
	linker_export = simplified_net.PseudoToOrig(linker_export);
	OBMol linker_mol = linker_export.ToOBMol();
	std::vector<OBMol> linker_frags = linker_mol.Separate();
	for (std::vector<OBMol>::iterator frag=linker_frags.begin(); frag!=linker_frags.end(); ++frag) {
		std::string frag_ikey = exportNormalizedMol(*frag, ikey);
		if (frag_ikey.length() != expected_length) {
			// Report the fragment and skip it rather than emitting a malformed key
			obErrorLog.ThrowError(__FUNCTION__, "Unexpected length for InChIKey", obError);
			continue;
		}
		frag_ikey = frag_ikey.substr(0, 25);  // 14 + 1 + 10 (minus the -protonation flag and \n)
		if (!inVector<std::string>(frag_ikey, unique_ikeys)) {
			unique_ikeys.push_back(frag_ikey);
		}
	}
	if (unique_ikeys.empty()) {
		unique_ikeys.push_back(MOFKEY_NO_LINKERS);
	}
	std::sort(unique_ikeys.begin(), unique_ikeys.end());  // sort alphabetically
	for (std::vector<std::string>::iterator it=unique_ikeys.begin(); it!=unique_ikeys.end(); ++it) {
		mofkey << MOFKEY_SEP << *it;
	}

	return mofkey.str();
}



SingleNodeDeconstructor::SingleNodeDeconstructor(OBMol* orig_mof) : Deconstructor(orig_mof) {
}
Expand Down
10 changes: 10 additions & 0 deletions src/deconstructor.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ const std::string NO_SBU_SUFFIX = "/NoSBU"; // base MOFidDeconstructor class
const std::string SINGLE_NODE_SUFFIX = "/SingleNode";
const std::string ALL_NODE_SUFFIX = "/AllNode";

// Default placeholder topology and formatting details for MOFkey
// The default topology is empty, so GetMOFkey() leaves the topology field out when none is given.
// TODO: decide whether a visible placeholder (e.g. "OPTIONAL_TOPOLOGY" or "NA") would be more user-friendly.
const std::string DEFAULT_MOFKEY_TOPOLOGY = "";
const std::string MOFKEY_VERSION = "1";  // Written after the "v" in the MOFkey format signature
const std::string MOFKEY_SEP = "-";  // Separator between MOFkey fields
const std::string MOFKEY_METAL_DELIM = ",";  // If multiple metal nodes, the delimiter between elements
const std::string MOFKEY_NO_METALS = "NA";  // Metal field when no metal atoms are found in the nodes
const std::string MOFKEY_NO_LINKERS = "MISSING_LINKERS";  // Linker field when no linker InChIKey is obtained

// PA's to describe points of extension
const int POE_EXTERNAL_ELEMENT = 118; // Og
const int SBU_EXTERNAL_ELEMENT = 117; // Ts
Expand All @@ -42,6 +50,7 @@ const int TREE_EXT_CONN = 115; // Mc

// Function prototypes
std::string writeFragments(std::vector<OBMol> fragments, OBConversion obconv);
// Resets the bonding of a copy of the fragment, unwraps it, and writes it in obconv's output format
std::string exportNormalizedMol(OBMol fragment, OBConversion obconv);
// Convenience wrapper around exportNormalizedMol for SMILES output
std::string getSMILES(OBMol fragment, OBConversion obconv);


Expand Down Expand Up @@ -101,6 +110,7 @@ class MOFidDeconstructor : public Deconstructor {
public:
MOFidDeconstructor(OBMol* orig_mof = NULL);
virtual ~MOFidDeconstructor() {};
virtual std::string GetMOFkey(const std::string &topology = DEFAULT_MOFKEY_TOPOLOGY);
};


Expand Down
20 changes: 13 additions & 7 deletions src/sbu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ std::string analyzeMOF(std::string filename, const std::string &output_dir=DEFAU
extern "C" void analyzeMOFc(const char *cifdata, char *analysis, int buflen);
extern "C" int SmilesToSVG(const char* smiles, int options, void* mbuf, unsigned int buflen);
void try_mkdir(const std::string &path);
void write_string(const std::string &contents, const std::string &path);


int main(int argc, char* argv[])
Expand Down Expand Up @@ -107,18 +108,14 @@ std::string analyzeMOF(std::string filename, const std::string &output_dir) {

// Save a copy of the original mol for debugging
writeCIF(&orig_mol, output_dir + "/orig_mol.cif");
std::ofstream file_info;
std::string mol_name_path = output_dir + "/mol_name.txt";
file_info.open(mol_name_path.c_str(), std::ios::out | std::ios::trunc);
if (file_info.is_open()) {
file_info << filename << std::endl;
file_info.close();
}
write_string(filename, output_dir + "/mol_name.txt");

MOFidDeconstructor simplifier(&orig_mol);
simplifier.SetOutputDir(output_dir + NO_SBU_SUFFIX);
simplifier.SimplifyMOF();
simplifier.WriteCIFs();
std::string mofkey_path = output_dir + NO_SBU_SUFFIX + "/mofkey_no_topology.txt";
write_string(simplifier.GetMOFkey(), mofkey_path);

SingleNodeDeconstructor sn_simplify(&orig_mol);
sn_simplify.SetOutputDir(output_dir + SINGLE_NODE_SUFFIX);
Expand Down Expand Up @@ -187,3 +184,12 @@ void try_mkdir(const std::string &path) {
}
}

void write_string(const std::string &contents, const std::string &path) {
	// Overwrites the file at `path` with `contents` followed by a newline.
	// Best effort: if the file cannot be opened, nothing is written and no
	// error is reported.
	std::ofstream out(path.c_str(), std::ios::out | std::ios::trunc);
	if (!out.is_open()) {
		return;
	}
	out << contents << std::endl;
	out.close();
}

0 comments on commit 16494a5

Please sign in to comment.