Skip to content

Commit

Permalink
Merge pull request #118 from rdkit-rs/validate_standardized_mols
Browse files Browse the repository at this point in the history
Validate standardized molecules
  • Loading branch information
JJ-Pineda authored Oct 1, 2024
2 parents 4c852c6 + 94e13d4 commit 560b85e
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 0 deletions.
4 changes: 4 additions & 0 deletions src/search/compound_processing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,10 @@ pub fn standardize_mol(romol: &ROMol) -> eyre::Result<ROMol> {
let te = TautomerEnumerator::new();
let canon_taut = te.canonicalize(&parent_rwmol.to_ro_mol())?;
let neutralized_canon = neutralize_atoms(&canon_taut)?;

// Validate
let _ = ROMol::from_smiles(&neutralized_canon.as_smiles())
.map_err(|_| eyre::eyre!("Canonicalization failed validation"))?;
Ok(neutralized_canon)
}

Expand Down
11 changes: 11 additions & 0 deletions tests/cpd_processing_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,17 @@ fn test_standardize_bad_smiles() {
assert!(standardize_smiles(smiles, false).is_err());
}

#[test]
fn test_bad_standardization() {
    // This input parses fine as SMILES on its own; the trouble is on our side.
    // The current standardization procedure can mangle isotopic hydrogens
    // (the `[2H]` here). Such inputs are rare, so a dedicated fix is deferred
    // for now, but standardization must at least report an error for them.
    let isotopic_smiles = "O=C(O[2H])C(F)(F)F";
    assert!(standardize_smiles(isotopic_smiles, false).is_err());
}

#[test]
fn test_get_tautomers() {
let smiles = "Oc1c(cccc3)c3nc2ccncc12";
Expand Down

0 comments on commit 560b85e

Please sign in to comment.