Skip to content

Commit

Permalink
Publication release
Browse files Browse the repository at this point in the history
  • Loading branch information
Daniel Probst committed Feb 17, 2022
1 parent 72ada5a commit fb4168e
Show file tree
Hide file tree
Showing 30 changed files with 64,436 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .flake8
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[flake8]
extend-ignore = E501
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,10 @@ dmypy.json

# Pyre type checker
.pyre/

# vscode settings
/.vscode

# Ignore figures and tables
/figures/svg/*
/figures/tables/*
308 changes: 306 additions & 2 deletions README.md

Large diffs are not rendered by default.

48 changes: 48 additions & 0 deletions bin/rbt-canonicalize.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/usr/bin/env python

import click
from rxn_biocatalysis_tools import EnzymaticReaction, tokenize_smiles


@click.command()
@click.argument("input_file", type=click.Path(exists=True))
@click.argument("output_file", type=click.Path())
@click.option("--pipe", is_flag=True)
def main(input_file: str, output_file: str, pipe: bool):
    """Canonicalise and sort reaction SMILES from INPUT_FILE into OUTPUT_FILE.

    Each input line is stripped, any trailing EC-number part (starting with
    "[v") is split off, the SMILES part is canonicalised and sorted via
    EnzymaticReaction, tokenized, and written out one reaction per line.
    Lines that cannot be parsed are written through unchanged.

    With --pipe, a " |" separator is inserted between the tokenized SMILES
    and the EC-number part.
    """
    pipe_char = " |" if pipe else ""

    with open(input_file, "r") as f:
        smiles = [line.strip() for line in f]

    with open(output_file, "w+") as f:
        for original_smiles in smiles:
            # In case of a predicted source which includes the EC number
            # (always starting with "[v") we need to take care of this
            smiles_part = original_smiles.replace(" ", "")
            ec_part = ""

            try:
                ec_index = smiles_part.find("[v")
                if ec_index > -1:
                    ec_part = f" {smiles_part[ec_index:].strip()}".replace("][", "] [")
                    smiles_part = smiles_part[:ec_index].strip()

                # Using the EnzymaticReaction class here to get canonicalisation
                # + ordering; ">>" is needed for it to be recognised as a valid
                # rxn smiles / smarts
                rxn = EnzymaticReaction(smiles_part + ">>")
                rxn.sort()
                sorted_canonicalised_smiles = ".".join(rxn.get_reactants_as_smiles())
                f.write(
                    f"{tokenize_smiles(sorted_canonicalised_smiles)}{pipe_char}{ec_part}\n"
                )
            # Narrowed from a bare `except:` (which would also swallow
            # SystemExit / KeyboardInterrupt). Writing non-sense items that
            # occur in the backward prediction as-is is deliberate best-effort.
            except Exception:
                f.write(f"{original_smiles}\n")


# Standard script entry point: run the click command only when this file is
# executed directly (not when imported as a module).
if __name__ == "__main__":
    main()
Loading

0 comments on commit fb4168e

Please sign in to comment.