Skip to content

Commit

Permalink
Merge pull request #313 from marrink-lab/DNA-exten/itp
Browse files Browse the repository at this point in the history
[1] Dna exten/itp
  • Loading branch information
fgrunewald authored Sep 26, 2023
2 parents b8fb8b8 + f9117f5 commit 84d0c25
Show file tree
Hide file tree
Showing 15 changed files with 6,034 additions and 19 deletions.
3 changes: 3 additions & 0 deletions bin/polyply
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def main(): # pylint: disable=too-many-locals,too-many-statements
help='A linear sequence of residue names.')
seq_group.add_argument('-seqf', dest='seq_file', type=Path,
help='A graph input file (JSON|TXT|FASTA|IG)')
dna_group = parser_gen_itp.add_argument_group('DNA specifc options')
dna_group.add_argument('-dsdna', dest='dsdna', action='store_true',
help='complement single sequence to dsDNA sequence')

parser_gen_itp.set_defaults(func=gen_itp)

Expand Down
25 changes: 25 additions & 0 deletions polyply/data/martini2/DNA_M2.ff
Original file line number Diff line number Diff line change
Expand Up @@ -566,3 +566,28 @@ BB3 +BB2
BB2 BB3 +BB1 +BB2 1 180.00000 2 3 {"group": "link"}
BB3 +BB1 +BB2 +BB3 9 85.00000 2 2 {"group": "link", "version": "1"}
BB3 +BB1 +BB2 +BB3 9 160.00000 2 3 {"group": "link", "version": "2"}

;
; CIRCULAR MARTINI DNA
;

[ link ]
resname "DT|DG|DA|DC"
[ atoms ]
BB3 { }
>BB1 { }
[ bonds ]
BB3 >BB1 1 0.35300 10000 {"group": "link-circle", "edge": false}
[ angles ]
BB2 BB3 >BB1 2 102.00000 150 {"group": "backbone-circle", "edge": false}
BB3 >BB1 >BB2 2 106.00000 75 {"group": "backbone-circle", "edge": false}
[ exclusions ]
#meta {"edge": false}
BB2 >BB1
BB3 >BB2
[ dihedrals ]
BB2 BB3 >BB1 >BB2 1 180.00000 2 3 {"group": "link-circle", "edge": false}
BB3 >BB1 >BB2 >BB3 9 85.00000 2 2 {"group": "link-circle", "version": "1", "edge": false}
BB3 >BB1 >BB2 >BB3 9 160.00000 2 3 {"group": "link-circle", "version": "2", "edge": false}
[ edges ]
BB3 >BB1 {"linktype": "circle"}
7 changes: 6 additions & 1 deletion polyply/src/apply_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -489,7 +489,12 @@ def run_molecule(self, meta_molecule):

# take care to remove nodes if there are any scheduled for removal
# we do this here becuase that's more efficent
molecule.remove_nodes_from(self.nodes_to_remove)
if self.nodes_to_remove:
molecule.remove_nodes_from(self.nodes_to_remove)
# make sure the residue graph is updated; this takes care that
# nodes are also removed from the fragment graphs in the
# meta_molecule.nodes['graph'] attribute
meta_molecule.relabel_and_redo_res_graph(mapping={})
# now we add all interactions but not the ones that contain the removed
# nodes
for inter_type in self.applied_links:
Expand Down
106 changes: 106 additions & 0 deletions polyply/src/gen_dna.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Copyright 2022 University of Groningen
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx
from tqdm import tqdm

BASE_LIBRARY = {"DA": "DT", "DT": "DA", "DG": "DC", "DC": "DG",
"DA5": "DT3", "DT5": "DA3", "DG5": "DC3", "DC5": "DG3",
"DA3": "DT5", "DT3": "DA5", "DG3": "DC5", "DC3": "DG5"
}

def _dna_edge_iterator(meta_molecule, source):
"""
Traversal method for DNA specifc meta-molecule.
Should become obsolete once vermouth commits to
directed graphs.
"""
first_node = source
count = 0
while True:
neighbors = meta_molecule.neighbors(source)
src_resid = meta_molecule.nodes[source]["resid"]
for next_node in neighbors:
next_resid = meta_molecule.nodes[next_node]["resid"]
diff = src_resid - next_resid
if diff == 1 :
yield (source, next_node)
source = next_node
break

if next_resid > src_resid and next_node == first_node:
yield (source, next_node)
return
else:
return

def complement_dsDNA(meta_molecule):
"""
Given a meta-molecule, which represents the residue graph
of a single strand of dsDNA add the complementray strand
to get a meta-molecule with two disconnected strands. The
update of the meta_molecule is done in place.
By convention the other strand is generate 5'-> 3' end,
meaning that the last residue of the first strand aligns
with the first residue of the second strand.
Parameters
----------
meta_molecule: :class:`polyply.src.meta_molecule.MetaMolecule`
Raises
------
IOError
when the resname does not match any of the know base-pair
names an error is raised.
"""
last_node = list(meta_molecule.nodes)[-1]
resname = BASE_LIBRARY[meta_molecule.nodes[last_node]["resname"]]
meta_molecule.add_monomer(last_node+1, resname, [])

correspondance = {last_node: last_node+1}
total = last_node+1

pbar = tqdm(total=len(meta_molecule.nodes))
for prev_node, next_node in _dna_edge_iterator(meta_molecule, source=last_node):
try:
resname = BASE_LIBRARY[meta_molecule.nodes[next_node]["resname"]]
except KeyError:
msg = ("Trying to complete a dsDNA strand. However, resname {resname} with resid {resid} "
"does not match any of the known base-pair resnames. Note that polyply "
"at the moment only allows base-pair completion for molecules that only "
"consist of dsDNA. Please conact the developers if you whish to create a "
"more complicated molecule.")

resname = meta_molecule.nodes[next_node]["resname"]
resid = meta_molecule.nodes[next_node]["resid"]
raise IOError(msg.format(resname=resname, resid=resid))

if next_node not in correspondance:
new_node = total + 1
meta_molecule.add_monomer(new_node, resname, [(correspondance[prev_node], new_node)])
correspondance[next_node] = new_node
else:
new_node = correspondance[next_node]
meta_molecule.add_edge(correspondance[prev_node], new_node)

# make sure that edge attributes are carried over
for attr, value in meta_molecule.edges[(prev_node, next_node)].items():
meta_molecule.edges[(correspondance[prev_node], new_node)][attr] = value

total += 1
pbar.update(1)
pbar.close()
return meta_molecule

16 changes: 9 additions & 7 deletions polyply/src/gen_itp.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
from polyply import (MetaMolecule, ApplyLinks, Monomer, MapToMolecule)
from polyply.src.graph_utils import find_missing_edges
from .load_library import load_ff_library
from .gen_dna import complement_dsDNA

LOGGER = StyleAdapter(get_logger(__name__))

Expand All @@ -59,7 +60,7 @@ def split_seq_string(sequence):
monomers.append(Monomer(resname=resname, n_blocks=n_blocks))
return monomers

def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None):
def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None, seq=None, seq_file=None, dsdna=False):
"""
Top level function for running the polyply parameter generation.
Parameters seq and seq_file are mutually exclusive. Set the other
Expand Down Expand Up @@ -96,19 +97,20 @@ def gen_params(name="polymer", outpath=Path("polymer.itp"), inpath=[], lib=None,
LOGGER.info("reading sequence from file", type="step")
meta_molecule = MetaMolecule.from_sequence_file(force_field, seq_file, name)

# Generate complementary DNA strand
if dsdna:
complement_dsDNA(meta_molecule)

# Do transformationa and apply link
LOGGER.info("mapping sequence to molecule", type="step")
meta_molecule = MapToMolecule(force_field).run_molecule(meta_molecule)
LOGGER.info("applying links between residues", type="step")
meta_molecule = ApplyLinks().run_molecule(meta_molecule)

# Raise warning if molecule is disconnected
if not nx.is_connected(meta_molecule.molecule):
LOGGER.warning("Your molecule consists of disjoint parts."
"Perhaps links were not applied correctly.")
msg = "Missing link between residue {idxA} {resA} and residue {idxB} {resB}"
for missing in find_missing_edges(meta_molecule, meta_molecule.molecule):
LOGGER.warning(msg, **missing)
msg = "Missing a link between residue {idxA} {resA} and residue {idxB} {resB}."
for missing in find_missing_edges(meta_molecule, meta_molecule.molecule):
LOGGER.warning(msg, **missing)

with deferred_open(outpath, 'w') as outfile:
header = [ ' '.join(sys.argv) + "\n" ]
Expand Down
19 changes: 10 additions & 9 deletions polyply/src/meta_molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ def __init__(self, *args, **kwargs):
self.__search_tree = None
self.root = None
self.dfs = False
self.max_resid = 0

# add resids to polyply meta-molecule nodes if they are not
# present. All algorithms rely on proper resids
Expand All @@ -117,6 +118,13 @@ def __init__(self, *args, **kwargs):
msg = "Couldn't add 1 to node. Either provide resids or use integers as node keys."
raise IOError(msg)

if self.max_resid < self.nodes[node]["resid"]:
self.max_resid = self.nodes[node]["resid"]

def add_node(self, *args, **kwargs):
self.max_resid += 1
kwargs["resid"] = self.max_resid
super().add_node(*args, **kwargs)

def add_monomer(self, current, resname, connections):
"""
Expand All @@ -125,14 +133,7 @@ def add_monomer(self, current, resname, connections):
that matches may only refer to already existing nodes.
But connections can be an empty list.
"""
resids = nx.get_node_attributes(self, "resid")

if resids:
resid = max(resids.values()) + 1
else:
resid = 1

self.add_node(current, resname=resname, resid=resid, build=True, backmap=True)
self.add_node(current, resname=resname, build=True, backmap=True)
for edge in connections:
if self.has_node(edge[0]) and self.has_node(edge[1]):
self.add_edge(edge[0], edge[1])
Expand All @@ -155,7 +156,7 @@ def relabel_and_redo_res_graph(self, mapping):
mapping of node-key to new residue name
"""
# find the maximum resiude id
max_resid = max(nx.get_node_attributes(self.molecule, "resid").values())
max_resid = self.max_resid
# resname the residues and increase with pseudo-resid
for node, resname in mapping.items():
self.molecule.nodes[node]["resname"] = resname
Expand Down
5 changes: 4 additions & 1 deletion polyply/src/simple_seq_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def _identify_residues(comments):

if "RNA" in comment:
RNA = True

if "PROTEIN" in comment:
AA = True

Expand Down Expand Up @@ -253,6 +253,9 @@ def parse_ig(filepath):
raise FileFormatError(msg)

DNA, RNA, AA = _identify_residues(comments)
if set(clean_lines[0]).issubset(set(['A', 'C', 'G', 'T'])):
LOGGER.warning("Found only the letters A, C, G, T on first line. Are you missing the title line in your .ig file?")

seq_graph = _parse_plain(clean_lines[1:], DNA=DNA, RNA=RNA, AA=AA)

if ter_char == '2':
Expand Down
Loading

0 comments on commit 84d0c25

Please sign in to comment.