Skip to content

Commit

Permalink
Fix reading/writing CONECT records in PDB files with many atoms (#3670)
Browse files Browse the repository at this point in the history
* disable write conect with more than 100000 atoms

* raise warning if conect is corrupt

* add tests for conect

* changelog

* doc

* move doc to parser

* add new pdb to all

* check every conect entry

* changelog

* move conect check forward

* mapping instead of raw bug

* changelog
  • Loading branch information
yuxuanzhuang authored Jul 24, 2022
1 parent d5c7cb9 commit 9131388
Show file tree
Hide file tree
Showing 6 changed files with 53 additions and 9 deletions.
7 changes: 5 additions & 2 deletions package/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@ The rules for this file:
* release numbers follow "Semantic Versioning" http://semver.org

------------------------------------------------------------------------------
??/??/?? IAlibay, PicoCentauri, orbeckst, hmacdope, rmeli, miss77jun, rzhao271
??/??/?? IAlibay, PicoCentauri, orbeckst, hmacdope, rmeli, miss77jun, rzhao271,
yuxuanzhuang

* 2.3.0

Fixes
* Fixes awk call in deploy.yaml tests for macos runners (Issue #3693)
* add a 0.5 for correct midpoints in hole analysis (Issue #3715)
* Fix reading error when PDB CONECT records are corrupt. (Issue #988)
* Fix writing unusable PDB CONECT records with index>=100000. (Issue #988)

Enhancements
* Add a new `formalcharge` attribute for storing formal charges (PR #3755)
Expand Down Expand Up @@ -51,7 +54,7 @@ Deprecations
06/01/22 IAlibay, BFedder, inomag, Agorfa, aya9aladdin, shudipto-amin, cbouy,
HenokB, umak1106, tamandeeps, Mrqeoqqt, megosato, AnirG, rishu235,
mtiberti, manishsaini6421, Sukeerti1, robotjellyzone, markvrma, alescoulie,
mjtadema, PicoCentauri, Atharva7K, aditi2906, orbeckst, yuxuanzhuang,
mjtadema, PicoCentauri, Atharva7K, aditi2906, orbeckst, yuxuanzhuang,
rsexton2, rafaelpap, richardjgowers, orioncohen

* 2.2.0
Expand Down
19 changes: 13 additions & 6 deletions package/MDAnalysis/coordinates/PDB.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,12 +540,15 @@ class PDBWriter(base.WriterBase):
An indexing issue meant it previously used the first character (Issue #2224)
.. versionchanged:: 2.0.0
Add the `reindex` argument. Setting this keyword to ``True``
(the default) preserves the behavior in earlier versions of MDAnalysis.
The PDB writer checks for a valid chainID entry instead of using the
last character of segid. Should a chainID not be present, or not
conform to the PDB standard, the default value of 'X' is used.
Add the `reindex` argument. Setting this keyword to ``True``
(the default) preserves the behavior in earlier versions of MDAnalysis.
The PDB writer checks for a valid chainID entry instead of using the
last character of segid. Should a chainID not be present, or not
conform to the PDB standard, the default value of 'X' is used.
.. versionchanged:: 2.3.0
Do not write unusable CONECT records when the ag index
is 100000 or larger.
"""
fmt = {
'ATOM': (
Expand Down Expand Up @@ -851,6 +854,10 @@ def _write_pdb_bonds(self):
for a1, a2 in bonds:
if not (a1 in mapping and a2 in mapping):
continue
if mapping[a1] >= 100000 or mapping[a2] >= 100000:
warnings.warn("Atom with index >=100000 cannot write "
"bonds to PDB CONECT records.")
return
con[a2].append(a1)
con[a1].append(a2)

Expand Down
5 changes: 5 additions & 0 deletions package/MDAnalysis/topology/PDBParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,8 @@ class PDBParser(TopologyReaderBase):
Formal charges are now read from PDB files if present. No formalcharge
attribute is created if no formal charges are present in the PDB file.
Any formal charges not set are assumed to have a value of 0.
Raise `UserWarning` instead of `RuntimeError`
when CONECT records are corrupt.
"""
format = ['PDB', 'ENT']

Expand All @@ -209,6 +211,9 @@ def parse(self, **kwargs):
except AttributeError:
warnings.warn("Invalid atom serials were present, "
"bonds will not be parsed")
except RuntimeError:
warnings.warn("CONECT records was corrupt, "
"bonds will not be parsed")
else:
# Issue 2832: don't append Bonds if there are no bonds
if bonds:
Expand Down
18 changes: 17 additions & 1 deletion testsuite/MDAnalysisTests/coordinates/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@
PDB_cm, PDB_cm_gz, PDB_cm_bz2,
PDB_mc, PDB_mc_gz, PDB_mc_bz2,
PDB_CRYOEM_BOX, MMTF_NOCRYST,
PDB_HOLE, mol2_molecule, PDB_charges,)
PDB_HOLE, mol2_molecule, PDB_charges,
CONECT_ERROR,)
from numpy.testing import (assert_equal,
assert_array_almost_equal,
assert_almost_equal)
Expand Down Expand Up @@ -657,6 +658,10 @@ def test_conect_bonds_conect(self, tmpdir, conect):
assert_equal(len(u1.atoms), 1890)
assert_equal(len(u1.bonds), 1922)

def test_conect_error(self):
    """A PDB file with a corrupt CONECT record must load with a warning.

    The parser is expected to emit a ``UserWarning`` whose message contains
    'CONECT records was corrupt' rather than letting an error propagate.
    """
    # Only the warning matters here, so the Universe itself is discarded.
    with pytest.warns(UserWarning, match='CONECT records was corrupt'):
        mda.Universe(CONECT_ERROR)

def test_numconnections(self, multiverse):
u = multiverse

Expand Down Expand Up @@ -748,6 +753,17 @@ def test_write_bonds_partial(tmpdir):
assert len(a_ref.bonds) == len(atom.bonds)


def test_write_bonds_with_100000_ag_index(tmpdir):
    """Writing atoms whose ids are >= 100000 must warn about CONECT records.

    Every atom id is shifted by 100000 before writing, and the write is
    expected to emit a ``UserWarning`` whose message contains
    'Atom with index'.
    """
    universe = mda.Universe(CONECT)
    atoms = universe.atoms
    # Push every id to 100000 or beyond.
    atoms.ids = atoms.ids + 100000

    outfile = os.path.join(str(tmpdir), 'test.pdb')
    # NOTE(review): reindex=False presumably makes the writer use the shifted
    # ids instead of renumbering from 1 -- confirm against PDBWriter.
    with pytest.warns(UserWarning, match='Atom with index'):
        atoms.write(outfile, reindex=False)


class TestMultiPDBWriter(object):
# 3 decimals in PDB spec
# http://www.wwpdb.org/documentation/format32/sect9.html#ATOM
Expand Down
11 changes: 11 additions & 0 deletions testsuite/MDAnalysisTests/data/conect_error.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
ATOM 1 N PRO A 1 -12.735 38.918 31.287 1.00 39.83 N
ATOM 2 CA PRO A 1 -12.709 39.097 29.830 1.00 39.29 C
ATOM 3 C PRO A 1 -13.575 38.051 29.162 1.00 39.78 C
ATOM 4 O PRO A 1 -14.097 37.126 29.753 1.00 38.67 O
ATOM 5 CB PRO A 1 -11.243 39.010 29.398 1.00 37.79 C
ATOM 6 CG PRO A 1 -10.636 38.128 30.469 1.00 38.69 C
ATOM 7 CD PRO A 1 -11.368 38.593 31.729 1.00 37.10 C
ATOM 8 H2 PRO A 1 -13.142 39.756 31.758 0.00 15.00 H
ATOM 9 H3 PRO A 1 -13.429 38.158 31.502 0.00 15.00 H
CONECT100000100001100002
END
2 changes: 2 additions & 0 deletions testsuite/MDAnalysisTests/datafiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
"DMS_DOMAINS", # ADK closed with multiple segids
"DMS_NO_SEGID", # ADK closed with no segids or chains
"CONECT", # HIV Reverse Transcriptase with inhibitor
"CONECT_ERROR", # PDB file with corrupt CONECT
"TRZ", "TRZ_psf",
"TRIC",
"XTC_multi_frame",
Expand Down Expand Up @@ -452,6 +453,7 @@
DMS_NO_SEGID = resource_filename(__name__, 'data/adk_closed_no_segid.dms')

CONECT = resource_filename(__name__, 'data/1hvr.pdb')
CONECT_ERROR = resource_filename(__name__, 'data/conect_error.pdb')

TRZ = resource_filename(__name__, 'data/trzfile.trz')
TRZ_psf = resource_filename(__name__, 'data/trz_psf.psf')
Expand Down

0 comments on commit 9131388

Please sign in to comment.