Skip to content

Commit

Permalink
Allow reading starting models in BinaryCIF format
Browse files Browse the repository at this point in the history
  • Loading branch information
benmwebb committed Feb 1, 2025
1 parent e5d5aad commit 62d4b3f
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 12 deletions.
17 changes: 9 additions & 8 deletions pyext/src/topology/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
like Molecule.get_atomic_residues() and Molecule.get_non_atomic_residues().
These functions all return Python sets for easy set arithmetic using
& (and), | (or), - (difference)
* Molecule.add_structure() to add structural information from an mmCIF
or PDB file.
* Molecule.add_structure() to add structural information from an mmCIF,
BinaryCIF, or PDB file.
* Molecule.add_representation() to create a representation unit - here you
can choose bead resolutions as well as alternate representations like
densities or ideal helices.
Expand Down Expand Up @@ -556,7 +556,7 @@ def add_structure(self, pdb_fn, chain_id, res_range=[],
soft_check=False):
"""Read a structure and store the coordinates.
@return the atomic residues (as a set)
@param pdb_fn The file to read (in PDB or mmCIF format)
@param pdb_fn The file to read (in PDB, mmCIF or BinaryCIF format)
@param chain_id Chain ID to read
@param res_range Add only a specific set of residues from the PDB
file. res_range[0] is the starting and res_range[1]
Expand Down Expand Up @@ -1378,11 +1378,12 @@ class TopologyReader:
of "cop9" and UniProt accession of "Q13098". If such an accession is
present, it is added to the generated structure (and ultimately
recorded in any output RMF file).
- `pdb_fn`: Name of PDB or mmCIF file with coordinates (if available).
If left empty, will set up as BEADS (you can also specify "BEADS")
Can also write "IDEAL_HELIX".
- `chain`: Chain ID of this domain in the PDB or mmCIF file. This is
the "author-provided" chain ID for mmCIF files, not the asym_id.
- `pdb_fn`: Name of PDB, mmCIF, or BinaryCIF file with coordinates
(if available). If left empty, will set up as BEADS (you can also
specify "BEADS"). Can also write "IDEAL_HELIX".
- `chain`: Chain ID of this domain in the PDB, mmCIF or BinaryCIF file.
This is the "author-provided" chain ID for mmCIF or BinaryCIF files,
not the asym_id.
- `residue_range`: Comma delimited pair defining range.
Can leave empty or use 'all' for entire sequence from PDB file.
The second item in the pair can be END to select the last residue in the
Expand Down
7 changes: 5 additions & 2 deletions pyext/src/topology/system_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def get_structure(model, pdb_fn, chain_id, res_range=None, offset=0,
model_num=None, ca_only=False):
"""read a structure from a PDB file and return a list of residues
@param model The IMP model
@param pdb_fn The file to read (in traditional PDB or mmCIF format)
@param pdb_fn The file to read (in mmCIF, BinaryCIF, or legacy PDB format)
@param chain_id Chain ID to read
@param res_range Add only a specific set of residues.
res_range[0] is the starting and res_range[1] is the ending
Expand All @@ -66,10 +66,13 @@ def get_structure(model, pdb_fn, chain_id, res_range=None, offset=0,
@param model_num Read a multi-model file (PDB, mmCIF or BinaryCIF) and return that model (0-based index)
@param ca_only Read only CA atoms (by default, all non-waters are read)
"""
# Read file in mmCIF format if requested
# Read file in mmCIF or BinaryCIF format if requested
if pdb_fn.endswith('.cif'):
read_file = IMP.atom.read_mmcif
read_multi_file = IMP.atom.read_multimodel_mmcif
elif pdb_fn.endswith('.bcif'):
read_file = IMP.atom.read_bcif
read_multi_file = IMP.atom.read_multimodel_bcif
else:
read_file = IMP.atom.read_pdb
read_multi_file = IMP.atom.read_multimodel_pdb
Expand Down
Binary file added test/input/prot.bcif
Binary file not shown.
2 changes: 2 additions & 0 deletions test/input/topology_bcif.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
|molecule_name | color | fasta_fn | fasta_id | pdb_fn | chain | residue_range | pdb_offset | bead_size | em_residues_per_gaussian | rigid_body | super_rigid_body | chain_of_super_rigid_bodies |
|Prot1 |blue |seqs.fasta|Protein_1|prot.bcif |A|55,65 |-54 |5|10|1|1,2| |
15 changes: 13 additions & 2 deletions test/test_topology_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,20 @@ def test_build_system_mmcif(self):
import sklearn
except ImportError:
self.skipTest("no sklearn package")
mdl = IMP.Model()
tfile = self.get_input_file_name('topology_mmcif.txt')
self._internal_test_build_system(tfile)

def test_build_system_binary_cif(self):
"""Test BuildSystem macro with BinaryCIF input files"""
# Skip (rather than fail) when scikit-learn cannot be imported.
# NOTE(review): presumably the build-system path exercised below needs
# sklearn; the dependency itself is not visible here - confirm.
try:
import sklearn
except ImportError:
self.skipTest("no sklearn package")
# topology_bcif.txt lists prot.bcif as the structure file for Prot1, so the
# shared build-system checks run against BinaryCIF input.
tfile = self.get_input_file_name('topology_bcif.txt')
self._internal_test_build_system(tfile)

def _internal_test_build_system(self, tfile):
mdl = IMP.Model()
input_dir = os.path.dirname(tfile)
t = IMP.pmi.topology.TopologyReader(tfile,
pdb_dir=input_dir,
Expand All @@ -381,6 +393,5 @@ def test_build_system_mmcif(self):
self.assertEqual(len(sel1), 7 + 2 )



if __name__=="__main__":
IMP.test.main()

0 comments on commit 62d4b3f

Please sign in to comment.