Skip to content

Commit

Permalink
Set sequence offset on mmCIF file read
Browse files (browse the repository at this point in the history)
When reading an mmCIF file, set each chain's
sequence offset using the internal and author
provided numbering of its first residue (rather
than leaving it as zero). This gives us a better
chance of having the correct seq_id in any future
output mmCIF file.
  • Loading branch information
benmwebb committed Aug 23, 2024
1 parent 770099c commit a92c68b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 3 deletions.
16 changes: 13 additions & 3 deletions modules/atom/src/mmcif.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,9 @@ class AtomSiteCategory : public Category {
return true;
}

void get_chain_particle(const std::string &chain,
bool get_chain_particle(const std::string &chain,
const std::string &label_asym_id) {
bool new_chain = false;
if (cp_ == nullptr || chain != curr_chain_) {
curr_chain_ = chain;
std::pair<Particle *, std::string> root_chain(root_p_, chain);
Expand All @@ -144,11 +145,13 @@ class AtomSiteCategory : public Category {
Chain(cp_).set_label_asym_id(label_asym_id);
Hierarchy(root_p_).add_child(Chain(cp_));
chain_map_[root_chain] = cp_;
new_chain = true;
} else {
cp_ = chain_map_[root_chain];
}
rp_ = nullptr; // make sure we get a new residue
}
return new_chain;
}

// Replace at most maxlen chars in dest, starting at pos, with repl
Expand Down Expand Up @@ -178,10 +181,11 @@ class AtomSiteCategory : public Category {

// Use author-provided chain ID if available
std::string label_asym_id = chain_.as_str();
bool new_chain;
if (strlen(auth_chain_.as_str()) > 0) {
get_chain_particle(auth_chain_.as_str(), label_asym_id);
new_chain = get_chain_particle(auth_chain_.as_str(), label_asym_id);
} else {
get_chain_particle(label_asym_id, label_asym_id);
new_chain = get_chain_particle(label_asym_id, label_asym_id);
}
std::string auth_seq_id_str = auth_seq_id_.as_str();
// Check if new residue
Expand All @@ -200,6 +204,12 @@ class AtomSiteCategory : public Category {
// if auth_seq_id is blank, use seq_id instead
if (endptr == start) auth_seq_id = seq_id;
char one_icode = 32; // default insertion code (space)

// Set the chain's sequence offset based on the first residue numbering
if (new_chain) {
Chain(cp_).set_sequence_offset(auth_seq_id - seq_id);
}

// if auth_seq_id is not blank and contains something after the number,
// use the first character of that as the insertion code
if (endptr != start && *endptr) {
Expand Down
10 changes: 10 additions & 0 deletions modules/atom/test/test_mmcif.py
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,16 @@ def test_chain_selector_multi_char(self):
for x in IMP.atom.get_by_type(mp, IMP.atom.CHAIN_TYPE)]
self.assertEqual([c.get_id() for c in chains], ['ZB'])

def test_chain_read_offset(self):
    """Chains read from an mmCIF file should report a sequence offset.

    Reads only chain "ZK" from the chaintest.cif input file and checks
    that the first chain found has a sequence offset of 286.
    """
    model = IMP.Model()

    # Build the reader arguments one at a time, in the same order the
    # single combined call would evaluate them.
    cif_path = self.get_input_file_name('chaintest.cif')
    only_zk = IMP.atom.ChainPDBSelector(["ZK"])
    hierarchy = IMP.atom.read_mmcif(cif_path, model, only_zk)

    # Wrap every chain-type particle in the hierarchy as a Chain decorator.
    chain_particles = IMP.atom.get_by_type(hierarchy, IMP.atom.CHAIN_TYPE)
    chains = [IMP.atom.Chain(particle) for particle in chain_particles]

    first_chain = chains[0]
    self.assertEqual(first_chain.get_sequence_offset(), 286)


if __name__ == '__main__':
IMP.test.main()

0 comments on commit a92c68b

Please sign in to comment.