Skip to content

Commit

Permalink
Fix compatibility with MassModifications in encoder.py; add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
RalfG committed Aug 30, 2023
1 parent 29521f9 commit 5725c26
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 44 deletions.
31 changes: 16 additions & 15 deletions ms2pip/_utils/encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@

class Encoder:
"""Modification-aware encoding of peptidoforms."""

def __init__(self) -> None:
"""
Modification-aware encoding of peptidoforms.
Expand Down Expand Up @@ -98,7 +99,7 @@ def __exit__(self, exc_type, exc_value, traceback):
self.remove_encoder_files()

def __repr__(self) -> str:
return "{}.{}({})".format(
return "{}.{}(modifications={})".format(
self.__class__.__module__,
self.__class__.__qualname__,
self.modifications,
Expand Down Expand Up @@ -168,7 +169,7 @@ def _configure_modification(self, target: str, modification: proforma.TagBase):
logger.warning(f"Skipping modification for invalid amino acid: {target}")
return None

self.modifications[(target, modification.key)] = {
self.modifications[(target, str(modification))] = {
"mod_id": self._next_mod_id,
"mass_shift": modification.mass,
"amino_acid": target,
Expand All @@ -180,42 +181,42 @@ def _configure_modification(self, target: str, modification: proforma.TagBase):
def _configure_from_peptidoform(self, peptidoform: Peptidoform):
"""Configure encoder with modifications from single Peptidoform."""
# Get unique modifications from psm
unique_modifications = dict()
try:
unique_modifications = set()
for aa, mods in peptidoform.parsed_sequence:
if mods:
unique_modifications.update([(aa, mod) for mod in mods])
unique_modifications.update({(aa, str(mod)): mod for mod in mods})
for term in ["n_term", "c_term"]:
if peptidoform.properties[term]:
unique_modifications.update(
[(term, mod) for mod in peptidoform.properties[term]]
{(term, str(mod)): mod for mod in peptidoform.properties[term]}
)
except KeyError as e:
raise exceptions.UnresolvableModificationError(e.args[0]) from e

# Add modification entries
for target, mod in unique_modifications:
for (target, _), mod in unique_modifications.items():
self._configure_modification(target, mod)

def _configure_from_psm_list(self, psm_list: PSMList):
"""Configure encoder with modifications from PSMList."""
# Get unique modifications from psm_list
unique_modifications = dict()
try:
unique_modifications = set()
for psm in psm_list:
for aa, mods in psm.peptidoform.parsed_sequence:
if mods:
unique_modifications.update([(aa, mod) for mod in mods])
unique_modifications.update({(aa, str(mod)): mod for mod in mods})
for term in ["n_term", "c_term"]:
if psm.peptidoform.properties[term]:
unique_modifications.update(
[(term, mod) for mod in psm.peptidoform.properties[term]]
{(term, str(mod)): mod for mod in psm.peptidoform.properties[term]}
)
except KeyError as e:
raise exceptions.UnresolvableModificationError(e.args[0]) from e

# Add modification entries
for target, mod in unique_modifications:
for (target, _), mod in unique_modifications.items():
self._configure_modification(target, mod)

def write_encoder_files(self) -> str:
Expand Down Expand Up @@ -295,8 +296,8 @@ def encode_peptidoform(self, peptidoform: Peptidoform) -> np.ndarray:

def _generate_encoding(peptidoform) -> Generator[int, None, None]:
if peptidoform.properties["n_term"]:
mod_key = peptidoform.properties["n_term"][0].key
yield self.modifications["n_term", mod_key]["mod_id"]
mod_str = str(peptidoform.properties["n_term"][0])
yield self.modifications["n_term", mod_str]["mod_id"]
else:
yield 0

Expand All @@ -305,15 +306,15 @@ def _generate_encoding(peptidoform) -> Generator[int, None, None]:
if not mods:
yield AMINO_ACID_IDS[aa]
else:
yield self.modifications[aa, mods[0].key]["mod_id"]
yield self.modifications[aa, str(mods[0])]["mod_id"]
except KeyError as e:
raise exceptions.InvalidAminoAcidError(
f"Unsupported amino acid found in peptide `{peptidoform.proforma}`"
) from e

if peptidoform.properties["c_term"]:
mod_key = peptidoform.properties["c_term"][0].key
yield self.modifications["c_term", mod_key]["mod_id"]
mod_str = str(peptidoform.properties["c_term"][0])
yield self.modifications["c_term", mod_str]["mod_id"]
else:
yield 0

Expand Down
91 changes: 91 additions & 0 deletions tests/test_encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import pytest
from psm_utils import Peptidoform, PSM, PSMList

from ms2pip._utils.encoder import Encoder


class TestEncoder:
    """Tests for configuring ``Encoder`` from peptidoforms and PSM lists."""

    @staticmethod
    def _assert_modifications_match(actual, expected, context=""):
        """
        Assert that the configured modifications equal the expected ones.

        Parameters
        ----------
        actual
            Mapping as exposed by ``Encoder.modifications``:
            ``{(target, label): {field: value, ...}}``.
        expected
            Mapping with the same keys; each value holds the fields to verify.
            Fields present in ``actual`` but absent from ``expected`` are not
            checked.
        context
            Optional text (e.g., the peptidoform) added to failure messages.

        """
        # Compare the key sets first. Iterating only over ``actual`` would let
        # an encoder that configured no (or too few) modifications pass
        # without a single assertion being executed.
        assert set(actual) == set(expected), (
            f"Unexpected set of configured modifications {context}".rstrip()
        )

        for key, expected_entry in expected.items():
            for field, expected_value in expected_entry.items():
                observed = actual[key][field]
                if isinstance(expected_value, float):
                    # Mass shifts are floats; compare with a tolerance.
                    assert observed == pytest.approx(expected_value), (key, field)
                else:
                    assert observed == expected_value, (key, field)

    def test_from_peptidoform(self):
        """Configure an encoder from single peptidoforms and check its modifications."""
        fields = ("amino_acid", "amino_acid_id", "mass_shift")
        test_cases = [
            # Peptidoform, {(target, label): (amino_acid, amino_acid_id, mass_shift)}
            # Note that the expected label can differ from the input notation,
            # e.g. ``U:4`` is expected as ``UNIMOD:4``.
            ("ACDEK", {}),
            ("AC[+57.021464]DEK", {("C", "+57.021464"): ("C", 1, 57.021464)}),
            ("AC[U:4]", {("C", "UNIMOD:4"): ("C", 1, 57.021464)}),
            ("AC[formula:H3C2NO]", {("C", "Formula:H3C2NO"): ("C", 1, 57.021464)}),
            ("[Acetyl]-ACDE", {("n_term", "Acetyl"): ("n_term", -1, 42.010565)}),
            ("ACDE-[Amidated]", {("c_term", "Amidated"): ("c_term", -2, -0.984016)}),
            (
                "AC[+57.021464]DE-[Amidated]",
                {
                    ("C", "+57.021464"): ("C", 1, 57.021464),
                    ("c_term", "Amidated"): ("c_term", -2, -0.984016),
                },
            ),
            (
                "[Acetyl]-AC[+57.021464]DE",
                {
                    ("n_term", "Acetyl"): ("n_term", -1, 42.010565),
                    ("C", "+57.021464"): ("C", 1, 57.021464),
                },
            ),
        ]

        for peptidoform, expected_mods in test_cases:
            encoder = Encoder.from_peptidoform(Peptidoform(peptidoform))
            # Expand the compact expected tuples into field -> value mappings.
            expected = {
                key: dict(zip(fields, values)) for key, values in expected_mods.items()
            }
            self._assert_modifications_match(
                encoder.modifications, expected, context=f"for `{peptidoform}`"
            )

    def test_from_psm_list(self):
        """Configure an encoder from a PSM list and check all modification entries."""
        psm_list = PSMList(
            psm_list=[
                PSM(peptidoform="AC[+57.021464]DEK", spectrum_id=0),
                PSM(peptidoform="AC[U:4]", spectrum_id=1),
                PSM(peptidoform="AC[formula:H3C2NO]", spectrum_id=2),
                PSM(peptidoform="[Acetyl]-ACDE", spectrum_id=3),
                PSM(peptidoform="ACDE-[Amidated]", spectrum_id=4),
            ]
        )
        # The expected ``mod_id`` values increase in the order in which the
        # modifications first occur in the PSM list.
        expected = {
            ("C", "+57.021464"): {
                "mod_id": 38,
                "mass_shift": 57.021464,
                "amino_acid": "C",
                "amino_acid_id": 1,
            },
            ("C", "UNIMOD:4"): {
                "mod_id": 39,
                "mass_shift": 57.021464,
                "amino_acid": "C",
                "amino_acid_id": 1,
            },
            ("C", "Formula:H3C2NO"): {
                "mod_id": 40,
                "mass_shift": 57.02146372057,
                "amino_acid": "C",
                "amino_acid_id": 1,
            },
            ("n_term", "Acetyl"): {
                "mod_id": 41,
                "mass_shift": 42.010565,
                "amino_acid": "n_term",
                "amino_acid_id": -1,
            },
            ("c_term", "Amidated"): {
                "mod_id": 42,
                "mass_shift": -0.984016,
                "amino_acid": "c_term",
                "amino_acid_id": -2,
            },
        }

        encoder = Encoder.from_psm_list(psm_list)
        self._assert_modifications_match(encoder.modifications, expected)
29 changes: 0 additions & 29 deletions tests/test_modifications.py

This file was deleted.

0 comments on commit 5725c26

Please sign in to comment.