CompOmics · RalfG · Jun 11, 2025
diff --git a/psm_utils/peptidoform.py b/psm_utils/peptidoform.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 from collections import defaultdict
-from typing import Iterable, List, Tuple, Union
+from typing import Iterable, Iterator, List, Tuple, TypedDict, Union, cast
 
 import numpy as np
 from pyteomics import mass, proforma
@@ -29,8 +29,10 @@ def __init__(self, proforma_sequence: Union[str, proforma.ProForma]) -> None:
         ----------
         parsed_sequence : list
             List of tuples with residue and modifications for each location.
-        properties : dict[str, Any]
-            Dict with sequence-wide properties.
+        properties : :py:class:`PeptidoformProperties`
+            Dictionary with properties of the peptidoform, including N- and C-terminal
+            modifications, unlocalized modifications, labile modifications, fixed
+            modifications, and charge state.
 
         Examples
         --------
@@ -39,6 +41,10 @@ def __init__(self, proforma_sequence: Union[str, proforma.ProForma]) -> None:
         711.2567622919099
 
         """
+        self.parsed_sequence: List[Tuple[str, List[proforma.TagBase] | None]]
+        self.properties: PeptidoformProperties
+
+        # Parse ProForma
         if isinstance(proforma_sequence, str):
             try:
                 self.parsed_sequence, self.properties = proforma.parse(proforma_sequence)
@@ -66,13 +72,21 @@ def __str__(self) -> str:
     def __hash__(self) -> int:
         return hash(self.proforma)
 
-    def __eq__(self, __o: Union[Peptidoform, str]) -> bool:
+    def __eq__(self, __o: object) -> bool:
         if isinstance(__o, str):
             return self.proforma == __o
-        elif isinstance(__o, Peptidoform):
+        elif isinstance(__o, Peptidoform):  # type: ignore[return]
             return self.proforma == __o.proforma
         else:
-            raise TypeError(f"Cannot compare {type(__o)} with Peptidoform.")
+            raise TypeError(f"Unsupported comparison type for Peptidoform: {type(__o)}")
+
+    def __lt__(self, __o: object) -> bool:
+        if isinstance(__o, str):
+            return self.proforma < __o
+        elif isinstance(__o, Peptidoform):
+            return self.proforma < __o.proforma
+        else:
+            raise TypeError(f"Unsupported comparison type for Peptidoform: {type(__o)}")
 
     def __iter__(self) -> Iterable[Tuple[str, Union[None, List[proforma.TagBase]]]]:
         return self.parsed_sequence.__iter__()
@@ -188,8 +202,9 @@ def sequential_composition(self) -> list[mass.Composition]:
         # Get compositions for fixed modifications by amino acid
         fixed_rules = {}
         for rule in self.properties["fixed_modifications"]:
-            for aa in rule.targets:
-                fixed_rules[aa] = rule.modification_tag.composition
+            if rule.targets is not None:
+                for aa in rule.targets:
+                    fixed_rules[aa] = rule.modification_tag.composition
 
         comp_list = []
 
@@ -220,6 +235,7 @@ def sequential_composition(self) -> list[mass.Composition]:
             # Localized modifications
             if tags:
                 for tag in tags:
+                    tag = cast(proforma.ModificationBase, tag)
                     try:
                         position_comp += tag.composition
                     except (AttributeError, KeyError) as e:
@@ -275,7 +291,7 @@ def composition(self) -> mass.Composition:
         return comp
 
     @property
-    def sequential_theoretical_mass(self) -> float:
+    def sequential_theoretical_mass(self) -> list[float]:
         """
         Monoisotopic mass of both termini and each (modified) residue.
 
@@ -296,8 +312,9 @@ def sequential_theoretical_mass(self) -> float:
         """
         fixed_rules = {}
         for rule in self.properties["fixed_modifications"]:
-            for aa in rule.targets:
-                fixed_rules[aa] = rule.modification_tag.mass
+            if rule.targets is not None:
+                for aa in rule.targets:
+                    fixed_rules[aa] = rule.modification_tag.mass
 
         mass_list = []
 
@@ -326,6 +343,7 @@ def sequential_theoretical_mass(self) -> float:
             # Localized modifications
             if tags:
                 for tag in tags:
+                    tag = cast(proforma.ModificationBase, tag)
                     try:
                         position_mass += tag.mass
                     except (AttributeError, KeyError) as e:
@@ -496,15 +514,14 @@ def add_fixed_modifications(
 
         """
         if isinstance(modification_rules, dict):
-            modification_rules = modification_rules.items()
-        modification_rules = [
+            modification_rules = list(modification_rules.items())
+
+        parsed_modification_rules = [
             proforma.ModificationRule(proforma.process_tag_tokens(mod), targets)
             for mod, targets in modification_rules
         ]
-        if self.properties["fixed_modifications"]:
-            self.properties["fixed_modifications"].extend(modification_rules)
-        else:
-            self.properties["fixed_modifications"] = modification_rules
+
+        self.properties.setdefault("fixed_modifications", []).extend(parsed_modification_rules)
 
     def apply_fixed_modifications(self):
         """
@@ -530,8 +547,9 @@ def apply_fixed_modifications(self):
             # Setup target_aa -> modification_list dictionary
             rule_dict = defaultdict(list)
             for rule in self.properties["fixed_modifications"]:
-                for target_aa in rule.targets:
-                    rule_dict[target_aa].append(rule.modification_tag)
+                if rule.targets is not None:
+                    for target_aa in rule.targets:
+                        rule_dict[target_aa].append(rule.modification_tag)
 
             # Apply modifications to sequence
             for i, (aa, site_mods) in enumerate(self.parsed_sequence):
@@ -553,6 +571,18 @@ def apply_fixed_modifications(self):
             self.properties["fixed_modifications"] = []
 
 
+class PeptidoformProperties(TypedDict):
+    """Property items of a :py:class:`Peptidoform`."""
+
+    n_term: list[proforma.ModificationBase] | None
+    c_term: list[proforma.ModificationBase] | None
+    unlocalized_modifications: list[proforma.ModificationBase]
+    labile_modifications: list[proforma.ModificationBase]
+    fixed_modifications: list[proforma.ModificationRule]
+    charge_state: proforma.ChargeState
+    isotopes: list[proforma.StableIsotope]
+
+
 def format_number_as_string(num):
     """Format number as string for ProForma mass modifications."""
     # Using this method over `:+g` string formatting to avoid rounding and scientific notation

diff --git a/psm_utils/psm.py b/psm_utils/psm.py
@@ -10,8 +10,8 @@
 class PSM(BaseModel):
     """Data class representing a peptide-spectrum match (PSM)."""
 
-    peptidoform: Union[Peptidoform, str]
-    spectrum_id: Union[str]
+    peptidoform: Union[Peptidoform, str]  # type: ignore
+    spectrum_id: str
     run: Optional[str] = None
     collection: Optional[str] = None
     spectrum: Optional[Any] = None
@@ -89,25 +89,30 @@ def __init__(self, **data):
         super().__init__(**data)
         # Parse peptidoform
         if isinstance(self.peptidoform, str):
-            self.peptidoform = Peptidoform(self.peptidoform)
+            self.peptidoform: Peptidoform = Peptidoform(self.peptidoform)
         elif not isinstance(self.peptidoform, Peptidoform):
             raise TypeError(
                 f"Peptidoform or str expected for `peptidoform`, not `{type(self.peptidoform)}`."
             )
 
-    def __getitem__(self, item) -> any:
+    def __getitem__(self, item) -> Any:
         return getattr(self, item)
 
-    def __setitem__(self, item, value: any) -> None:
+    def __setitem__(self, item, value: Any) -> None:
         setattr(self, item, value)
 
     @property
     def precursor_mz_error(self) -> float:
         """Difference between observed and theoretical m/z in Da."""
         theoretical_mz = self.peptidoform.theoretical_mz
+        if theoretical_mz is None or self.precursor_mz is None:
+            raise ValueError(
+                "Cannot calculate precursor m/z error: "
+                "precursor m/z is not set or theoretical m/z cannot be calculated."
+            )
         return self.precursor_mz - theoretical_mz
 
-    def get_precursor_charge(self) -> int:
+    def get_precursor_charge(self) -> int | None:
         """Precursor charge, as embedded in :py:attr:`PSM.peptidoform`."""
         return self.peptidoform.precursor_charge
 

diff --git a/psm_utils/psm_list.py b/psm_utils/psm_list.py
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 import re
-from typing import Iterable, List, Sequence
+from typing import Iterator, List, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -83,13 +83,13 @@ def __str__(self):
     def __add__(self, other):
         return PSMList(psm_list=self.psm_list + other.psm_list)
 
-    def __iter__(self) -> Iterable[PSM]:
+    def __iter__(self) -> Iterator[PSM]:  # type: ignore[override]
         return self.psm_list.__iter__()
 
     def __len__(self) -> int:
         return self.psm_list.__len__()
 
-    def __getitem__(self, item) -> PSM | list[PSM]:
+    def __getitem__(self, item) -> PSM | PSMList | np.ndarray:
         if isinstance(item, (int, np.integer)):
             # Return single PSM by index
             return self.psm_list[item]
@@ -127,16 +127,18 @@ def __setitem__(self, item, values: Sequence) -> None:
     @property
     def collections(self) -> list:
         """List of collections in :py:class:`PSMList`."""
-        if (self["collection"] != None).any():  # noqa: E711
-            return list(np.unique(self["collection"]))
+        collection_array = np.asarray(self["collection"])
+        if (collection_array != None).any():  # noqa: E711
+            return np.unique(collection_array).tolist()
         else:
             return [None]
 
     @property
     def runs(self) -> list:
         """List of runs in :py:class:`PSMList`."""
-        if (self["run"] != None).any():  # noqa: E711
-            return list(np.unique(self["run"]))
+        run_array = np.asarray(self["run"])
+        if (run_array != None).any():  # noqa: E711
+            return np.unique(run_array).tolist()
         else:
             return [None]
 
@@ -168,14 +170,14 @@ def set_ranks(self, lower_score_better: bool = False):
         """Set identification ranks for all PSMs in :py:class:`PSMList`."""
         columns = ["collection", "run", "spectrum_id", "score"]
         self["rank"] = (
-            pd.DataFrame(self[columns], columns=columns)
+            pd.DataFrame({c: self[c] for c in columns})  # per-column dtypes keep score numeric
             .sort_values("score", ascending=lower_score_better)
             .fillna(0)  # groupby does not play well with None values
             .groupby(["collection", "run", "spectrum_id"])
             .cumcount()
             .sort_index()
             + 1  # 1-based counting
-        )
+        ).to_list()
 
     def get_rank1_psms(self, *args, **kwargs) -> PSMList:
         """
@@ -184,9 +186,10 @@ def get_rank1_psms(self, *args, **kwargs) -> PSMList:
-        First runs :py:meth:`~set_ranks` with ``*args`` and ``**kwargs`` if if any PSM
+        First runs :py:meth:`~set_ranks` with ``*args`` and ``**kwargs`` if any PSM
         has no rank yet.
         """
-        if None in self["rank"]:
+        if None in np.asarray(self["rank"]):
             self.set_ranks(*args, **kwargs)
-        return self[self["rank"] == 1]
+        rank_array = np.asarray(self["rank"])  # read after set_ranks so new ranks are seen
+        return PSMList(psm_list=[self.psm_list[i] for i in np.flatnonzero(rank_array == 1)])
 
     def find_decoys(self, decoy_pattern: str) -> None:
         """
@@ -211,9 +214,12 @@ def find_decoys(self, decoy_pattern: str) -> None:
         >>> psm_list.find_decoys(r"^DECOY_")
 
         """
-        decoy_pattern = re.compile(decoy_pattern)
+        pattern = re.compile(decoy_pattern)
         for psm in self:
-            psm.is_decoy = all([decoy_pattern.search(p) is not None for p in psm.protein_list])
+            if psm.protein_list is not None:
+                psm.is_decoy = all(pattern.search(p) is not None for p in psm.protein_list)
+            else:
+                psm.is_decoy = None
 
     def calculate_qvalues(self, reverse: bool = True, **kwargs) -> None:
         """
@@ -233,7 +239,7 @@ def calculate_qvalues(self, reverse: bool = True, **kwargs) -> None:
 
         """
         for key in ["score", "is_decoy"]:
-            if (self[key] == None).any():  # noqa: E711 (self[key] is a Numpy array)
+            if (np.asarray(self[key]) == None).any():  # noqa: E711 (self[key] is a Numpy array)
                 raise ValueError(
                     f"Cannot calculate q-values if not all PSMs have `{key}` assigned."
                 )
@@ -294,16 +300,17 @@ def add_fixed_modifications(
 
         """
         if isinstance(modification_rules, dict):
-            modification_rules = modification_rules.items()
-        modification_rules = [
+            modification_rules = list(modification_rules.items())
+
+        parsed_modification_rules = [
             proforma.ModificationRule(proforma.process_tag_tokens(mod), targets)
             for mod, targets in modification_rules
         ]
+
         for psm in self.psm_list:
-            if psm.peptidoform.properties["fixed_modifications"]:
-                psm.peptidoform.properties["fixed_modifications"].extend(modification_rules)
-            else:
-                psm.peptidoform.properties["fixed_modifications"] = modification_rules
+            psm.peptidoform.properties.setdefault("fixed_modifications", []).extend(  # type: ignore[union-attr]
+                cast(list, parsed_modification_rules)
+            )
 
     def apply_fixed_modifications(self):
         """