Skip to content

Commit

Permalink
Address requested changes
Browse files: browse the repository at this point in the history
  • Loading branch information
mobiusklein committed Oct 22, 2024
1 parent 5ecea8d commit 0c408c3
Show file tree
Hide file tree
Showing 14 changed files with 816 additions and 329 deletions.
4 changes: 2 additions & 2 deletions mzspeclib/analyte.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pyteomics import proforma

from mzspeclib.attributes import AttributedEntity, IdentifiedAttributeManager, AttributeManagedProperty, AttributeProxy, AttributeGroupFacet
from mzspeclib.const import (ANALYTE_MIXTURE_TERM, CHARGE_STATE, PROFORMA_ION, PROFORMA_SEQ, STRIPPED_PEPTIDE_SEQ, FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY)
from mzspeclib.const import (ANALYTE_MIXTURE, CHARGE_STATE, PROFORMA_ION, PROFORMA_SEQ, STRIPPED_PEPTIDE_SEQ, FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY)


class _AnalyteMappingProxy(Mapping[str, 'Analyte']):
Expand Down Expand Up @@ -132,7 +132,7 @@ def __init__(self, id, attributes: Iterable = None, analytes: Dict = None, membe

def _update_mixture_members_term(self):
value = sorted(map(int, self.analytes.keys()))
self.replace_attribute(ANALYTE_MIXTURE_TERM, value)
self.replace_attribute(ANALYTE_MIXTURE, value)

def get_analyte(self, analyte_id) -> 'Analyte':
"""Retrieve an analyte by its identifier"""
Expand Down
36 changes: 18 additions & 18 deletions mzspeclib/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
from mzspeclib.attributes import Attributed, AttributedEntity, AttributeSet, AttributeManagedProperty
from mzspeclib.ontology import _VocabularyResolverMixin
from mzspeclib.const import (
FORMAT_VERSION_TERM,
LIBRARY_NAME_TERM,
LIBRARY_IDENTIFIER_TERM,
LIBRARY_VERSION_TERM,
LIBRARY_URI_TERM,
LIBRARY_DESCRIPTION_TERM,
ANALYTE_MIXTURE_TERM,
FORMAT_VERSION,
LIBRARY_NAME,
LIBRARY_IDENTIFIER,
LIBRARY_VERSION,
LIBRARY_URI,
LIBRARY_DESCRIPTION,
ANALYTE_MIXTURE,
LIBRARY_SPECTRUM_INDEX,
LIBRARY_SPECTRUM_KEY
)
Expand All @@ -40,7 +40,7 @@
logger = logging.getLogger(__name__.rsplit(".", 1)[0])
logger.addHandler(logging.NullHandler())

ANALYTE_MIXTURE_CURIE = ANALYTE_MIXTURE_TERM.split("|")[0]
ANALYTE_MIXTURE_CURIE = ANALYTE_MIXTURE.split("|")[0]

DEFAULT_VERSION = '1.0'

Expand Down Expand Up @@ -98,20 +98,20 @@ def type_for_format(cls, format_or_extension: str) -> Type:

class _LibraryViewMixin:

name = AttributeManagedProperty[str](LIBRARY_NAME_TERM)
identifier = AttributeManagedProperty[str](LIBRARY_IDENTIFIER_TERM)
description = AttributeManagedProperty[str](LIBRARY_DESCRIPTION_TERM)
uri = AttributeManagedProperty[str](LIBRARY_URI_TERM)
library_version = AttributeManagedProperty[str](LIBRARY_VERSION_TERM)
name = AttributeManagedProperty[str](LIBRARY_NAME)
identifier = AttributeManagedProperty[str](LIBRARY_IDENTIFIER)
description = AttributeManagedProperty[str](LIBRARY_DESCRIPTION)
uri = AttributeManagedProperty[str](LIBRARY_URI)
library_version = AttributeManagedProperty[str](LIBRARY_VERSION)

@property
def format_version(self):
try:
value = self.get_attribute(FORMAT_VERSION_TERM)
value = self.get_attribute(FORMAT_VERSION)
return value
except KeyError:
value = DEFAULT_VERSION
self.add_attribute(FORMAT_VERSION_TERM, value)
self.add_attribute(FORMAT_VERSION, value)
return value


Expand Down Expand Up @@ -281,9 +281,9 @@ def _new_cluster(self) -> SpectrumCluster:

def _analyte_interpretation_link(self, spectrum: Spectrum,
interpretation: Interpretation):
if (interpretation.has_attribute(ANALYTE_MIXTURE_TERM) and
if (interpretation.has_attribute(ANALYTE_MIXTURE) and
not interpretation.analytes):
analyte_ids = interpretation.get_attribute(ANALYTE_MIXTURE_TERM)
analyte_ids = interpretation.get_attribute(ANALYTE_MIXTURE)
if isinstance(analyte_ids, str):
term = self.find_term_for(ANALYTE_MIXTURE_CURIE)
analyte_ids = term.value_type(analyte_ids)
Expand Down Expand Up @@ -699,7 +699,7 @@ def _filter_attributes(self, attributes: Attributed,
def _not_analyte_mixture_term(self, attrib):
if attrib:
key = attrib[0]
if key == ANALYTE_MIXTURE_TERM:
if key == ANALYTE_MIXTURE:
return False
return True

Expand Down
47 changes: 31 additions & 16 deletions mzspeclib/backends/bibliospec.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,23 @@
from mzspeclib.analyte import FIRST_ANALYTE_KEY, FIRST_INTERPRETATION_KEY, Analyte
from mzspeclib.spectrum import Spectrum, SPECTRUM_NAME, CHARGE_STATE
from mzspeclib.attributes import AttributeManager, Attributed

from mzspeclib.backends.base import SpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION
from mzspeclib.const import (
CHARGE_STATE,
LIBRARY_IDENTIFIER,
PROFORMA_ION,
LIBRARY_NAME,
SCAN_NUMBER,
SOURCE_FILE,
STRIPPED_PEPTIDE_SEQ,
RETENTION_TIME,
SW_VERSION,
THEORETICAL_MASS,
NUMBER_OF_REPLICATE_SPECTRA_AVAILABLE,
NUMBER_OF_REPLICATE_SPECTRA_USED,
LIBRARY_CREATION_SW,
)

from mzspeclib.backends.base import SpectralLibraryBackendBase, FORMAT_VERSION, DEFAULT_VERSION

from mzspeclib.index.base import IndexBase

Expand Down Expand Up @@ -110,16 +125,16 @@ def __init__(self, filename, **kwargs):

def read_header(self) -> bool:
attribs = AttributeManager()
attribs.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)
attribs.add_attribute("MS:1003207|library creation software", "Bibliospec")
attribs.add_attribute(FORMAT_VERSION, DEFAULT_VERSION)
attribs.add_attribute(LIBRARY_CREATION_SW, "Bibliospec")

info = self.connection.execute("SELECT * FROM LibInfo;").fetchone()
library_id = info['libLSID']
_, pfx_name = library_id.split("bibliospec:")
_, name = pfx_name.split(":", 1)
attribs.add_attribute("MS:1003188|library name", name)
attribs.add_attribute("MS:1003187|library identifier", library_id)
attribs.add_attribute("MS:1003200|software version", f"{info['majorVersion']}.{info['minorVersion']}")
attribs.add_attribute(LIBRARY_NAME, name)
attribs.add_attribute(LIBRARY_IDENTIFIER, library_id)
attribs.add_attribute(SW_VERSION, f"{info['majorVersion']}.{info['minorVersion']}")
self.attributes = attribs
return True

Expand All @@ -131,10 +146,10 @@ def _populate_analyte(self, analyte: Analyte, row: Mapping):
Bibliospec only stores modifications as delta masses.
"""
peptide = self._correct_modifications_in_sequence(row)
analyte.add_attribute("MS:1003169|proforma peptidoform sequence", str(peptide))
analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)
analyte.add_attribute("MS:1000888|stripped peptide sequence", row['peptideSeq'])
analyte.add_attribute(CHARGE_STATE, row['precursorCharge'])
peptide.charge_state = row['precursorCharge']
analyte.add_attribute(PROFORMA_ION, str(peptide))
analyte.add_attribute(THEORETICAL_MASS, peptide.mass)
analyte.add_attribute(STRIPPED_PEPTIDE_SEQ, row['peptideSeq'])

def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
"""
Expand All @@ -156,13 +171,13 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
spectrum.precursor_mz = info['precursorMZ']
spectrum.add_attribute(CHARGE_STATE, info['precursorCharge'])
try:
spectrum.add_attribute("MS:1000894|retention time", info['retentionTime'])
spectrum.add_attribute(RETENTION_TIME, info['retentionTime'])
except KeyError:
pass

try:
spectrum.add_attribute("MS:1003069|number of replicate spectra available", info['copies'])
spectrum.add_attribute("MS:1003070|number of replicate spectra used", 1)
spectrum.add_attribute(NUMBER_OF_REPLICATE_SPECTRA_AVAILABLE, info['copies'])
spectrum.add_attribute(NUMBER_OF_REPLICATE_SPECTRA_USED, 1)
except KeyError:
pass

Expand All @@ -171,14 +186,14 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):

try:
spectrum.add_attribute(
"MS:1003203|constituent spectrum file",
SOURCE_FILE,
self.connection.execute("SELECT fileName FROM SpectrumSourceFiles WHERE id = ?",
(info['fileID'], )).fetchone()['fileName']
)
except KeyError:
pass
spectrum.add_attribute(
"MS:1003057|scan number",
SCAN_NUMBER,
info["SpecIDinFile"]
)

Expand Down
32 changes: 18 additions & 14 deletions mzspeclib/backends/diann.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,26 @@
from pyteomics import proforma

from mzspeclib import annotation
from mzspeclib.backends.base import DEFAULT_VERSION, FORMAT_VERSION_TERM, LIBRARY_NAME_TERM, _CSVSpectralLibraryBackendBase
from mzspeclib.backends.base import DEFAULT_VERSION, FORMAT_VERSION, LIBRARY_NAME, _CSVSpectralLibraryBackendBase
from mzspeclib.backends.utils import open_stream, urlify
from mzspeclib.spectrum import Spectrum, SPECTRUM_NAME
from mzspeclib.const import (
PROFORMA_SEQ as PROFORMA_PEPTIDE_TERM,
PROFORMA_ION,
STRIPPED_PEPTIDE_SEQ as STRIPPED_PEPTIDE_TERM,
SELECTED_ION_MZ as SPECTRUM_SELECTED_ION_MZ,
CHARGE_STATE,
SOURCE_FILE,
CUSTOM_ATTRIBUTE_NAME,
CUSTOM_ATTRIBUTE_VALUE
)


def _rewrite_unimod_peptide_as_proforma(sequence: str) -> str:
return sequence.replace("(", '[').replace(')', ']').replace("UniMod", "UNIMOD")


CHARGE_STATE = "MS:1000041|charge state"
SELECTED_ION_MZ = "MS:1003053|theoretical monoisotopic m/z"
SOURCE_FILE = "MS:1003203|constituent spectrum file"
STRIPPED_PEPTIDE_TERM = "MS:1000888|stripped peptide sequence"
PROFORMA_PEPTIDE_TERM = "MS:1003169|proforma peptidoform sequence"

CUSTOM_ATTRIBUTE_NAME = "MS:1003275|other attribute name"
CUSTOM_ATTRIBUTE_VALUE = "MS:1003276|other attribute value"
THEO_SELECTED_ION_MZ = "MS:1003053|theoretical monoisotopic m/z"

NO_LOSS = 'noloss'

Expand Down Expand Up @@ -79,12 +82,12 @@ def _spectrum_aggregation_type(self):

def read_header(self) -> bool:
result = super().read_header()
self.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)
self.add_attribute(FORMAT_VERSION, DEFAULT_VERSION)
if hasattr(self.filename, 'name'):
name = self.filename.name.replace(".gz", '').rsplit('.', 1)[0].split(os.sep)[-1]
else:
name = self.filename.replace(".gz", '').rsplit(".", 1)[0].split(os.sep)[-1]
self.add_attribute(LIBRARY_NAME_TERM, name)
self.add_attribute(LIBRARY_NAME, name)
self.add_attribute("MS:1003207|library creation software", "MS:1003253|DIA-NN")
return result

Expand Down Expand Up @@ -141,7 +144,7 @@ def _parse_from_buffer(self, buffer: List[Dict[str, Any]], spectrum_index: Optio
descr = buffer[0]

spec.add_attribute(SPECTRUM_NAME, descr['transition_group_id'])
spec.add_attribute(SELECTED_ION_MZ, float(descr['PrecursorMz']))
spec.add_attribute(SPECTRUM_SELECTED_ION_MZ, float(descr['PrecursorMz']))

if 'FileName' in descr:
spec.add_attribute(SOURCE_FILE, urlify(descr['FileName']))
Expand All @@ -163,9 +166,10 @@ def _parse_from_buffer(self, buffer: List[Dict[str, Any]], spectrum_index: Optio

if 'PeptideSequence' in descr:
analyte.add_attribute(STRIPPED_PEPTIDE_TERM, descr['PeptideSequence'])
analyte.add_attribute(PROFORMA_PEPTIDE_TERM, pf_seq)
peptide.charge_state = descr['PrecursorCharge']
analyte.add_attribute(PROFORMA_ION, str(peptide))
analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)
analyte.add_attribute(CHARGE_STATE, int(descr['PrecursorCharge']))
spec.add_attribute(CHARGE_STATE, int(descr['PrecursorCharge']))

protein_group_id = analyte.get_next_group_identifier()
if "UniprotID" in descr:
Expand Down
27 changes: 13 additions & 14 deletions mzspeclib/backends/encyclopedia.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,11 @@
from mzspeclib.spectrum import Spectrum, SPECTRUM_NAME, CHARGE_STATE
from mzspeclib.attributes import AttributeManager, Attributed, Attribute

from mzspeclib.backends.base import SpectralLibraryBackendBase, FORMAT_VERSION_TERM, DEFAULT_VERSION
from mzspeclib.backends.base import SpectralLibraryBackendBase, FORMAT_VERSION, DEFAULT_VERSION

from mzspeclib.index.base import IndexBase


DECOY_SPECTRUM = "MS:1003192|decoy spectrum"
DECOY_PEPTIDE_SPECTRUM = "MS:1003195|unnatural peptidoform decoy spectrum"
from mzspeclib import const as c
from mzspeclib.const import CHARGE_STATE, DECOY_SPECTRUM, DECOY_PEPTIDE_SPECTRUM, PROFORMA_ION, STRIPPED_PEPTIDE_SEQ, LIBRARY_CREATION_SW


def _decode_peaks(record: sqlite3.Row):
Expand Down Expand Up @@ -96,8 +94,8 @@ def __init__(self, filename: str, **kwargs):

def read_header(self) -> bool:
attribs = AttributeManager()
attribs.add_attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)
attribs.add_attribute("MS:1003207|library creation software", "EncyclopeDIA")
attribs.add_attribute(FORMAT_VERSION, DEFAULT_VERSION)
attribs.add_attribute(LIBRARY_CREATION_SW, "EncyclopeDIA")
self.attributes = attribs
return True

Expand All @@ -109,10 +107,10 @@ def _populate_analyte(self, analyte: Analyte, row: Mapping[str, Any]):
EncyclopeDIA only stores modifications as delta masses.
"""
peptide = proforma.ProForma.parse(row['PeptideModSeq'])
analyte.add_attribute("MS:1003169|proforma peptidoform sequence", str(peptide))
analyte.add_attribute("MS:1001117|theoretical mass", peptide.mass)
analyte.add_attribute("MS:1000888|stripped peptide sequence", row['PeptideSeq'])
analyte.add_attribute(CHARGE_STATE, row['PrecursorCharge'])
peptide.charge_state = row['PrecursorCharge']
analyte.add_attribute(PROFORMA_ION, str(peptide))
analyte.add_attribute(c.THEORETICAL_MASS, peptide.mass)
analyte.add_attribute(STRIPPED_PEPTIDE_SEQ, row['PeptideSeq'])

cursor = self.connection.execute(
"SELECT ProteinAccession, isDecoy FROM peptidetoprotein WHERE PeptideSeq = ?;", (row['PeptideSeq'], ))
Expand Down Expand Up @@ -146,14 +144,15 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):
spectrum.key = info['rowid']
spectrum.index = info['rowid'] - 1
spectrum.precursor_mz = info['PrecursorMz']
spectrum.add_attribute(CHARGE_STATE, row["PrecursorCharge"])
try:
spectrum.add_attribute("MS:1000894|retention time", info['RTInSeconds'] / 60.0)
spectrum.add_attribute(c.RETENTION_TIME, info['RTInSeconds'] / 60.0)
except KeyError:
pass

try:
spectrum.add_attribute(
"MS:1003203|constituent spectrum file",
c.SOURCE_FILE,
f"file://{info['SourceFile']}"
)
except KeyError:
Expand All @@ -173,7 +172,7 @@ def get_spectrum(self, spectrum_number: int = None, spectrum_name: str = None):

mz_array, intensity_array = _decode_peaks(info)
n_peaks = len(mz_array)
spectrum.add_attribute("MS:1003059|number of peaks", n_peaks)
spectrum.add_attribute(c.NUM_PEAKS, n_peaks)

peak_list = []
# EncyclopeDIA does not encode product ion identities
Expand Down
12 changes: 6 additions & 6 deletions mzspeclib/backends/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@
from mzspeclib.analyte import Analyte, Interpretation
from mzspeclib.spectrum import Spectrum
from mzspeclib.utils import ValidationWarning
from mzspeclib.const import ATTRIBUTE_SET_NAME

from .base import (
DEFAULT_VERSION,
SpectralLibraryBackendBase,
SpectralLibraryWriterBase,
FORMAT_VERSION_TERM,
FORMAT_VERSION,
AttributeSetTypes,
)
from .utils import open_stream
Expand Down Expand Up @@ -50,8 +51,7 @@
INTERPRETATION_CLASSES = "interpretation_attribute_sets"
CLUSTER_CLASSES = "cluster_attribute_sets"

FORMAT_VERSION_ACC = FORMAT_VERSION_TERM.split("|")[0]
ATTRIBUTE_SET_NAME = "MS:1003212|library attribute set name"
FORMAT_VERSION_ACC = FORMAT_VERSION.split("|")[0]


class JSONSpectralLibrary(SpectralLibraryBackendBase):
Expand Down Expand Up @@ -108,12 +108,12 @@ def _load_attribute_sets(self, attribute_sets: dict):
def read_header(self) -> bool:
if self.buffer:
self._fill_attributes(self.buffer.get(LIBRARY_METADATA_KEY), self.attributes)
if not self.attributes.has_attribute(FORMAT_VERSION_TERM):
if not self.attributes.has_attribute(FORMAT_VERSION):
warnings.warn(
f"Library does not have a {FORMAT_VERSION_TERM}, assuming current version",
f"Library does not have a {FORMAT_VERSION}, assuming current version",
category=ValidationWarning,
)
attributes = [Attribute(FORMAT_VERSION_TERM, DEFAULT_VERSION)] + list(self.attributes)
attributes = [Attribute(FORMAT_VERSION, DEFAULT_VERSION)] + list(self.attributes)
self.attributes.clear()
self.attributes._attributes_from_iterable(attributes)
self.analyte_attribute_sets.update(self._load_attribute_sets(self.buffer.get(ANALYTE_CLASSES, {})))
Expand Down
Loading

0 comments on commit 0c408c3

Please sign in to comment.