Skip to content

Commit

Permalink
Fix grouping error in cyclic deconvoluter
Browse files Browse the repository at this point in the history
  • Loading branch information
mobiusklein committed Jun 1, 2023
1 parent d67a55a commit f9dab31
Show file tree
Hide file tree
Showing 5 changed files with 50 additions and 25 deletions.
16 changes: 8 additions & 8 deletions examples/waters_cyclic_deconvolute.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,7 +286,7 @@ def __init__(self, ms_file,

if storage_type is None:
storage_type = IonMobilityAware3DThreadedMzMLScanStorageHandler

self.stream_cls = None
self.ms_file = ms_file
self.storage_path = storage_path
self.sample_name = sample_name
Expand Down Expand Up @@ -398,8 +398,7 @@ def cli():
def feature_deconvolution(input_path, output_path, lockmass_config=None, start_time=0, end_time=None, averagine='glycopeptide',
minimum_intensity=10.0, lock_mass_function=3, processes: int = 4,
isolation_window_width=0.0, denoise=1.0, signal_averaging=2):
"""Extract features from each IM-MS cycle followed by deisotoping and charge state deconvolution.
"""
"""Extract features from each IM-MS cycle followed by deisotoping and charge state deconvolution."""
sample_name = os.path.basename(input_path).rsplit(".", 1)[0]
logging.basicConfig(
level="INFO", format='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S %p',
Expand Down Expand Up @@ -429,10 +428,9 @@ def feature_deconvolution(input_path, output_path, lockmass_config=None, start_t

task = MSESampleConsumer(
input_path, storage_path=output_path,
ms1_peak_picking_args={"error_tolerance": 4e-5,
"minimum_intensity": minimum_intensity / 2, "denoise": denoise},
ms1_peak_picking_args={"minimum_intensity": minimum_intensity / 2, "denoise": denoise},
msn_peak_picking_args={
"average_within": signal_averaging, "error_tolerance": 4e-5, "denoise": denoise},
"average_within": signal_averaging, "denoise": denoise},
ms1_deconvolution_args={
"averagine": averagine,
"truncate_after": 0.95,
Expand Down Expand Up @@ -469,7 +467,8 @@ def feature_deconvolution(input_path, output_path, lockmass_config=None, start_t
@click.option("-w", "--weight-scaling", type=float, default=1.0,
help="A weight scaling factor. >1 to increase the overall abundance of deconvolved peaks")
def precursor_product_deconvolution(input_path, output_path, edges_per_feature: int=1000, weight_scaling: float=1.0):
"""Takes a deconvolved LC-IM-MSe run and generate pseudospectra for
"""
Takes a deconvolved LC-IM-MSe run and generate pseudospectra for
precursor ions using correlated product ion features enclosed in the IM
and RT dimensions.
"""
Expand Down Expand Up @@ -534,7 +533,8 @@ def precursor_product_deconvolution(input_path, output_path, edges_per_feature:
@click.argument("input_path", type=click.Path())
@click.argument("output_path", type=click.Path(writable=True))
def naive_ion_mobility_overlap_pseudospectra(input_path, output_path):
"""Generate pseudo-spectra from deconvolved cycles where the precursor ion spans
"""
Generate pseudo-spectra from deconvolved cycles where the precursor ion spans
some or all of the ion dimension of the product ion's mobility dimension and has
a larger neutral mass. Makes no use of retention time.
"""
Expand Down
12 changes: 9 additions & 3 deletions src/ms_deisotope/data_source/_vendor/masslynx/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import sys

from collections import defaultdict, OrderedDict
from typing import Iterator
from typing import Any, Iterator

import numpy as np
from ms_deisotope.data_source.metadata.data_transformation import DataProcessingInformation, ProcessingMethod
Expand Down Expand Up @@ -40,7 +40,8 @@


def is_waters_raw_dir(path):
"""Detect whether or not the file referenced by ``path``
"""
Detect whether or not the file referenced by ``path``
is a Waters RAW directory.
Parameters
Expand All @@ -66,7 +67,8 @@ def is_waters_raw_dir(path):


def infer_reader(path):
"""If the file referenced by ``path`` is a Waters RAW
"""
If the file referenced by ``path`` is a Waters RAW
directory, return the callable (:class:`MassLynxRawLoader`) to
open it, otherwise raise an exception.
Expand Down Expand Up @@ -431,6 +433,7 @@ def __init__(self, raw_path, lockmass_config=None, default_isolation_width=0.0,
self.source_file)
self.lockmass_processor = MassLynxLockMassProcessor.MassLynxLockMassProcessor()
self.lockmass_processor.SetRawData(self.scan_reader)
self.lockmass_config = lockmass_config
self.configure_lockmass(lockmass_config)
self.index = []
self.cycle_index = []
Expand Down Expand Up @@ -467,6 +470,9 @@ def configure_lockmass(self, lockmass_config=None):
if self.lockmass_processor.CanLockMassCorrect():
self.lockmass_processor.LockMassCorrect()

def __reduce__(self):
return self.__class__, (self.source_file, self.lockmass_config, self.default_isolation_width)

def __repr__(self):
return "MassLynxRawLoader(%r)" % (self.source_file)

Expand Down
30 changes: 20 additions & 10 deletions src/ms_deisotope/data_source/scan/mobility_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ def __call__(self, value):


class RawDataArrays3D(namedtuple("RawDataArrays3D", ['mz', 'intensity', 'ion_mobility'])):
"""Represent the m/z, intensity, and ion mobility arrays associated with a raw
"""
Represent the m/z, intensity, and ion mobility arrays associated with a raw
ion mobility frame of mass spectra.
Thin wrapper around a ``namedtuple``, so this object supports
Expand Down Expand Up @@ -233,7 +234,8 @@ def drift_time(self):
return self.ion_mobility

def has_array(self, array_type):
"""Check if this array set contains an array of the
"""
Check if this array set contains an array of the
requested type.
This method uses the semantic lookup mechanism to test
Expand Down Expand Up @@ -286,7 +288,8 @@ def __copy__(self):
return inst

def copy(self):
"""Make a deep copy of this object.
"""
Make a deep copy of this object.
Returns
-------
Expand Down Expand Up @@ -322,7 +325,8 @@ def __getitem__(self, i):

@classmethod
def stack(cls, scans, ion_mobility_array_type=None):
"""Combine multiple :class:`~.Scan` objects or (ion mobility, :class:`~.RawDataArrays`)
"""
Combine multiple :class:`~.Scan` objects or (ion mobility, :class:`~.RawDataArrays`)
pairs into a single :class:`~.RawDataArrays3D`
Parameters
Expand Down Expand Up @@ -367,7 +371,8 @@ def stack(cls, scans, ion_mobility_array_type=None):

@classmethod
def from_arrays(cls, mz_array, intensity_array, ion_mobility_array, ion_mobility_array_type=None, data_arrays=None):
"""Build a new :class:`~.RawDataArrays3D` from parallel arrays.
"""
Build a new :class:`~.RawDataArrays3D` from parallel arrays.
This will sort all arrays w.r.t. m/z and ion mobility.
Expand Down Expand Up @@ -414,7 +419,8 @@ def _slice(self, i, include_ion_mobility=True):
self.ion_mobility_array_type, data_arrays)

def unstack(self, include_empty=True):
"""Convert this 3D array into a list of (ion mobility, :class:`~.RawDataArrays`) pairs
"""
Convert this 3D array into a list of (ion mobility, :class:`~.RawDataArrays`) pairs
Parameters
----------
Expand Down Expand Up @@ -448,7 +454,8 @@ def unstack(self, include_empty=True):
return acc

def grid(self):
"""Convert this 3D array into an intensity grid with
"""
Convert this 3D array into an intensity grid with
m/z along the rows and ion mobility along the columns.
Returns
Expand All @@ -469,7 +476,8 @@ def grid(self):
return mz_axis, im_axis, intensity

def find_mz(self, mz):
"""Find the nearest index to the query ``mz``
"""
Find the nearest index to the query ``mz``
Parameters
----------
Expand Down Expand Up @@ -524,7 +532,8 @@ def find_mz(self, mz):
return 0

def between_mz(self, low, high):
"""Return a slice of the arrays between ``low`` and ``high`` m/z.
"""
Return a slice of the arrays between ``low`` and ``high`` m/z.
Parameters
----------
Expand Down Expand Up @@ -622,7 +631,8 @@ def to_deconvoluted_peak_set(self, time_bound=None, mass_upper_bound: float = fl


class IonMobilityFrame(FrameBase):
"""A :class:`IonMobilityFrame` represents a single time point acquisition of
"""
A :class:`IonMobilityFrame` represents a single time point acquisition of
multiple mass spectra across multiple ion mobility drift time points. Because
of its drift time by m/z structure, it does not have 1-D peak sets, but pseudo-
:class:`~.LCMSFeatureMap`-like data structures which conserve the over-time
Expand Down
3 changes: 2 additions & 1 deletion src/ms_deisotope/feature_map/mobility_frame_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def __init__(self, data_source, ms1_peak_picking_args=None,

@property
def reader(self) -> IonMobilitySourceRandomAccessFrameSource:
"""The :class:`~.IonMobilitySourceRandomAccessFrameSource` which generates the raw scans that will
"""
The :class:`~.IonMobilitySourceRandomAccessFrameSource` which generates the raw scans that will
be processed.
Returns
Expand Down
14 changes: 11 additions & 3 deletions src/ms_deisotope/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ def _loader_creator(specification, **kwargs):
raise ValueError("Cannot determine how to get a ScanIterator from %r" % (specification,))


# NOTE: This simplification step leads to potentially large deviations from the "unthresholded"
# deconvolution solution. This procedure is too aggressive for real world data.
def _simplify_peak_set(peaks, bin_width=5.0):
bin_edges = np.arange(0, peaks[-1].mz + bin_width, bin_width)
bins = []
Expand Down Expand Up @@ -428,7 +430,8 @@ def _get_envelopes(self, precursor_scan: Scan) -> Optional[List[Tuple[float, flo
return chosen_envelopes

def _pick_precursor_scan_peaks(self, precursor_scan: Scan, chosen_envelopes: Optional[List[Tuple[float, float]]]=None) -> Union[PeakIndex, PeakSet]:
"""Pick peaks from the given precursor scan
"""
Pick peaks from the given precursor scan
Parameters
----------
Expand Down Expand Up @@ -457,7 +460,8 @@ def _pick_precursor_scan_peaks(self, precursor_scan: Scan, chosen_envelopes: Opt
return prec_peaks

def _average_ms1(self, precursor_scan: Scan) -> Union[PeakIndex, PeakSet]:
"""Average signal from :attr:`self.ms1_averaging` scans from
"""
Average signal from :attr:`self.ms1_averaging` scans from
before and after ``precursor_scan`` and pick peaks from the
averaged arrays.
Expand All @@ -478,7 +482,8 @@ def _average_ms1(self, precursor_scan: Scan) -> Union[PeakIndex, PeakSet]:
return prec_peaks

def pick_precursor_scan_peaks(self, precursor_scan: Scan) -> Union[PeakIndex, PeakSet]:
"""Picks peaks for the given ``precursor_scan`` using the
"""
Picks peaks for the given ``precursor_scan`` using the
appropriate strategy.
If :attr:`ms1_averaging` > 0, then the signal averaging strategy
Expand All @@ -487,6 +492,7 @@ def pick_precursor_scan_peaks(self, precursor_scan: Scan) -> Union[PeakIndex, Pe
Parameters
----------
precursor_scan: Scan
The scan to pick peaks from
Returns
-------
Expand All @@ -498,6 +504,8 @@ def pick_precursor_scan_peaks(self, precursor_scan: Scan) -> Union[PeakIndex, Pe
else:
prec_peaks = self._pick_precursor_scan_peaks(precursor_scan)
n_peaks = len(prec_peaks)
# NOTE: This simplification step leads to potentially large deviations from the "unthresholded"
# deconvolution solution. This procedure is too aggressive for real world data.
if n_peaks > self.too_many_peaks_threshold:
self.log("%d peaks found for %r, applying local intensity threshold." % (n_peaks, precursor_scan))
prec_peaks = _simplify_peak_set(prec_peaks)
Expand Down

0 comments on commit f9dab31

Please sign in to comment.