Merge pull request #204 from MannLabs/develop
Develop
swillems authored Jun 9, 2022
2 parents f6e55e8 + dab477d commit 1c06ff1
Showing 28 changed files with 920 additions and 503 deletions.
21 changes: 16 additions & 5 deletions README.md
@@ -40,6 +40,8 @@ AlphaTims is an open-source Python package that provides fast accession and visu
* [**Citing AlphaTims**](#citing-alphatims)
* [**How to contribute**](#how-to-contribute)
* [**Changelog**](#changelog)
* [**1.0.0**](#100)
* [**0.3.2**](#032)
* [**0.3.1**](#031)
* [**0.3.0**](#030)
* [**0.2.8**](#028)
@@ -95,12 +97,18 @@ pip install "alphatims[stable]"

NOTE: You might need to run `pip install pip==21.0` before installing AlphaTims like this. Also note the double quotes `"`.

Alternatively, some basic plotting functions and the complete GUI can be installed with the following command:
Alternatively, some basic plotting functions can be installed with the following command:

```bash
pip install "alphatims[plotting]"
```

While the above command does allow usage of the full GUI, there are some known compatibility issues with newer versions of bokeh. As such, it is generally advised not to use loose plotting dependencies and to force a stable installation with:

```bash
pip install "alphatims[plotting-stable]"
```

When older samples need to be analyzed, it might be necessary to install the `legacy` version as well (see also the [troubleshooting](#troubleshooting) section):

```bash
@@ -385,15 +393,18 @@ For more information see [the Contributors License Agreement](misc/CLA.md).

The following changes were introduced in each version of AlphaTims. Download the latest version in the [installation section](#installation).

### 1.0.0

* FEAT: tempmmap for large arrays by default.

### 0.3.2

* FEAT: cli/gui allow bruker data as argument.
* FEAT/FIX: Polarity included in frame table.
* FIX: utils cleanup.
* FEAT: cli/gui allow bruker data as argument
* FIX: utils issues
* FEAT: by default use -1 threads in utils
* FIX: disable cla check
* FIX: utils issues.
* FEAT: by default use -1 threads in utils.
* FIX: disable cla check.

### 0.3.1

2 changes: 1 addition & 1 deletion alphatims/__init__.py
@@ -2,7 +2,7 @@


__project__ = "alphatims"
__version__ = "0.3.2"
__version__ = "1.0.0"
__license__ = "Apache"
__description__ = "A Python package to index Bruker TimsTOF raw data for fast and easy accession and visualization"
__author__ = "Sander Willems, Eugenia Voytik"
66 changes: 39 additions & 27 deletions alphatims/bruker.py
@@ -16,6 +16,7 @@
# local
import alphatims
import alphatims.utils
import alphatims.tempmmap as tm

if sys.platform[:5] == "win32":
BRUKER_DLL_FILE_NAME = os.path.join(
@@ -414,7 +415,7 @@ def process_frame(
Should be retrieved from the global metadata.
max_peaks_per_scan : int
The maximum number of peaks per scan.
Should be treieved from the global metadata.
Should be retrieved from the global metadata.
"""
with open(tdf_bin_file_name, "rb") as infile:
frame_start = frame_indptr[frame_id]
@@ -486,6 +487,7 @@ def read_bruker_binary(
bruker_d_folder_name: str,
compression_type: int,
max_peaks_per_scan: int,
mmap_detector_events: bool = None,
) -> tuple:
"""Read all data from an "analysis.tdf_bin" of a Bruker .d folder.
@@ -501,6 +503,10 @@
max_peaks_per_scan : int
The maximum number of peaks per scan.
Should be retrieved from the global metadata.
mmap_detector_events : bool
Do not save the intensity_values and tof_indices in memory,
but use an mmap instead.
Default is True.
Returns
-------
@@ -513,8 +519,12 @@
max_scan_count = frames.NumScans.max() + 1
scan_count = max_scan_count * frames.shape[0]
scan_indptr = np.zeros(scan_count + 1, dtype=np.int64)
intensities = np.empty(frame_indptr[-1], dtype=np.uint16)
tof_indices = np.empty(frame_indptr[-1], dtype=np.uint32)
if mmap_detector_events:
intensities = tm.empty(int(frame_indptr[-1]), dtype=np.uint16)
tof_indices = tm.empty(int(frame_indptr[-1]), dtype=np.uint32)
else:
intensities = np.empty(int(frame_indptr[-1]), dtype=np.uint16)
tof_indices = np.empty(int(frame_indptr[-1]), dtype=np.uint32)
tdf_bin_file_name = os.path.join(bruker_d_folder_name, "analysis.tdf_bin")
tims_offset_values = frames.TimsId.values
logging.info(
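
As a rough illustration of the memory-mapped allocation used above: a minimal sketch, where the event count is a hypothetical placeholder for `int(frame_indptr[-1])` and the only `alphatims.tempmmap` call assumed is the `tm.empty(shape, dtype)` usage visible in this hunk.

```python
import numpy as np

import alphatims.tempmmap as tm  # imported as `tm` in bruker.py above

# Allocate detector-event buffers backed by a temporary memory-mapped file
# instead of regular in-memory arrays, mirroring read_bruker_binary above.
n_events = 10_000_000  # hypothetical; normally int(frame_indptr[-1])
intensities = tm.empty(n_events, dtype=np.uint16)
tof_indices = tm.empty(n_events, dtype=np.uint32)

# The returned buffers are used like regular numpy arrays in read_bruker_binary.
intensities[:3] = [1, 2, 3]
print(intensities[:3], tof_indices.shape)
```
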
@@ -920,8 +930,8 @@ def __init__(
mobility_estimation_from_frame: int = 1,
slice_as_dataframe: bool = True,
use_calibrated_mz_values_as_default: int = 0,
use_hdf_if_available: bool = False,
mmap_detector_events: bool = None,
use_hdf_if_available: bool = True,
mmap_detector_events: bool = True,
drop_polarity: bool = True,
convert_polarity_to_int: bool = True,
):
@@ -961,14 +971,12 @@
If 2, calibration at the MS2 level is performed.
Default is 0.
use_hdf_if_available : bool
If an HDF file is available, use this instead of the
.d folder.
Default is False.
If an HDF file is available, use this instead of the .d folder.
Default is True.
mmap_detector_events : bool
Do not save the intensity_values and tof_indices in memory,
but use an mmap instead. If no .hdf file is available to use for
mmapping, one will be created automatically.
Default is False for .d folders and True for .hdf files.
but use an mmap instead.
Default is True.
drop_polarity : bool
The polarity column of the frames table contains "+" or "-" and
is not numerical.
@@ -983,9 +991,9 @@
This is ignored if the polarity is dropped.
Default is True.
"""
if bruker_d_folder_name.endswith("/"):
bruker_d_folder_name = bruker_d_folder_name[:-1]
logging.info(f"Importing data from {bruker_d_folder_name}")
if (mmap_detector_events is None) and bruker_d_folder_name.endswith(".hdf"):
mmap_detector_events = True
if bruker_d_folder_name.endswith(".d"):
bruker_hdf_file_name = f"{bruker_d_folder_name[:-2]}.hdf"
hdf_file_exists = os.path.exists(bruker_hdf_file_name)
@@ -999,18 +1007,13 @@
self.bruker_d_folder_name = os.path.abspath(
bruker_d_folder_name
)
if mmap_detector_events:
raise IOError(
f"Can only use mmapping from .hdf files. "
f"Either use the .hdf file as input directly, "
"or use the use_hdf_if_available option."
)
self._import_data_from_d_folder(
bruker_d_folder_name,
mz_estimation_from_frame,
mobility_estimation_from_frame,
drop_polarity,
convert_polarity_to_int,
mmap_detector_events,
)
elif bruker_d_folder_name.endswith(".hdf"):
self._import_data_from_hdf_file(
@@ -1023,7 +1026,7 @@
"WARNING: file extension not understood"
)
if not hasattr(self, "version"):
self._version = "none"
self._version = "N.A."
if self.version != alphatims.__version__:
logging.info(
"WARNING: "
@@ -1037,7 +1040,7 @@
)
# Precompile
self[0, "raw"]
logging.info(f"Succesfully imported data from {bruker_d_folder_name}")
logging.info(f"Successfully imported data from {bruker_d_folder_name}")

def __len__(self):
return len(self.intensity_values)
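
Given the new defaults documented above (`use_hdf_if_available=True`, `mmap_detector_events=True`), a minimal usage sketch might look as follows; the folder name is a hypothetical placeholder.

```python
import alphatims.bruker

# "sample.d" stands in for any Bruker .d folder (an exported .hdf file also works).
data = alphatims.bruker.TimsTOF(
    "sample.d",
    use_hdf_if_available=True,   # reuse sample.hdf next to the .d folder if it exists
    mmap_detector_events=True,   # keep intensity_values and tof_indices memory-mapped
)

print(len(data))       # total number of detector events, via __len__ above
raw = data[0, "raw"]   # the same slicing call used for precompilation above
```
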
@@ -1052,7 +1055,9 @@ def _import_data_from_d_folder(
mobility_estimation_from_frame: int,
drop_polarity: bool = True,
convert_polarity_to_int: bool = True,
mmap_detector_events: bool = True
):
logging.info(f"Using .d import for {bruker_d_folder_name}")
self._version = alphatims.__version__
self._zeroth_frame = True
(
@@ -1235,7 +1240,7 @@ def save_as_hdf(
full_file_name.seek(0)
else:
logging.info(
f"Succesfully wrote TimsTOF data to {full_file_name}."
f"Successfully wrote TimsTOF data to {full_file_name}."
)
return full_file_name
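
A hedged sketch of the export-then-reload round trip implied here, reusing the `data` object from the earlier sketch; the `directory` and `file_name` parameter names of `save_as_hdf` are assumptions not shown in this diff.

```python
# Export once; save_as_hdf returns the full file name on success (see above).
hdf_file_name = data.save_as_hdf(directory=".", file_name="sample.hdf")

# Reopening the .hdf goes through _import_data_from_hdf_file (next hunk) and,
# with the TimsTOF default mmap_detector_events=True, keeps the detector
# events memory-mapped.
data_reloaded = alphatims.bruker.TimsTOF(hdf_file_name)
```
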

@@ -1244,6 +1249,7 @@ def _import_data_from_hdf_file(
bruker_d_folder_name: str,
mmap_detector_events: bool = False,
):
logging.info(f"Using HDF import for {bruker_d_folder_name}")
with h5py.File(bruker_d_folder_name, "r") as hdf_root:
mmap_arrays = []
if mmap_detector_events:
@@ -1257,7 +1263,7 @@

def convert_from_indices(
self,
raw_indices=None,
raw_indices,
*,
frame_indices=None,
quad_indices=None,
@@ -1277,7 +1283,7 @@
return_mz_values: bool = False,
return_intensity_values: bool = False,
return_corrected_intensity_values: bool = False,
raw_indices_sorted: bool = True,
raw_indices_sorted: bool = False,
) -> dict:
"""Convert selected indices to a dict.
@@ -1339,13 +1345,19 @@
raw_indices_sorted : bool
If True, raw_indices are assumed to be sorted,
resulting in a faster conversion.
Default is True.
Default is False.
Returns
-------
dict
A dict with all requested columns.
"""
try:
iter(raw_indices)
except TypeError:
raw_indices = [raw_indices]
if not isinstance(raw_indices, np.ndarray):
raw_indices = np.array(raw_indices)
result = {}
if (raw_indices is not None) and any(
[
@@ -1720,7 +1732,7 @@ def as_dataframe(
mz_values: bool = True,
intensity_values: bool = True,
corrected_intensity_values: bool = True,
raw_indices_sorted: bool = True,
raw_indices_sorted: bool = False,
):
"""Convert raw indices to a pd.DataFrame.
@@ -1774,7 +1786,7 @@
raw_indices_sorted : bool
If True, raw_indices are assumed to be sorted,
resulting in a faster conversion.
Default is True.
Default is False.
Returns
-------
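
To round off the docstring changes above (`raw_indices` is now coerced to an array and `raw_indices_sorted` defaults to `False`), a hedged usage sketch reusing the `data` object from the earlier sketches; the index values are arbitrary examples and the positional `raw_indices` argument of `as_dataframe` is assumed from its docstring.

```python
import numpy as np

# Unsorted raw indices are now handled by default.
indices = np.array([42, 7, 19])
columns = data.convert_from_indices(
    indices,
    return_mz_values=True,
    return_intensity_values=True,
    raw_indices_sorted=False,  # new default; set True only for pre-sorted indices
)

# A scalar is wrapped into an array automatically by the new coercion block.
single = data.convert_from_indices(0, return_mz_values=True)

# The same selection as a pd.DataFrame.
df = data.as_dataframe(indices)
```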