Merge pull request #204 from MannLabs/develop
Develop
swillems authored Jun 9, 2022
2 parents f6e55e8 + dab477d commit 1c06ff1
Showing 28 changed files with 920 additions and 503 deletions.
21 changes: 16 additions & 5 deletions README.md
@@ -40,6 +40,8 @@ AlphaTims is an open-source Python package that provides fast accession and visu
* [**Citing AlphaTims**](#citing-alphatims)
* [**How to contribute**](#how-to-contribute)
* [**Changelog**](#changelog)
* [**1.0.0**](#100)
* [**0.3.2**](#032)
* [**0.3.1**](#031)
* [**0.3.0**](#030)
* [**0.2.8**](#028)
@@ -95,12 +97,18 @@ pip install "alphatims[stable]"

NOTE: You might need to run `pip install pip==21.0` before installing AlphaTims like this. Also note the double quotes `"`.

Alternatively, some basic plotting functions and the complete GUI can be installed with the following command:
Alternatively, some basic plotting functions can be installed with the following command:

```bash
pip install "alphatims[plotting]"
```

While the above command does allow usage of the full GUI, there are some known compatibility issues with newer versions of bokeh. As such, it is generally advised not to use loose plotting dependencies and to force a stable installation with:

```bash
pip install "alphatims[plotting-stable]"
```

When older samples need to be analyzed, it might be necessary to install the `legacy` version as well (see also the [troubleshooting](#troubleshooting) section):

```bash
@@ -385,15 +393,18 @@ For more information see [the Contributors License Agreement](misc/CLA.md).

The following changes were introduced in each version of AlphaTims. Download the latest version in the [installation section](#installation).

### 1.0.0

* FEAT: tempmmap for large arrays by default.

### 0.3.2

* FEAT: cli/gui allow bruker data as argument.
* FEAT/FIX: Polarity included in frame table.
* FIX: utils cleanup.
* FEAT: cli/gui allow bruker data as argument
* FIX: utils issues
* FEAT: by default use -1 threads in utils
* FIX: disable cla check
* FIX: utils issues.
* FEAT: by default use -1 threads in utils.
* FIX: disable cla check.

### 0.3.1

2 changes: 1 addition & 1 deletion alphatims/__init__.py
@@ -2,7 +2,7 @@


__project__ = "alphatims"
__version__ = "0.3.2"
__version__ = "1.0.0"
__license__ = "Apache"
__description__ = "A Python package to index Bruker TimsTOF raw data for fast and easy accession and visualization"
__author__ = "Sander Willems, Eugenia Voytik"
66 changes: 39 additions & 27 deletions alphatims/bruker.py
@@ -16,6 +16,7 @@
# local
import alphatims
import alphatims.utils
import alphatims.tempmmap as tm

if sys.platform[:5] == "win32":
BRUKER_DLL_FILE_NAME = os.path.join(
@@ -414,7 +415,7 @@ def process_frame(
Should be retrieved from the global metadata.
max_peaks_per_scan : int
The maximum number of peaks per scan.
Should be treieved from the global metadata.
Should be retrieved from the global metadata.
"""
with open(tdf_bin_file_name, "rb") as infile:
frame_start = frame_indptr[frame_id]
@@ -486,6 +487,7 @@ def read_bruker_binary(
bruker_d_folder_name: str,
compression_type: int,
max_peaks_per_scan: int,
mmap_detector_events: bool = None,
) -> tuple:
"""Read all data from an "analysis.tdf_bin" of a Bruker .d folder.
@@ -501,6 +503,10 @@
max_peaks_per_scan : int
The maximum number of peaks per scan.
Should be retrieved from the global metadata.
mmap_detector_events : bool
Do not save the intensity_values and tof_indices in memory,
but use an mmap instead.
Default is True.
Returns
-------
@@ -513,8 +519,12 @@
max_scan_count = frames.NumScans.max() + 1
scan_count = max_scan_count * frames.shape[0]
scan_indptr = np.zeros(scan_count + 1, dtype=np.int64)
intensities = np.empty(frame_indptr[-1], dtype=np.uint16)
tof_indices = np.empty(frame_indptr[-1], dtype=np.uint32)
if mmap_detector_events:
intensities = tm.empty(int(frame_indptr[-1]), dtype=np.uint16)
tof_indices = tm.empty(int(frame_indptr[-1]), dtype=np.uint32)
else:
intensities = np.empty(int(frame_indptr[-1]), dtype=np.uint16)
tof_indices = np.empty(int(frame_indptr[-1]), dtype=np.uint32)
tdf_bin_file_name = os.path.join(bruker_d_folder_name, "analysis.tdf_bin")
tims_offset_values = frames.TimsId.values
logging.info(
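
As a rough illustration of the memory-mapped allocation used above: a minimal sketch, where the event count is a hypothetical placeholder for `int(frame_indptr[-1])` and the only `alphatims.tempmmap` call assumed is the `tm.empty(shape, dtype)` usage visible in this hunk.

```python
import numpy as np

import alphatims.tempmmap as tm  # imported as `tm` in bruker.py above

# Allocate detector-event buffers backed by a temporary memory-mapped file
# instead of regular in-memory arrays, mirroring read_bruker_binary above.
n_events = 10_000_000  # hypothetical; normally int(frame_indptr[-1])
intensities = tm.empty(n_events, dtype=np.uint16)
tof_indices = tm.empty(n_events, dtype=np.uint32)

# The returned buffers are used like regular numpy arrays in read_bruker_binary.
intensities[:3] = [1, 2, 3]
print(intensities[:3], tof_indices.shape)
```
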
@@ -920,8 +930,8 @@ def __init__(
mobility_estimation_from_frame: int = 1,
slice_as_dataframe: bool = True,
use_calibrated_mz_values_as_default: int = 0,
use_hdf_if_available: bool = False,
mmap_detector_events: bool = None,
use_hdf_if_available: bool = True,
mmap_detector_events: bool = True,
drop_polarity: bool = True,
convert_polarity_to_int: bool = True,
):
@@ -961,14 +971,12 @@
If 2, calibration at the MS2 level is performed.
Default is 0.
use_hdf_if_available : bool
If an HDF file is available, use this instead of the
.d folder.
Default is False.
If an HDF file is available, use this instead of the .d folder.
Default is True.
mmap_detector_events : bool
Do not save the intensity_values and tof_indices in memory,
but use an mmap instead. If no .hdf file is available to use for
mmapping, one will be created automatically.
Default is False for .d folders and True for .hdf files.
but use an mmap instead.
Default is True.
drop_polarity : bool
The polarity column of the frames table contains "+" or "-" and
is not numerical.
@@ -983,9 +991,9 @@
This is ignored if the polarity is dropped.
Default is True.
"""
if bruker_d_folder_name.endswith("/"):
bruker_d_folder_name = bruker_d_folder_name[:-1]
logging.info(f"Importing data from {bruker_d_folder_name}")
if (mmap_detector_events is None) and bruker_d_folder_name.endswith(".hdf"):
mmap_detector_events = True
if bruker_d_folder_name.endswith(".d"):
bruker_hdf_file_name = f"{bruker_d_folder_name[:-2]}.hdf"
hdf_file_exists = os.path.exists(bruker_hdf_file_name)
@@ -999,18 +1007,13 @@
self.bruker_d_folder_name = os.path.abspath(
bruker_d_folder_name
)
if mmap_detector_events:
raise IOError(
f"Can only use mmapping from .hdf files. "
f"Either use the .hdf file as input directly, "
"or use the use_hdf_if_available option."
)
self._import_data_from_d_folder(
bruker_d_folder_name,
mz_estimation_from_frame,
mobility_estimation_from_frame,
drop_polarity,
convert_polarity_to_int,
mmap_detector_events,
)
elif bruker_d_folder_name.endswith(".hdf"):
self._import_data_from_hdf_file(
@@ -1023,7 +1026,7 @@
"WARNING: file extension not understood"
)
if not hasattr(self, "version"):
self._version = "none"
self._version = "N.A."
if self.version != alphatims.__version__:
logging.info(
"WARNING: "
@@ -1037,7 +1040,7 @@
)
# Precompile
self[0, "raw"]
logging.info(f"Succesfully imported data from {bruker_d_folder_name}")
logging.info(f"Successfully imported data from {bruker_d_folder_name}")

def __len__(self):
return len(self.intensity_values)
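
Given the new defaults documented above (`use_hdf_if_available=True`, `mmap_detector_events=True`), a minimal usage sketch might look as follows; the folder name is a hypothetical placeholder.

```python
import alphatims.bruker

# "sample.d" stands in for any Bruker .d folder (an exported .hdf file also works).
data = alphatims.bruker.TimsTOF(
    "sample.d",
    use_hdf_if_available=True,   # reuse sample.hdf next to the .d folder if it exists
    mmap_detector_events=True,   # keep intensity_values and tof_indices memory-mapped
)

print(len(data))       # total number of detector events, via __len__ above
raw = data[0, "raw"]   # the same slicing call used for precompilation above
```
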
@@ -1052,7 +1055,9 @@ def _import_data_from_d_folder(
mobility_estimation_from_frame: int,
drop_polarity: bool = True,
convert_polarity_to_int: bool = True,
mmap_detector_events: bool = True
):
logging.info(f"Using .d import for {bruker_d_folder_name}")
self._version = alphatims.__version__
self._zeroth_frame = True
(
@@ -1235,7 +1240,7 @@ def save_as_hdf(
full_file_name.seek(0)
else:
logging.info(
f"Succesfully wrote TimsTOF data to {full_file_name}."
f"Successfully wrote TimsTOF data to {full_file_name}."
)
return full_file_name
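
A hedged sketch of the export-then-reload round trip implied here, reusing the `data` object from the earlier sketch; the `directory` and `file_name` parameter names of `save_as_hdf` are assumptions not shown in this diff.

```python
# Export once; save_as_hdf returns the full file name on success (see above).
hdf_file_name = data.save_as_hdf(directory=".", file_name="sample.hdf")

# Reopening the .hdf goes through _import_data_from_hdf_file (next hunk) and,
# with the TimsTOF default mmap_detector_events=True, keeps the detector
# events memory-mapped.
data_reloaded = alphatims.bruker.TimsTOF(hdf_file_name)
```
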

@@ -1244,6 +1249,7 @@ def _import_data_from_hdf_file(
bruker_d_folder_name: str,
mmap_detector_events: bool = False,
):
logging.info(f"Using HDF import for {bruker_d_folder_name}")
with h5py.File(bruker_d_folder_name, "r") as hdf_root:
mmap_arrays = []
if mmap_detector_events:
@@ -1257,7 +1263,7 @@

def convert_from_indices(
self,
raw_indices=None,
raw_indices,
*,
frame_indices=None,
quad_indices=None,
@@ -1277,7 +1283,7 @@
return_mz_values: bool = False,
return_intensity_values: bool = False,
return_corrected_intensity_values: bool = False,
raw_indices_sorted: bool = True,
raw_indices_sorted: bool = False,
) -> dict:
"""Convert selected indices to a dict.
@@ -1339,13 +1345,19 @@
raw_indices_sorted : bool
If True, raw_indices are assumed to be sorted,
resulting in a faster conversion.
Default is True.
Default is False.
Returns
-------
dict
A dict with all requested columns.
"""
try:
iter(raw_indices)
except TypeError:
raw_indices = [raw_indices]
if not isinstance(raw_indices, np.ndarray):
raw_indices = np.array(raw_indices)
result = {}
if (raw_indices is not None) and any(
[
@@ -1720,7 +1732,7 @@ def as_dataframe(
mz_values: bool = True,
intensity_values: bool = True,
corrected_intensity_values: bool = True,
raw_indices_sorted: bool = True,
raw_indices_sorted: bool = False,
):
"""Convert raw indices to a pd.DataFrame.
@@ -1774,7 +1786,7 @@
raw_indices_sorted : bool
If True, raw_indices are assumed to be sorted,
resulting in a faster conversion.
Default is True.
Default is False.
Returns
-------
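
To round off the docstring changes above (`raw_indices` is now coerced to an array and `raw_indices_sorted` defaults to `False`), a hedged usage sketch reusing the `data` object from the earlier sketches; the index values are arbitrary examples and the positional `raw_indices` argument of `as_dataframe` is assumed from its docstring.

```python
import numpy as np

# Unsorted raw indices are now handled by default.
indices = np.array([42, 7, 19])
columns = data.convert_from_indices(
    indices,
    return_mz_values=True,
    return_intensity_values=True,
    raw_indices_sorted=False,  # new default; set True only for pre-sorted indices
)

# A scalar is wrapped into an array automatically by the new coercion block.
single = data.convert_from_indices(0, return_mz_values=True)

# The same selection as a pd.DataFrame.
df = data.as_dataframe(indices)
```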