Skip to content

Commit

Permalink
Merge pull request #170 from psavery/archiving-speed-up
Browse files Browse the repository at this point in the history
Archiving performance improvements
  • Loading branch information
joelvbernier authored Feb 3, 2021
2 parents f1549ee + 464bfd7 commit 4befc8e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 12 deletions.
14 changes: 13 additions & 1 deletion hexrd/imageseries/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,19 @@ def _process_frame(self, key):
def _subtract_dark(self, img, dark):
# need to check for values below zero
# !!! careful, truncation going on here;necessary to promote dtype?
return np.where(img > dark, img - dark, 0)

# This has been performance tested with the following:
# 1. return np.where(img > dark, img - dark, 0)
# 2. return np.clip(img - dark, a_min=0, a_max=None)
# 3. return (img - dark).clip(min=0)
# 4. ret = img - dark
# ret[ret < 0] = 0
# return ret
# Method 1 was the slowest, and method 4 was the fastest, perhaps
# because it creates fewer copies of the data.
ret = img - dark
ret[ret < 0] = 0
return ret

def _rectangle(self, img, r):
# restrict to rectangle
Expand Down
48 changes: 38 additions & 10 deletions hexrd/imageseries/save.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Write imageseries to various formats"""

import abc
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
import os
import threading
import warnings

import numpy as np
Expand Down Expand Up @@ -167,6 +170,7 @@ def __init__(self, ims, fname, **kwargs):
cdir = os.path.dirname(fname)
self._cache = os.path.join(cdir, cf)
self._cachename = cf
self.max_workers = kwargs.get('max_workers', None)

def _process_meta(self, save_omegas=False):
d = {}
Expand Down Expand Up @@ -199,12 +203,28 @@ def _write_yml(self):
def _write_frames(self):
"""also save shape array as originally done (before yaml)"""
# NOTE(review): this span is a merged-diff view.  The next six code
# lines (single rows/cols/vals buffers, `arrd = dict()`, and the
# serial for-loop header) are remnants of the pre-change serial
# implementation that the threaded version further below replaces.
# Number of pixels in one frame == worst-case number of sparse
# entries a single frame can produce.
buff_size = self._ims.shape[0]*self._ims.shape[1]
rows = np.empty(buff_size, dtype=np.uint16)
cols = np.empty(buff_size, dtype=np.uint16)
vals = np.empty(buff_size, dtype=self._ims.dtype)
arrd = dict()
for i in range(len(self._ims)):
# ???: make it so we can use enumerate on self._ims?
arrd = {}

# Worker count: default to all CPUs unless the caller capped it via
# the `max_workers` kwarg stored in __init__, and never spawn more
# workers than there are frames.
ncpus = multiprocessing.cpu_count()
max_workers = ncpus if self.max_workers is None else self.max_workers
num_workers = min(max_workers, len(self._ims))

# One pre-allocated scratch-buffer row per worker thread so threads
# never reallocate or contend for extraction buffers.
# NOTE(review): uint16 row/col buffers assume every frame dimension
# fits in 16 bits (< 65536 pixels per axis) -- TODO confirm this
# holds for all supported detectors.
row_buffers = np.empty((num_workers, buff_size), dtype=np.uint16)
col_buffers = np.empty((num_workers, buff_size), dtype=np.uint16)
val_buffers = np.empty((num_workers, buff_size), dtype=self._ims.dtype)
buffer_ids = {}
assign_buffer_lock = threading.Lock()

def assign_buffer_id():
# Runs once per worker thread (via the ThreadPoolExecutor
# `initializer`): maps this thread's ident to the next free buffer
# slot.  The lock guards the read-modify-write of `buffer_ids`.
with assign_buffer_lock:
buffer_ids[threading.get_ident()] = len(buffer_ids)

def extract_data(i):
# Look up the scratch buffers dedicated to this worker thread.
buffer_id = buffer_ids[threading.get_ident()]
rows = row_buffers[buffer_id]
cols = col_buffers[buffer_id]
vals = val_buffers[buffer_id]

# FIXME: in __init__() of ProcessedImageSeries:
# 'ProcessedImageSeries' object has no attribute '_adapter'

Expand All @@ -223,10 +243,18 @@ def _write_frames(self):
# NOTE(review): the fold marker above hides part of this function
# (presumably the per-frame thresholding/extraction that defines
# `count` and `sparseness`); comments here describe only what is
# visible.
msg = "frame %d is %4.2f%% sparse (cutoff is 95%%)" \
% (i, sparseness)
warnings.warn(msg)
# NOTE(review): the three '%d_row'-style lines and the `pass` below
# are remnants of the pre-change code; the f-string versions that
# follow are the current ones.
arrd['%d_row' % i] = rows[:count].copy()
arrd['%d_col' % i] = cols[:count].copy()
arrd['%d_data' % i] = vals[:count].copy()
pass

# Store only the occupied prefix of each scratch buffer; .copy() is
# required because the shared buffers are reused by later frames.
arrd[f'{i}_row'] = rows[:count].copy()
arrd[f'{i}_col'] = cols[:count].copy()
arrd[f'{i}_data'] = vals[:count].copy()

kwargs = {
'max_workers': num_workers,
'initializer': assign_buffer_id,
}
# Fan each frame out to a thread; workers fill `arrd` as a side
# effect.  Threads (not processes) presumably pay off because the
# extraction kernel releases the GIL (extract_ijv gains nogil=True
# in this same commit) -- confirm.
# NOTE(review): the iterator returned by executor.map is never
# consumed, so exceptions raised inside extract_data are silently
# discarded -- consider list(...) or as_completed to surface them.
with ThreadPoolExecutor(**kwargs) as executor:
executor.map(extract_data, range(len(self._ims)))

# Frame-independent metadata stored alongside the sparse arrays.
arrd['shape'] = self._ims.shape
arrd['nframes'] = len(self._ims)
arrd['dtype'] = str(self._ims.dtype).encode()
Expand Down
2 changes: 1 addition & 1 deletion hexrd/matrixutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ def symmToVecds(A):


if USE_NUMBA:
@numba.njit
@numba.njit(cache=True, nogil=True)
def extract_ijv(in_array, threshold, out_i, out_j, out_v):
n = 0
w, h = in_array.shape
Expand Down

0 comments on commit 4befc8e

Please sign in to comment.