Skip to content

Commit

Permalink
Merge pull request #170 from psavery/archiving-speed-up
Browse files Browse the repository at this point in the history
Archiving performance improvements
  • Loading branch information
joelvbernier authored Feb 3, 2021
2 parents f1549ee + 464bfd7 commit 4befc8e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 12 deletions.
14 changes: 13 additions & 1 deletion hexrd/imageseries/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,19 @@ def _process_frame(self, key):
def _subtract_dark(self, img, dark):
# need to check for values below zero
# !!! careful, truncation going on here;necessary to promote dtype?
return np.where(img > dark, img - dark, 0)

# This has been performance tested with the following:
# 1. return np.where(img > dark, img - dark, 0)
# 2. return np.clip(img - dark, a_min=0, a_max=None)
# 3. return (img - dark).clip(min=0)
# 4. ret = img - dark
# ret[ret < 0] = 0
# return ret
# Method 1 was the slowest, and method 4 was the fastest, perhaps
# because it creates fewer copies of the data.
ret = img - dark
ret[ret < 0] = 0
return ret

def _rectangle(self, img, r):
# restrict to rectangle
Expand Down
48 changes: 38 additions & 10 deletions hexrd/imageseries/save.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
"""Write imageseries to various formats"""

import abc
from concurrent.futures import ThreadPoolExecutor
import multiprocessing
import os
import threading
import warnings

import numpy as np
Expand Down Expand Up @@ -167,6 +170,7 @@ def __init__(self, ims, fname, **kwargs):
cdir = os.path.dirname(fname)
self._cache = os.path.join(cdir, cf)
self._cachename = cf
self.max_workers = kwargs.get('max_workers', None)

def _process_meta(self, save_omegas=False):
d = {}
Expand Down Expand Up @@ -199,12 +203,28 @@ def _write_yml(self):
def _write_frames(self):
"""also save shape array as originally done (before yaml)"""
# NOTE(review): this span is a merged-diff view.  The next six code
# lines (single rows/cols/vals buffers, `arrd = dict()`, and the
# serial for-loop header) are remnants of the pre-change serial
# implementation that the threaded version further below replaces.
# Number of pixels in one frame == worst-case number of sparse
# entries a single frame can produce.
buff_size = self._ims.shape[0]*self._ims.shape[1]
rows = np.empty(buff_size, dtype=np.uint16)
cols = np.empty(buff_size, dtype=np.uint16)
vals = np.empty(buff_size, dtype=self._ims.dtype)
arrd = dict()
for i in range(len(self._ims)):
# ???: make it so we can use enumerate on self._ims?
arrd = {}

# Worker count: default to all CPUs unless the caller capped it via
# the `max_workers` kwarg stored in __init__, and never spawn more
# workers than there are frames.
ncpus = multiprocessing.cpu_count()
max_workers = ncpus if self.max_workers is None else self.max_workers
num_workers = min(max_workers, len(self._ims))

# One pre-allocated scratch-buffer row per worker thread so threads
# never reallocate or contend for extraction buffers.
# NOTE(review): uint16 row/col buffers assume every frame dimension
# fits in 16 bits (< 65536 pixels per axis) -- TODO confirm this
# holds for all supported detectors.
row_buffers = np.empty((num_workers, buff_size), dtype=np.uint16)
col_buffers = np.empty((num_workers, buff_size), dtype=np.uint16)
val_buffers = np.empty((num_workers, buff_size), dtype=self._ims.dtype)
buffer_ids = {}
assign_buffer_lock = threading.Lock()

def assign_buffer_id():
# Runs once per worker thread (via the ThreadPoolExecutor
# `initializer`): maps this thread's ident to the next free buffer
# slot.  The lock guards the read-modify-write of `buffer_ids`.
with assign_buffer_lock:
buffer_ids[threading.get_ident()] = len(buffer_ids)

def extract_data(i):
# Look up the scratch buffers dedicated to this worker thread.
buffer_id = buffer_ids[threading.get_ident()]
rows = row_buffers[buffer_id]
cols = col_buffers[buffer_id]
vals = val_buffers[buffer_id]

# FIXME: in __init__() of ProcessedImageSeries:
# 'ProcessedImageSeries' object has no attribute '_adapter'

Expand All @@ -223,10 +243,18 @@ def _write_frames(self):
# NOTE(review): the fold marker above hides part of this function
# (presumably the per-frame thresholding/extraction that defines
# `count` and `sparseness`); comments here describe only what is
# visible.
msg = "frame %d is %4.2f%% sparse (cutoff is 95%%)" \
% (i, sparseness)
warnings.warn(msg)
# NOTE(review): the three '%d_row'-style lines and the `pass` below
# are remnants of the pre-change code; the f-string versions that
# follow are the current ones.
arrd['%d_row' % i] = rows[:count].copy()
arrd['%d_col' % i] = cols[:count].copy()
arrd['%d_data' % i] = vals[:count].copy()
pass

# Store only the occupied prefix of each scratch buffer; .copy() is
# required because the shared buffers are reused by later frames.
arrd[f'{i}_row'] = rows[:count].copy()
arrd[f'{i}_col'] = cols[:count].copy()
arrd[f'{i}_data'] = vals[:count].copy()

kwargs = {
'max_workers': num_workers,
'initializer': assign_buffer_id,
}
# Fan each frame out to a thread; workers fill `arrd` as a side
# effect.  Threads (not processes) presumably pay off because the
# extraction kernel releases the GIL (extract_ijv gains nogil=True
# in this same commit) -- confirm.
# NOTE(review): the iterator returned by executor.map is never
# consumed, so exceptions raised inside extract_data are silently
# discarded -- consider list(...) or as_completed to surface them.
with ThreadPoolExecutor(**kwargs) as executor:
executor.map(extract_data, range(len(self._ims)))

# Frame-independent metadata stored alongside the sparse arrays.
arrd['shape'] = self._ims.shape
arrd['nframes'] = len(self._ims)
arrd['dtype'] = str(self._ims.dtype).encode()
Expand Down
2 changes: 1 addition & 1 deletion hexrd/matrixutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -852,7 +852,7 @@ def symmToVecds(A):


if USE_NUMBA:
@numba.njit
@numba.njit(cache=True, nogil=True)
def extract_ijv(in_array, threshold, out_i, out_j, out_v):
n = 0
w, h = in_array.shape
Expand Down

0 comments on commit 4befc8e

Please sign in to comment.