diff --git a/src/processing_steps/0200_generate_byte_hdf5.py b/src/processing_steps/0200_generate_byte_hdf5.py index af04c32..beb3627 100755 --- a/src/processing_steps/0200_generate_byte_hdf5.py +++ b/src/processing_steps/0200_generate_byte_hdf5.py @@ -1,19 +1,21 @@ #! /usr/bin/python3 ''' Byte-per-voxel HDF5 files for complete multi-scan tomograms -Format -/subvolume_dimensions: int(n,3). For each of the n component scans, the sub-volume dimensions (nz,ny,nx) -/subvolume_range: float(n,2). For each of the n component scane, the value range (vmin,vmax) -/subvolume_metadata: group Attributes are info from ESRF XML-file describing original data -/voxels: uint8(Nz,Ny,Nx). Nz = sum(scan_dimensions[:,0]), ny = minimum(subvolume_dimensions[:,1]), nx = minimum(subvolume_dimensions[:,2]) + +Format: +/subvolume_dimensions: int(n,3). For each of the n component scans, the sub-volume dimensions (nz,ny,nx) +/subvolume_range: float(n,2). For each of the n component scane, the value range (vmin,vmax) +/subvolume_metadata: group Attributes are info from ESRF XML-file describing original data +/voxels: uint8(Nz,Ny,Nx). Nz = sum(scan_dimensions[:,0]), ny = minimum(subvolume_dimensions[:,1]), nx = minimum(subvolume_dimensions[:,2]) ''' import sys sys.path.append(sys.path[0]+"/../") from config.paths import hdf5_root as hdf5_root, esrf_implants_root import h5py +from lib.py.commandline_args import default_parser from lib.py.esrf_read import * -from lib.py.helpers import commandline_args, generate_cylinder_mask, normalize +from lib.py.helpers import generate_cylinder_mask, normalize import numpy as np import os.path import pathlib @@ -22,18 +24,17 @@ NA = np.newaxis if __name__ == "__main__": - sample, chunk_length, use_bohrium, xml_root, verbose = commandline_args({ - "sample" : "", - "chunk_length" : 256, - "use_bohrium" : True, - "xml_root" : esrf_implants_root, - "verbose" : 1 - }) - - if verbose >= 1: print(f"data_root={xml_root}") - - subvolume_xmls = readfile(f"{xml_root}/index/{sample}.txt") - subvolume_metadata = [esrf_read_xml(f"{xml_root}/{xml.strip()}") for xml in subvolume_xmls] + argparser = default_parser(description=__doc__) + argparser.add_argument('--use_bohrium', action='store_true', + help='Toggles whether to use Bohrium for processing.') + argparser.add_argument('--xml_root', action='store', type=str, default=esrf_implants_root, + help='The root directory of the ESRF 2013 dataset.') + args = argparser.parse_args() + + if args.verbose >= 1: print(f"data_root={args.xml_root}") + + subvolume_xmls = readfile(f"{args.xml_root}/index/{args.sample}.txt") + subvolume_metadata = [esrf_read_xml(f"{args.xml_root}/{xml.strip()}") for xml in subvolume_xmls] subvolume_dimensions = np.array([(int(m['sizez']), int(m['sizey']), int(m['sizex'])) for m in subvolume_metadata]) subvolume_range = np.array([(float(m['valmin']), float(m['valmax'])) for m in subvolume_metadata]) @@ -44,15 +45,15 @@ # 2) top or bottom can have important info (depending on orientation of scan) (Nz,Ny,Nx) = (np.sum(subvolume_dimensions[:,0]), np.min(subvolume_dimensions[:,1]&~31), np.min(subvolume_dimensions[:,2]&~31)) - if verbose >= 1: + if args.verbose >= 1: for i in range(len(subvolume_metadata)): - print(f"{i} {sample}/{subvolume_metadata[i]['experiment']}: {subvolume_range[i]}") + print(f"{i} {args.sample}/{subvolume_metadata[i]['experiment']}: {subvolume_range[i]}") print((global_vmin, global_vmax), (Nz,Ny,Nx)) print(subvolume_dimensions) print(subvolume_range) - msb_filename = f"{hdf5_root}/hdf5-byte/msb/{sample}.h5" - lsb_filename = f"{hdf5_root}/hdf5-byte/lsb/{sample}.h5" + msb_filename = f"{hdf5_root}/hdf5-byte/msb/{args.sample}.h5" + lsb_filename = f"{hdf5_root}/hdf5-byte/lsb/{args.sample}.h5" # Make sure directory exists outdir = os.path.dirname(msb_filename) @@ -60,7 +61,7 @@ outdir = os.path.dirname(lsb_filename) pathlib.Path(outdir).mkdir(parents=True, exist_ok=True) - if verbose >= 1: print(f"Writing {msb_filename} and {lsb_filename}") + if args.verbose >= 1: print(f"Writing {msb_filename} and {lsb_filename}") h5file_msb = h5py.File(msb_filename,"w") h5file_lsb = h5py.File(lsb_filename,"w") @@ -93,36 +94,36 @@ (nz, ny, nx) = subvolume_dimensions[i] (sy, sx) = ((ny-Ny)//2 + ((ny-Ny)%2), (nx-Nx)//2 + ((nx-Nx)%2)) (ey, ex) = (ny-(ny-Ny)//2, nx-(nx-Nx)//2) - if verbose >= 1: print((sy,ey), (sx,ex)) + if args.verbose >= 1: print((sy,ey), (sx,ex)) - chunk = np.zeros((chunk_length, Ny, Nx), dtype=np.uint16) - for z in range(0, nz, chunk_length): - chunk_end = min(z+chunk_length, nz) + chunk = np.zeros((args.chunk_length, Ny, Nx), dtype=np.uint16) + for z in range(0, nz, args.chunk_length): + chunk_end = min(z+args.chunk_length, nz) region = [[sx, sy, z], [ex, ey, chunk_end]] - if verbose >= 1: print(f"Reading chunk {z+z_offset}:{chunk_end+z_offset} ({i}-{z}), region={region}") + if args.verbose >= 1: print(f"Reading chunk {z+z_offset}:{chunk_end+z_offset} ({i}-{z}), region={region}") slab_data = esrf_edfrange_to_bh(subvolume_info,region) - if verbose >= 1: print(f"Chunk shape: {slab_data.shape}") - if verbose >= 1: print("Max value before masking:", slab_data.max()) + if args.verbose >= 1: print(f"Chunk shape: {slab_data.shape}") + if args.verbose >= 1: print("Max value before masking:", slab_data.max()) slab_data *= mask[NA,:,:] - if verbose >= 1: print("Max value after masking:", slab_data.max()) + if args.verbose >= 1: print("Max value after masking:", slab_data.max()) chunk[:chunk_end-z] = normalize(slab_data, (global_vmin,global_vmax)) - if verbose >= 1: print("Max value after normalizing:", chunk.max()) + if args.verbose >= 1: print("Max value after normalizing:", chunk.max()) chunk_msb = ((chunk[:chunk_end-z] >> 8) & 0xff).astype(np.uint8) chunk_lsb = ( chunk[:chunk_end-z] & 0xff).astype(np.uint8) - if verbose >= 1: - print(f"Writing {sample} MSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})") + if args.verbose >= 1: + print(f"Writing {args.sample} MSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})") print("chunk_msb.max: ", chunk_msb.max()) print("chunk_msb.copy2numpy().max: ", chunk_msb.max()) h5tomo_msb[z_offset+z:z_offset+chunk_end] = chunk_msb[:] - if verbose >= 1: - print(f"Writing {sample} LSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})") + if args.verbose >= 1: + print(f"Writing {args.sample} LSB slice {z+z_offset}:{chunk_end+z_offset} ({i}-{z})") print("chunk_lsb.max: ", chunk_lsb.max()) print("chunk_lsb.copy2numpy().max: ", chunk_lsb.max()) h5tomo_lsb[z_offset+z:z_offset+chunk_end] = chunk_lsb[:]