This repository has been archived by the owner on Feb 22, 2020. It is now read-only.

Add Training Process for Nodule Detection and Classification - added customized datasets #300

Closed
wants to merge 11 commits
41 changes: 41 additions & 0 deletions prediction/src/algorithms/preprocessing/AddSegmentation.asv
@@ -0,0 +1,41 @@
function AddSegmentation(SegmentDataFolder, FolderDelimiter, BatchSize, ParFor_flag, IgnoreExisting_flag)
Contributor:


What's the reason for using a language other than Python here?

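% Run LungSegmentation on every case folder under SegmentDataFolder (in parallel
% batches of size BatchSize when ParFor_flag is set) and save the per-case I_Mask
% and I_BB values to DatasetInfo.mat.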
if ParFor_flag
if isempty(gcp('nocreate'))
parpool;
end
else
delete(gcp('nocreate'));
end

fprintf('Lung segmentation...\n');
FileList = dir(SegmentDataFolder); FileList = FileList(3:end);  % drop the '.' and '..' entries
FileList = FileList(~strcmp({FileList.name}, 'DatasetInfo.mat'));
SampleNum = length(FileList);
I_Mask = zeros(SampleNum, 1); I_BB = zeros(SampleNum, 1);
tStart = tic; msgTxt = '';
warned = false(1, SampleNum);
if ParFor_flag
    % Process the cases in batches of BatchSize and report progress after each batch.
    for sInd = 1:BatchSize:SampleNum
        eInd = min(sInd + BatchSize - 1, SampleNum);
        parfor j = sInd:eInd
            [I_Mask(j), I_BB(j), warned(j)] = LungSegmentation(sprintf('%s%s%s', SegmentDataFolder, FolderDelimiter, FileList(j).name), 'IgnoreExisting_flag', IgnoreExisting_flag);
        end
        tElapse = toc(tStart); tRemain = tElapse / eInd * (SampleNum - eInd);
        if ~isempty(find(warned(sInd:eInd), 1))
            % A warning broke the output line, so do not erase the previous progress message.
            msgPre = '';
        else
            % Erase the previous progress message ('%%' in msgTxt prints as a single character).
            msgPre = repmat('\b', 1, max(length(msgTxt) - 1, 0));
        end
        msgTxt = sprintf('Progress (%d/%d): %.2f%%%%, %dmin %4.1fsec elapsed, %dmin %4.1fsec to go.\n', ...
            eInd, SampleNum, eInd / SampleNum * 100, floor(tElapse / 60), mod(tElapse, 60), floor(tRemain / 60), mod(tRemain, 60));
        fprintf([msgPre, msgTxt]);
    end
else
    for j = 1:SampleNum
        [I_Mask(j), I_BB(j), warned(j)] = LungSegmentation(sprintf('%s%s%s', SegmentDataFolder, FolderDelimiter, FileList(j).name), 'IgnoreExisting_flag', IgnoreExisting_flag);
    end
end


fprintf('Average intensity in mask: %.2f\n', mean(I_Mask));
fprintf('Average intensity in bounding box: %.2f\n', mean(I_BB));
save(sprintf('%s%s%s', SegmentDataFolder, FolderDelimiter, 'DatasetInfo.mat'), 'I_Mask', 'I_BB','-v7');
end
1 change: 1 addition & 0 deletions prediction/src/algorithms/preprocessing/__init__.py
@@ -0,0 +1 @@
from .full_prep import full_prep, savenpy
169 changes: 169 additions & 0 deletions prediction/src/algorithms/preprocessing/full_prep.py
@@ -0,0 +1,169 @@
import os
import warnings

from os import path as p
from functools import partial
from multiprocessing import Pool

import h5py
import numpy as np

from skimage import measure
from skimage.morphology import convex_hull_image
from scipy.io import loadmat
from scipy.ndimage.interpolation import zoom
from scipy.ndimage.morphology import binary_dilation, generate_binary_structure

from .step1 import step1_python


def process_mask(mask):
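    """Take the per-slice convex hull of the mask (skipped when the hull more than
    doubles the slice area), then apply a 10-iteration 3D binary dilation."""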
convex_mask = np.copy(mask)

for i_layer in range(convex_mask.shape[0]):
mask1 = np.ascontiguousarray(mask[i_layer])

if np.sum(mask1) > 0:
mask2 = convex_hull_image(mask1)

if np.sum(mask2) > 2 * np.sum(mask1):
mask2 = mask1
else:
mask2 = mask1

convex_mask[i_layer] = mask2

struct = generate_binary_structure(3, 1)
return binary_dilation(convex_mask, structure=struct, iterations=10)


def lumTrans(img):
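    """Clip intensities to the lung window [-1200, 600] and rescale to uint8 [0, 255]."""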
lungwin = np.array([-1200., 600.])
newimg = (img - lungwin[0]) / (lungwin[1] - lungwin[0])
newimg[newimg < 0] = 0
newimg[newimg > 1] = 1
return (newimg * 255).astype('uint8')


def resample(imgs, spacing, new_spacing, order=2):
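    """Resample a 3D volume (or each channel of a 4D volume) from `spacing` to
    `new_spacing` using scipy's zoom; returns the volume and the achieved spacing."""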
if len(imgs.shape) == 3:
new_shape = np.round(imgs.shape * spacing / new_spacing)
true_spacing = spacing * imgs.shape / new_shape
resize_factor = new_shape / imgs.shape

with warnings.catch_warnings():
warnings.simplefilter("ignore")
imgs = zoom(imgs, resize_factor, mode='nearest', order=order)

return imgs, true_spacing
elif len(imgs.shape) == 4:
n = imgs.shape[-1]
newimg = []

for i in range(n):
slice = imgs[:, :, :, i]
newslice, true_spacing = resample(slice, spacing, new_spacing)
newimg.append(newslice)

newimg = np.transpose(np.array(newimg), [1, 2, 3, 0])
return newimg, true_spacing
else:
raise ValueError('expected a 3D or 4D image array, got shape %s' % (imgs.shape,))


def savenpy(dirname, prep_folder, data_path, use_existing=True):
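    """Preprocess one case directory: build a dilated lung mask, normalize the scan
    to uint8, resample to `resolution`, crop to the padded lung bounding box, and
    save <dirname>_clean.npy, a placeholder <dirname>_label.npy, and
    <dirname>_info.npy into prep_folder. Returns 1 when the case was processed,
    0 when existing outputs were reused."""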
print('saving %s...' % dirname)
resolution = np.array([1, 1, 1])

if use_existing:
label_path = p.join(prep_folder, dirname + '_label.npy')
clean_path = p.join(prep_folder, dirname + '_clean.npy')
exists = p.exists(label_path) and p.exists(clean_path)
else:
exists = False

if exists:
print(dirname + ' already processed')
processed = 0
else:
print(dirname + ' not yet processed')
case_path = p.join(data_path, dirname)
im, m1, m2, spacing = step1_python(case_path)
Mask = m1 + m2

newshape = np.round(np.array(Mask.shape) * spacing / resolution)
xx, yy, zz = np.where(Mask)
box = np.array(
[
[np.min(xx), np.max(xx)],
[np.min(yy), np.max(yy)],
[np.min(zz), np.max(zz)]])

box = box * np.expand_dims(spacing, 1) / np.expand_dims(resolution, 1)
box = np.floor(box).astype('int')
margin = 5
extendbox = np.vstack(
[
np.max([[0, 0, 0], box[:, 0] - margin], 0),
np.min([newshape, box[:, 1] + 2 * margin], axis=0).T]).T

extendbox = extendbox.astype('int')
offset = extendbox.astype('float32')[:, 0]

convex_mask = m1
dm1 = process_mask(m1)
dm2 = process_mask(m2)
dilatedMask = dm1 + dm2
Mask = m1 + m2
extramask = dilatedMask ^ Mask
bone_thresh = 210
pad_value = 170

im[np.isnan(im)] = - 2000
sliceim = lumTrans(im)
shape = np.array(sliceim.shape)
info = np.concatenate((offset[np.newaxis, ...], spacing[np.newaxis, ...], shape[np.newaxis, ...]))
# save original slice images here if needed
sliceim = sliceim * dilatedMask + pad_value * (1 - dilatedMask).astype('uint8')
bones = sliceim * extramask > bone_thresh
sliceim[bones] = pad_value
# save masked slice images here if needed
# slices before resample, Ex: (109, 512, 512), (321, 512, 512)
sliceim1 = resample(sliceim, spacing, resolution, order=1)[0]
# slices after resample, Ex: (272, 360, 360), (321, 360, 360)
sliceim2 = sliceim1[
extendbox[0, 0]:extendbox[0, 1],
extendbox[1, 0]:extendbox[1, 1],
extendbox[2, 0]:extendbox[2, 1]]

sliceim = sliceim2[np.newaxis, ...]
# slices after cropping, Ex: (1, 247, 198, 266), (1, 262, 187, 246)
np.save(p.join(prep_folder, dirname + '_clean'), sliceim)
np.save(p.join(prep_folder, dirname + '_label'), np.array([[0, 0, 0, 0]]))
np.save(p.join(prep_folder, dirname + '_info'), info)
print(dirname + ' done')
processed = 1

return processed


def full_prep(data_path, prep_folder, use_existing=True, **kwargs):
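    """Run savenpy over every case directory in data_path with a multiprocessing
    Pool and return the per-case processed flags."""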
n_worker = kwargs.get('n_worker')
warnings.filterwarnings('ignore')

if not p.exists(prep_folder):
os.mkdir(prep_folder)

pool = Pool(n_worker)
dirlist = kwargs.get('dirlist') or os.listdir(data_path)
print('start preprocessing %i directories...' % len(dirlist))

partial_savenpy = partial(
savenpy, prep_folder=prep_folder, data_path=data_path,
use_existing=use_existing)

mapped = pool.map(partial_savenpy, dirlist)
pool.close()
pool.join()
print('end preprocessing')
return mapped
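For reference, a minimal sketch of how the new preprocessing entry point could be invoked; the import path, directories, and worker count below are hypothetical placeholders rather than part of this change.

# Usage sketch: assumes the preprocessing module is importable as a package
# and that the data/output directories exist; all paths here are placeholders.
from prediction.src.algorithms.preprocessing import full_prep

processed = full_prep('/data/raw_cases', '/data/preprocessed',
                      use_existing=True, n_worker=4)
print('%d of %d cases processed this run' % (sum(processed), len(processed)))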