forked from bowang-lab/MedSAM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpre_CT_MR.py
executable file
·181 lines (169 loc) · 6.65 KB
/
pre_CT_MR.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# -*- coding: utf-8 -*-
# %% import packages
# pip install connected-components-3d
import numpy as np
# import nibabel as nib
import SimpleITK as sitk
import os
join = os.path.join
from skimage import transform
from tqdm import tqdm
import cc3d
# convert nii image to npz files, including original image and corresponding masks
modality = "CT"
anatomy = "Abd" # anantomy + dataset name
img_name_suffix = "_0000.nii.gz"
gt_name_suffix = ".nii.gz"
prefix = modality + "_" + anatomy + "_"
nii_path = "data/FLARE22Train/images" # path to the nii images
gt_path = "data/FLARE22Train/labels" # path to the ground truth
npy_path = "data/npy/" + prefix[:-1]
os.makedirs(join(npy_path, "gts"), exist_ok=True)
os.makedirs(join(npy_path, "imgs"), exist_ok=True)
image_size = 1024
voxel_num_thre2d = 100
voxel_num_thre3d = 1000
names = sorted(os.listdir(gt_path))
print(f"ori \# files {len(names)=}")
names = [
name
for name in names
if os.path.exists(join(nii_path, name.split(gt_name_suffix)[0] + img_name_suffix))
]
print(f"after sanity check \# files {len(names)=}")
# set label ids that are excluded
remove_label_ids = [
12
] # remove deodenum since it is scattered in the image, which is hard to specify with the bounding box
tumor_id = None # only set this when there are multiple tumors; convert semantic masks to instance masks
# set window level and width
# https://radiopaedia.org/articles/windowing-ct
WINDOW_LEVEL = 40 # only for CT images
WINDOW_WIDTH = 400 # only for CT images
# %% save preprocessed images and masks as npz files
for name in tqdm(names[:40]): # use the remaining 10 cases for validation
image_name = name.split(gt_name_suffix)[0] + img_name_suffix
gt_name = name
gt_sitk = sitk.ReadImage(join(gt_path, gt_name))
gt_data_ori = np.uint8(sitk.GetArrayFromImage(gt_sitk))
# remove label ids
for remove_label_id in remove_label_ids:
gt_data_ori[gt_data_ori == remove_label_id] = 0
# label tumor masks as instances and remove from gt_data_ori
if tumor_id is not None:
tumor_bw = np.uint8(gt_data_ori == tumor_id)
gt_data_ori[tumor_bw > 0] = 0
# label tumor masks as instances
tumor_inst, tumor_n = cc3d.connected_components(
tumor_bw, connectivity=26, return_N=True
)
# put the tumor instances back to gt_data_ori
gt_data_ori[tumor_inst > 0] = (
tumor_inst[tumor_inst > 0] + np.max(gt_data_ori) + 1
)
# exclude the objects with less than 1000 pixels in 3D
gt_data_ori = cc3d.dust(
gt_data_ori, threshold=voxel_num_thre3d, connectivity=26, in_place=True
)
# remove small objects with less than 100 pixels in 2D slices
for slice_i in range(gt_data_ori.shape[0]):
gt_i = gt_data_ori[slice_i, :, :]
# remove small objects with less than 100 pixels
# reason: fro such small objects, the main challenge is detection rather than segmentation
gt_data_ori[slice_i, :, :] = cc3d.dust(
gt_i, threshold=voxel_num_thre2d, connectivity=8, in_place=True
)
# find non-zero slices
z_index, _, _ = np.where(gt_data_ori > 0)
z_index = np.unique(z_index)
if len(z_index) > 0:
# crop the ground truth with non-zero slices
gt_roi = gt_data_ori[z_index, :, :]
# load image and preprocess
img_sitk = sitk.ReadImage(join(nii_path, image_name))
image_data = sitk.GetArrayFromImage(img_sitk)
# nii preprocess start
if modality == "CT":
lower_bound = WINDOW_LEVEL - WINDOW_WIDTH / 2
upper_bound = WINDOW_LEVEL + WINDOW_WIDTH / 2
image_data_pre = np.clip(image_data, lower_bound, upper_bound)
image_data_pre = (
(image_data_pre - np.min(image_data_pre))
/ (np.max(image_data_pre) - np.min(image_data_pre))
* 255.0
)
else:
lower_bound, upper_bound = np.percentile(
image_data[image_data > 0], 0.5
), np.percentile(image_data[image_data > 0], 99.5)
image_data_pre = np.clip(image_data, lower_bound, upper_bound)
image_data_pre = (
(image_data_pre - np.min(image_data_pre))
/ (np.max(image_data_pre) - np.min(image_data_pre))
* 255.0
)
image_data_pre[image_data == 0] = 0
image_data_pre = np.uint8(image_data_pre)
img_roi = image_data_pre[z_index, :, :]
# save the image and ground truth as nii files for sanity check;
# they can be removed
img_roi_sitk = sitk.GetImageFromArray(img_roi)
gt_roi_sitk = sitk.GetImageFromArray(gt_roi)
sitk.WriteImage(
img_roi_sitk,
join(npy_path, prefix + gt_name.split(gt_name_suffix)[0] + "_img.nii.gz"),
)
sitk.WriteImage(
gt_roi_sitk,
join(npy_path, prefix + gt_name.split(gt_name_suffix)[0] + "_gt.nii.gz"),
)
# save the each CT image as npy file
for i in range(img_roi.shape[0]):
img_i = img_roi[i, :, :]
img_3c = np.repeat(img_i[:, :, None], 3, axis=-1)
resize_img_skimg = transform.resize(
img_3c,
(image_size, image_size),
order=3,
preserve_range=True,
mode="constant",
anti_aliasing=True,
)
resize_img_skimg_01 = (resize_img_skimg - resize_img_skimg.min()) / np.clip(
resize_img_skimg.max() - resize_img_skimg.min(), a_min=1e-8, a_max=None
) # normalize to [0, 1], (H, W, 3)
gt_i = gt_roi[i, :, :]
resize_gt_skimg = transform.resize(
gt_i,
(image_size, image_size),
order=0,
preserve_range=True,
mode="constant",
anti_aliasing=False,
)
resize_gt_skimg = np.uint8(resize_gt_skimg)
assert resize_img_skimg_01.shape[:2] == resize_gt_skimg.shape
np.save(
join(
npy_path,
"imgs",
prefix
+ gt_name.split(gt_name_suffix)[0]
+ "-"
+ str(i).zfill(3)
+ ".npy",
),
resize_img_skimg_01,
)
np.save(
join(
npy_path,
"gts",
prefix
+ gt_name.split(gt_name_suffix)[0]
+ "-"
+ str(i).zfill(3)
+ ".npy",
),
resize_gt_skimg,
)