-
Notifications
You must be signed in to change notification settings - Fork 2
/
LungSegmentation.py
473 lines (380 loc) · 17.9 KB
/
LungSegmentation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
import glob
import numpy as np
import pydicom
import os
import datetime
import io
import scipy.ndimage
import zipfile
from shutil import rmtree
from multiprocessing import Pool
import scipy.ndimage as ndimage
from PIL import Image
from pydicom.encaps import decode_data_sequence
from pydicom.encaps import encapsulate
from pydicom.dataset import FileDataset, FileMetaDataset
from pydicom.uid import RLELossless
import nibabel as nib
import matplotlib.pyplot as plt
from skimage.morphology import disk, binary_erosion, binary_closing
from skimage.measure import label,regionprops
from skimage import measure
from skimage.segmentation import clear_border, active_contour
from skimage import measure
from mpl_toolkits.mplot3d.art3d import Poly3DCollection
class LungSegmentation:
    """Batch lung segmentation for CT volumes read from DICOM series or NIfTI files.

    Each task is a ``(path, case_id)`` tuple. For every case the pipeline reads
    the volume, resamples it, builds an initial lung mask, refines the mask
    slice by slice and writes plots and compressed results below
    ``outputPath`` (sub-folders ``HU``, ``lungAreasInit``, ``DMM``, ``DTM``
    and ``lungAreasFinal``).
    """

    def __init__(self, tasks, outputPath, isDicom = True, nii_depthfactor = 2.0) -> None:
        """Store the run configuration.

        Args:
            tasks: list of ``(path, case_id)`` tuples to process.
            outputPath: root folder for extracted archives and results.
            isDicom: True to read DICOM folders, False to read NIfTI files.
            nii_depthfactor: multiplier applied to the first NIfTI spacing
                entry in ``ReadNII``.
        """
        self.tasks = tasks
        self.outputPath = outputPath
        self.isDicom = isDicom
        self.nii_depthfactor = nii_depthfactor

    # ------------------------------------------------------------------
    # Reading
    # ------------------------------------------------------------------
    @staticmethod
    def _normalize_intensity(slices):
        """Bring raw intensities into the 0..4095 range used downstream.

        Values equal to -2000 are zeroed first (presumably scanner padding —
        TODO confirm). Volumes whose maximum exceeds 4095 are shifted by
        +1024 only when they contain negative values and are then clipped;
        volumes whose minimum is below -1024 are shifted by +1024 and
        clipped. Anything else is returned unchanged.
        """
        slices[slices == -2000] = 0
        if np.max(slices) > 4095:
            print('range: ', np.min(slices), np.max(slices))
            if np.min(slices) < 0:
                slices = slices + 1024
            slices[slices > 4095] = 4095
            slices[slices < 0] = 0
        elif np.min(slices) < -1024:
            print('range: ', np.min(slices), np.max(slices))
            slices = slices + 1024
            slices[slices > 4095] = 4095
            slices[slices < 0] = 0
        print('range: ', np.min(slices), np.max(slices))
        return slices

    def ReadDICOM(self, path):
        """Load every ``*.dcm`` file in ``path`` as one volume.

        Args:
            path: folder containing the DICOM slices.

        Returns:
            ``(slices, spacing)`` where ``slices`` is the stacked pixel data
            ordered by ``ImagePositionPatient[2]`` and ``spacing`` is a
            float32 array ``[slice distance, PixelSpacing[0], PixelSpacing[1]]``.

        Raises:
            ValueError: if fewer than two slices are found, because the slice
                distance is derived from the first two slices.
        """
        # os.path.join works with and without a trailing separator on `path`.
        filenames = glob.glob(os.path.join(path, "*.dcm"))
        slices = [pydicom.dcmread(filename) for filename in filenames]
        if len(slices) < 2:
            raise ValueError(
                "Need at least two DICOM slices in {0}, found {1}".format(path, len(slices)))

        # Sort the dicom slices in their respective order
        slices.sort(key=lambda s: float(s.ImagePositionPatient[2]))
        try:
            slice_thickness = np.abs(
                slices[0].ImagePositionPatient[2] - slices[1].ImagePositionPatient[2])
        except (AttributeError, IndexError, TypeError):
            slice_thickness = np.abs(slices[0].SliceLocation - slices[1].SliceLocation)

        for s in slices:
            s.SliceThickness = float(slice_thickness)

        first = slices[0]
        spacing = np.array(
            [float(first.SliceThickness), float(first.PixelSpacing[0]), float(first.PixelSpacing[1])],
            dtype=np.float32)

        volume = np.stack([s.pixel_array for s in slices])
        return self._normalize_intensity(volume), spacing

    def ReadNII(self, path):
        """Load a NIfTI file as a volume with the slice axis first.

        Args:
            path: NIfTI file to load with nibabel.

        Returns:
            ``(slices, spacing)``; the data is moved so that its last axis
            becomes axis 0 and every slice is rotated by ``k=-1`` quarter
            turns in the (1, 2) plane.
        """
        img = nib.load(path)
        # Copy so the in-place edit below does not write through to the header.
        spacing = np.array(img.header['pixdim'][:3])
        # NOTE(review): in NIfTI-1 pixdim[0] is the qfac field rather than a
        # voxel size, so this entry presumably ends up equal to
        # nii_depthfactor — confirm that this is the intended slice spacing.
        spacing[0] = abs(spacing[0]) * self.nii_depthfactor

        slices = np.array(img.dataobj)
        slices = np.moveaxis(slices, -1, 0)
        slices = np.rot90(slices, k=-1, axes=(1, 2))
        return self._normalize_intensity(slices), spacing

    def resample(self, image, spacing, new_spacing=(1, 1, 1)):
        """Resample ``image`` so its voxel spacing approximates ``new_spacing``.

        Args:
            image: n-dimensional array.
            spacing: current spacing per axis (array-like).
            new_spacing: requested spacing per axis (array-like).

        Returns:
            ``(image, new_spacing)``; the returned spacing is the one actually
            achieved after the target shape was rounded to whole voxels.
        """
        spacing = np.asarray(spacing, dtype=np.float64)
        target_spacing = np.asarray(new_spacing, dtype=np.float64)
        shape = np.array(image.shape)

        resize_factor = spacing / target_spacing
        new_shape = np.round(shape * resize_factor)
        real_resize_factor = new_shape / shape
        achieved_spacing = spacing / real_resize_factor

        # scipy.ndimage.zoom is the supported name; the old
        # scipy.ndimage.interpolation namespace is deprecated.
        image = scipy.ndimage.zoom(image, real_resize_factor, mode='nearest')
        return image, achieved_spacing

    # ------------------------------------------------------------------
    # Writing
    # ------------------------------------------------------------------
    def _case_file(self, subdir, CaseID, extension):
        """Return ``<outputPath>/<subdir>/Patient-<CaseID><extension>``, creating the folder."""
        folder = os.path.join(self.outputPath, subdir)
        os.makedirs(folder, exist_ok=True)
        return os.path.join(folder, 'Patient-' + str(CaseID) + extension)

    #write compressed dicom
    def SaveCompressedResults(self, frames, dstFile, PatientID):
        """Write ``frames`` as PNG-encoded fragments inside a DICOM container.

        Each frame is cast to unsigned 16-bit, encoded as PNG and the
        fragments are encapsulated into ``PixelData``.

        NOTE(review): the transfer syntax is declared as RLE Lossless although
        the fragments are PNG, and the MediaStorageSOPClassUID value looks
        like a transfer-syntax UID. The identifiers are kept as they are
        because the consumer of these files is not visible here.
        """
        frame_data = []
        for frame in frames:
            image = Image.fromarray(frame.astype(np.ushort))
            with io.BytesIO() as output:
                image.save(output, format="PNG")
                frame_data.append(output.getvalue())
        encapsulated_data = encapsulate(frame_data)

        file_meta = FileMetaDataset()
        file_meta.MediaStorageSOPClassUID = '1.2.840.10008.1.2.4.51'
        file_meta.MediaStorageSOPInstanceUID = "1.2.3"
        file_meta.ImplementationClassUID = "1.2.3.4"

        ds = FileDataset(dstFile, {}, file_meta=file_meta, preamble=b"\0" * 128)
        ds.PatientName = "Patient " + str(PatientID)
        ds.PatientID = str(PatientID)

        # Set creation date/time
        dt = datetime.datetime.now()
        ds.ContentDate = dt.strftime('%Y%m%d')
        ds.ContentTime = dt.strftime('%H%M%S.%f')

        ds.PixelData = encapsulated_data
        ds.file_meta.TransferSyntaxUID = RLELossless
        print("Writing: " + dstFile)
        ds.save_as(dstFile)

    def plot_3d(self, image, threshold=-300):
        """Show an interactive 3-D surface of ``image`` at ``threshold``.

        NOTE(review): ``measure.marching_cubes_classic`` only exists in older
        scikit-image releases — confirm the pinned version before relying on
        this helper.
        """
        # Position the scan upright,
        # so the head of the patient would be at the top facing the camera
        p = image.transpose(2, 1, 0)
        verts, faces = measure.marching_cubes_classic(p, threshold)

        fig = plt.figure(figsize=(10, 10))
        ax = fig.add_subplot(111, projection='3d')

        # Fancy indexing: `verts[faces]` to generate a collection of triangles
        mesh = Poly3DCollection(verts[faces], alpha=0.70)
        mesh.set_facecolor([0.45, 0.45, 0.75])
        ax.add_collection3d(mesh)

        ax.set_xlim(0, p.shape[0])
        ax.set_ylim(0, p.shape[1])
        ax.set_zlim(0, p.shape[2])
        plt.show()

    # ------------------------------------------------------------------
    # Segmentation
    # ------------------------------------------------------------------
    def largest_label_volume(self, im, bg=-1):
        """Return the most frequent value in ``im`` other than ``bg``.

        Returns None when ``im`` contains nothing but ``bg``.
        """
        vals, counts = np.unique(im, return_counts=True)
        keep = vals != bg
        vals, counts = vals[keep], counts[keep]
        if len(counts) > 0:
            return vals[np.argmax(counts)]
        return None

    #classic high-performance lung segmentation (from Kaggle)
    def segment_lung_mask(self, image, fill_lung_structures=True):
        """Build an initial 3-D lung mask from an intensity volume.

        Args:
            image: volume indexed as ``[slice, row, col]``.
            fill_lung_structures: when True, per slice everything outside the
                largest labelled structure is treated as lung.

        Returns:
            int8 volume in which only the largest connected non-zero
            component is kept.
        """
        # 1 where the intensity is below 604, 0 elsewhere.
        binary_image = (image < 604).astype('int8')

        # Remove the blobs connected to the border of the image
        for i in range(len(binary_image)):
            binary_image[i] = clear_border(binary_image[i])
        # Bitwise NOT on int8 maps 0 -> -1 and 1 -> -2, so no voxel is 0 and
        # measure.label below treats nothing as background.
        binary_image = np.invert(binary_image).astype(np.int8)

        labels = measure.label(binary_image)

        # Pick the pixel in the very corner to determine which label is air.
        # Improvement: Pick multiple background labels from around the patient
        # More resistant to "trays" on which the patient lays cutting the air
        # around the person in half
        background_label = labels[0, 0, 0]
        #Fill the air around the person
        binary_image[background_label == labels] = 2

        # Same again for the opposite corner of the volume.
        background_label = labels[image.shape[0] - 1, image.shape[1] - 1, image.shape[2] - 1]
        #Fill the air around the person
        binary_image[background_label == labels] = 2

        # Method of filling the lung structures (that is superior to something like
        # morphological closing)
        if fill_lung_structures:
            # For every slice we determine the largest solid structure
            for i, axial_slice in enumerate(binary_image):
                axial_slice = axial_slice - 1
                labeling = measure.label(axial_slice)
                l_max = self.largest_label_volume(labeling, bg=0)

                if l_max is not None: #This slice contains some lung
                    binary_image[i][labeling != l_max] = 1

        binary_image -= 1 #Make the image actual binary
        binary_image = 1 - binary_image # Invert it, lungs are now 1

        # Remove other air pockets insided body
        labels = measure.label(binary_image, background=0)
        l_max = self.largest_label_volume(labels, bg=0)
        if l_max is not None: # There are air pockets
            binary_image[labels != l_max] = 0

        return binary_image

    def SaveLungAreas(self, frames, dstFile):
        """Plot the number of mask pixels per frame and save the plot to ``dstFile``."""
        # The summed area of all labelled regions of `frame > 0` equals the
        # number of pixels that are > 0, so count those directly.
        lungAreas = [np.count_nonzero(frame > 0) for frame in frames]

        fig = plt.figure()
        try:
            fig.set_size_inches(10, 3)
            plt.plot(lungAreas)
            fig.tight_layout()
            fig.savefig(dstFile)
        finally:
            # Figures stay registered with pyplot until closed; release them
            # so long batch runs do not accumulate open figures.
            plt.close(fig)

    def TransBackground(self, img):
        """Return a copy of an RGBA image with black made transparent.

        Pixels equal to ``(0, 0, 0, 255)`` become fully transparent; all
        other pixels keep their colour and get alpha ``int(0.3 * 255)``.
        """
        imgg = img.copy()
        pixels = imgg.load()
        overlay_alpha = int(0.3 * 255)
        width, height = imgg.size
        for i in range(width):
            for j in range(height):
                if pixels[i, j] == (0, 0, 0, 255):
                    pixels[i, j] = (0, 0, 0, 0)
                else:
                    current = pixels[i, j]
                    pixels[i, j] = (int(current[0]), int(current[1]), current[2], overlay_alpha)
        return imgg

    @staticmethod
    def _report_progress(index, total, last_reported):
        """Print progress when a new multiple of 5 % is reached; return the last printed value."""
        percent = int(index / total * 100)
        if percent != last_reported and percent % 5 == 0:
            print("Progress {0}%".format(percent))
            return percent
        return last_reported

    def _refine_slice(self, binary, originImg):
        """Refine one mask slice and return it as int8.

        ``binary`` is modified in place, so callers pass a copy.
        """
        rprobas = regionprops(label(binary))
        areas = sorted(r.area for r in rprobas)

        # Fewer than two regions or a small total area: only close the mask.
        if len(areas) < 2 or np.sum(areas) < 15000:
            return binary_closing(binary, disk(5)).astype('int8')

        # lunglst[0] receives the largest region(s), lunglst[1] all others.
        lunglst = [np.zeros_like(binary, dtype='bool'), np.zeros_like(binary, dtype='bool')]
        for region in rprobas:
            if region.area == areas[-1]:
                lunglst[0][region.coords[:, 0], region.coords[:, 1]] = 1
            else:
                lunglst[1][region.coords[:, 0], region.coords[:, 1]] = 1

        # Fit an active contour to every contour of the closed lung and
        # collect the filled snakes into one mask.
        # NOTE(review): max_iterations / coordinates='rc' are keyword names of
        # older scikit-image releases — confirm against the pinned version.
        mask = np.zeros_like(binary, dtype='bool')
        for lung in lunglst:
            bins = binary_closing(lung, disk(10))
            contours = measure.find_contours(bins, 0.8)
            lungimg = originImg.copy()
            lungimg[lung == 0] = 0
            for contour in contours:
                snake = active_contour(lungimg, contour, alpha=0.02, beta=20,
                                       max_iterations=20, coordinates='rc')
                r_mask = np.zeros_like(binary, dtype='bool')
                r_mask[snake[:, 0].astype('int'), snake[:, 1].astype('int')] = 1
                r_mask = ndimage.binary_fill_holes(r_mask)
                mask = mask | r_mask

        masksub = mask.astype(np.uint8) - binary.astype(np.uint8)
        binarywithsub = (binary.astype('bool') | masksub.astype('bool')).astype(np.uint8)

        # Re-split the merged mask into its two largest regions.
        for lung in lunglst:
            lung[lung > 0] = 0
        rprobas = regionprops(label(binarywithsub))
        areas = sorted(r.area for r in rprobas)
        for region in rprobas:
            if region.area == areas[-1]:
                lunglst[0][region.coords[:, 0], region.coords[:, 1]] = 1
            elif region.area == areas[-2]:
                lunglst[1][region.coords[:, 0], region.coords[:, 1]] = 1

        # Rebuild the slice from the eroded and closed lungs.
        binary[binary > 0] = 0
        for lung in lunglst:
            lung = binary_erosion(lung, disk(2))
            lung = binary_closing(lung, disk(10))
            binary = binary.astype('bool') | lung.astype('bool')
        return binary.astype('int8')

    #perform segmentation refinement
    def SegRefine(self, seg, ct):
        """Refine the mask ``seg`` slice by slice using the intensities in ``ct``.

        Args:
            seg: mask volume indexed by slice.
            ct: intensity volume with matching slices.

        Returns:
            Array of refined int8 slices, one per input slice.
        """
        refined = []
        pg = 0
        total = len(seg)
        for sliceid in range(total):
            refined.append(self._refine_slice(seg[sliceid].copy(), ct[sliceid]))
            pg = self._report_progress(sliceid, total, pg)
        return np.array(refined)

    # ------------------------------------------------------------------
    # Orchestration
    # ------------------------------------------------------------------
    def unzip(self, filename, to_path):
        """Extract every member of the zip archive ``filename`` into ``to_path``."""
        # The context manager closes the archive even if extraction fails.
        with zipfile.ZipFile(filename) as archive:
            archive.extractall(to_path)

    def _read_and_process(self, task, reader):
        """Read one ``(path, case_id)`` task with ``reader`` and run the pipeline."""
        file, caseid = task
        try:
            ctframes, spacing = reader(file)
        except Exception as e:
            # Worker boundary: report the failing case and keep the batch going.
            print("Error when reading patient ", caseid)
            print(repr(e))
            return "Case " + str(caseid) + " Exception!"
        return self.ProcessPatient(ctframes, spacing, caseid)

    def ProcessDICOM(self, task):
        """Process one ``(dicom_folder, case_id)`` task; returns a status string."""
        return self._read_and_process(task, self.ReadDICOM)

    def ProcessNII(self, task):
        """Process one ``(nifti_file, case_id)`` task; returns a status string."""
        return self._read_and_process(task, self.ReadNII)

    def ProcessPatient(self, ctframes, spacing, CaseID):
        """Run resampling, segmentation, refinement and all exports for one case.

        Returns:
            The status string ``"Case <CaseID> Processed."``.
        """
        fig = plt.figure()
        try:
            fig.set_size_inches(7, 4)
            plt.hist(ctframes.flatten(), bins=80, color='c')
            plt.xlabel("Hounsfield Units (HU)")
            plt.ylabel("Frequency")
            fig.tight_layout()
            fig.savefig(self._case_file('HU', CaseID, '.png'))
        finally:
            plt.close(fig)

        ctresampled, newspacing = self.resample(ctframes, spacing, [1, 1, 1])
        ctresampled[ctresampled < 0] = 0
        print("Shape before resampling\t", ctframes.shape)
        print("Shape after resampling\t", ctresampled.shape)

        segmented_lungs = self.segment_lung_mask(ctresampled, True)
        self.SaveLungAreas(segmented_lungs, self._case_file('lungAreasInit', CaseID, '.png'))

        lungFrames = self.SegRefine(segmented_lungs, ctresampled)

        # save mask
        self.SaveCompressedResults(lungFrames, self._case_file('DMM', CaseID, '.dmm'), CaseID)

        # save ct lung
        ctresampled[lungFrames == 0] = 0
        self.SaveCompressedResults(ctresampled, self._case_file('DTM', CaseID, '.dtm'), CaseID)

        self.SaveLungAreas(lungFrames, self._case_file('lungAreasFinal', CaseID, '.png'))
        return "Case " + str(CaseID) + " Processed."

    def _stage_batch(self, batch, needUnzip):
        """Return the tasks of ``batch`` ready for the workers.

        With ``needUnzip`` each archive is extracted into ``outputPath``
        (unless its ``Patient <id>/CT/`` folder already exists) and the task
        path is replaced by that folder. Every task stays a
        ``(path, case_id)`` tuple because the workers unpack it.
        """
        staged = []
        for file, caseid in batch:
            if not needUnzip:
                staged.append((file, caseid))
                continue
            # Trailing '' keeps the trailing separator on the folder path.
            ct_dir = os.path.join(self.outputPath, 'Patient ' + str(caseid), 'CT', '')
            if not os.path.exists(ct_dir) and os.path.exists(file):
                self.unzip(file, self.outputPath)
                print('unzipped ' + file)
            staged.append((ct_dir, caseid))
        return staged

    def ParallelProcessing(self, needUnzip = True, batchSize = 10):
        """Process all tasks in batches, each batch on its own process pool.

        Args:
            needUnzip: when True the task paths are zip archives that are
                extracted into ``outputPath`` right before their batch runs.
            batchSize: number of tasks per batch and of worker processes.

        Raises:
            ValueError: if ``batchSize`` is smaller than 1.
        """
        if batchSize < 1:
            raise ValueError("batchSize must be at least 1")

        pending = list(self.tasks)
        worker = self.ProcessDICOM if self.isDicom else self.ProcessNII

        for start in range(0, len(pending), batchSize):
            # in case the file is zipped, unzip it first
            subtasks = self._stage_batch(pending[start:start + batchSize], needUnzip)
            print("Processing Case {0}".format(subtasks))

            with Pool(batchSize) as p:
                print(p.map(worker, subtasks))
                p.close()
                p.join()
            # Extracted folders are left on disk; nothing is removed here.
if __name__ == "__main__":
    # Switch between the two supported inputs: zipped DICOM series (False)
    # or NIfTI files without case ids (True).
    USE_NII = False

    if not USE_NII:
        #####################################Zipped DICOM files ########################
        PATH = '/guoqing/project/covid/'
        files = [os.path.basename(file) for file in glob.glob(PATH + "*.zip")]
        # Archive names are sliced as "<8 chars><case id>.zip", e.g. "Patient-12.zip".
        files.sort(key=lambda x: int(x[8:-4]))
        tasks = []
        for f in files:
            CaseID = int(f[8:-4])
            dstFile = PATH + 'DTM/Patient-' + str(CaseID) + '.dtm'
            if os.path.exists(dstFile):
                # Already processed: drop the extracted folder and skip the case.
                if os.path.exists(PATH + 'Patient ' + str(CaseID) + '/CT/'):
                    rmtree(PATH + 'Patient ' + str(CaseID) + '/')
                    print("Remove Processed Folder ", PATH + 'Patient ' + str(CaseID) + '/')
                continue
            # Queue the archive that was actually found by the glob above. The
            # previous code queued an unformatted '...Patient-{0}.zip'
            # template, which never exists on disk, so nothing was unzipped.
            tasks.append((PATH + f, CaseID))
    else:
        ################################ NII Files without CaseID ###########################
        PATH = '/guoqing/project/covid/NII-OUT/'
        # Keep the full paths (the reader needs them) and sort so the
        # generated case ids are stable between runs.
        files = sorted(glob.glob("/guoqing/project/covid/NII/*.gz"))
        tasks = []
        for i in range(len(files)):
            tasks.append((files[i], 2000 + i)) # file, and caseid

    # NIfTI inputs are neither DICOM nor zipped.
    segmentor = LungSegmentation(tasks=tasks, outputPath = PATH, isDicom=not USE_NII)
    segmentor.ParallelProcessing(needUnzip=not USE_NII)