-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathft_extract.py
91 lines (70 loc) · 2.92 KB
/
ft_extract.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import SimpleITK as sitk
from scipy.ndimage import zoom
import radiomics
from radiomics import featureextractor
'''
Script to extract tissue-specific radiomic features using PyRadiomics
'''
# Samples to skip. Only the text before the first ' - ' on each line of
# exclude.txt is kept as the sample name; the rest of the line is ignored.
with open('exclude.txt') as f:
    exclude = f.read().splitlines()
EXCLUDE = [entry.partition(' - ')[0] for entry in exclude]

# Tissue class handed to get_tissue_mask(): 'dense', 'adipose' or 'all'.
TISSUE_TYPE = 'dense'
# True  -> use the 3D arrays (rec_3D / mask_3D) with params_3D.yaml
# False -> use the 2D arrays (proj_2D / arg_max) with params.yaml
VOLUMETRIC = False
def get_tissue_mask(mask, tissue_type):
    """Convert a tissue label map into a binary (0/1) mask for one tissue class.

    Label convention after normalisation: 0 = air, 128 = adipose, 255 = dense.
    2D label maps carry adipose = 127 and dense = 254, so every non-zero
    label of a 2D array is shifted up by one before the selection.

    Args:
        mask: NumPy array of tissue labels. It is modified IN PLACE.
        tissue_type: 'dense', 'adipose', or 'all' (every non-air element).

    Returns:
        The same array object as ``mask``, now holding 1 where the requested
        tissue is present and 0 everywhere else.

    Raises:
        ValueError: if ``tissue_type`` is not one of the supported values.
            The check happens before ``mask`` is touched, so a rejected call
            leaves the input unchanged.
    """
    labels = {'dense': 255, 'adipose': 128}
    if tissue_type != 'all' and tissue_type not in labels:
        raise ValueError(
            "tissue_type must be 'dense', 'adipose' or 'all', got {!r}".format(tissue_type)
        )

    if mask.ndim == 2:
        # fix for adipose = 127, dense = 254
        # NOTE(review): assumes 2D maps never already contain 255; with an
        # unsigned 8-bit array that value would wrap around to 0 here.
        mask[mask != 0] += 1

    if tissue_type == 'all':
        # air is already 0, so only the non-zero labels need collapsing
        mask[mask > 0] = 1
    else:
        thresh = labels[tissue_type]
        # Clear everything else first, then relabel the selected class as 1.
        mask[mask != thresh] = 0
        mask[mask == thresh] = 1
    return mask
def main():
    """Extract radiomic features for every sample in the data directory.

    For each ``.npz`` file that is not listed in ``EXCLUDE`` the image and the
    tissue mask (selected by ``TISSUE_TYPE``) are passed to the PyRadiomics
    extractor. Result entries whose key contains 'diagnostic' are kept out of
    the feature table; the remaining values are stored as floats, one row per
    sample. Outputs, all written under ``export_dir``:

    * ``version_info_<tissue>.csv``  - diagnostic entries without 'original'
      in their key, taken from the first sample that is actually extracted.
    * ``extracted_fts_<tissue>_temp_<i>.csv`` - checkpoint of all rows so far,
      written when the file index ``i`` is a non-zero multiple of 200.
    * ``extracted_fts_<tissue>.csv`` - the complete feature table.
    """
    data_dir = '/path/to/data'
    if VOLUMETRIC:
        export_dir = './extracted_fts/3D_'
        params = './params/params_3D.yaml'
    else:
        export_dir = './extracted_fts/'
        params = './params/params.yaml'

    # export_dir is a path *prefix* (it may end in '3D_'), so create its
    # directory part up front instead of failing at the first to_csv().
    os.makedirs(os.path.dirname(export_dir), exist_ok=True)

    extractor = featureextractor.RadiomicsFeatureExtractor(params)
    excluded = set(EXCLUDE)  # O(1) membership test inside the loop

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # index i (and therefore resuming via start_i) reproducible between runs.
    start_i = 0
    data = sorted(name for name in os.listdir(data_dir) if name.endswith('.npz'))
    data = data[start_i:]

    rows = []
    version_info_written = False
    for i, sample in enumerate(data, start=start_i):
        sample_name = sample.split('.npz')[0]
        if sample_name in excluded:
            print('skipping {}...'.format(sample_name))
            continue
        print('extracting {}...'.format(sample_name))

        # The context manager closes the archive once the arrays are read.
        with np.load(os.path.join(data_dir, sample)) as sample_npz:
            if VOLUMETRIC:
                # Downsample the last two axes by a factor of 10.
                # NOTE(review): zoom() runs with its default interpolation on
                # the label mask as well; confirm the resampled mask still
                # holds only the label values the extractor expects
                # (order=0 is the usual choice for masks).
                img_arr = zoom(sample_npz['rec_3D'], (1, 0.1, 0.1))
                mask_arr = zoom(
                    get_tissue_mask(sample_npz['mask_3D'], TISSUE_TYPE),
                    (1, 0.1, 0.1),
                )
            else:
                img_arr = sample_npz['proj_2D']
                mask_arr = get_tissue_mask(sample_npz['arg_max'], TISSUE_TYPE)

        img = sitk.GetImageFromArray(img_arr)
        mask = sitk.GetImageFromArray(mask_arr)
        extracted = extractor.execute(img, mask, voxelBased=False)

        # Write the version info from the first sample that is really
        # extracted, not only when the file at index 0 happens to be it.
        if not version_info_written:
            version_info = {
                k: v for k, v in extracted.items()
                if 'diagnostic' in k and 'original' not in k
            }
            pd.Series(version_info).to_csv(
                export_dir + 'version_info_{}.csv'.format(TISSUE_TYPE)
            )
            version_info_written = True

        # One row per sample: name first, then every non-diagnostic feature.
        row = {'sample_name': sample_name}
        row.update(
            (k, float(v)) for k, v in extracted.items() if 'diagnostic' not in k
        )
        rows.append(row)

        # Periodic checkpoint so a crash does not lose everything.
        if i != start_i and i % 200 == 0:
            pd.DataFrame(rows).to_csv(
                export_dir + 'extracted_fts_{}_temp_{}.csv'.format(TISSUE_TYPE, i)
            )

    print('extraction for {} tissue complete...'.format(TISSUE_TYPE))
    pd.DataFrame(rows).to_csv(export_dir + 'extracted_fts_{}.csv'.format(TISSUE_TYPE))


if __name__ == '__main__':
    main()