forked from arcaro-lab/dhcp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathregistration_pipeline.py
294 lines (207 loc) · 8.52 KB
/
registration_pipeline.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
'''
This analysis conducts necessary preprocesing steps in prep for analysis including:
Computing reigstration between individual anat to fsaverage via afni and freesurfer
Registering atlases to the individual
Extracting timeseries data from each roi of an atlast
'''
project_name = 'dhcp'
import os
#get current working directory
cwd = os.getcwd()
git_dir = os.path.join(cwd.split(project_name)[0], project_name)
import sys
#add git_dir to path
sys.path.append(git_dir)
import numpy as np
import pandas as pd
from glob import glob as glob
import pdb
import dhcp_params as params
import subprocess
import time
#print date and time
print(time.strftime("%d/%m/%Y %H:%M:%S"))
group = 'infant'
group_info = params.load_group_params(group)
#set directories
raw_data_dir = group_info.raw_data_dir
raw_anat_dir = group_info.raw_anat_dir
raw_func_dir = group_info.raw_func_dir
out_dir = group_info.out_dir
#set suffixes for anat and func files
anat_suf = group_info.anat_suf
func_suf = group_info.func_suf
#directory with preprocessing scripts
script_dir = f'{git_dir}/fmri'
#participants_file = 'participants_dhcp'
#load subject list
full_sub_list = group_info.sub_list
#limit to subs with 1 in to_run col
sub_list = full_sub_list[full_sub_list['to_run']==1]
#reset index
sub_list.reset_index(drop=True, inplace=True)
#limit to first 30 subjects
#sub_list = full_sub_list.head(200)
#set atlas
atlas = 'wang'
roi = 'pulvinar'
'''
Flags to determine which preprocessing steps to run
'''
#finds eligible subjects based on having all scans
find_eligible_subs = False
#extract brain
extract_brain = True
#Reg-phase1-4 : Register individual anat to fsaverage
reg_phase1 = True
reg_phase2 = False
reg_phase3 = True
reg_phase4 = True
#Registers atlas to individual anat
register_atlas = True
#split atlas into individual rois
split_atlas = True
#extracts mean timeseries from each roi of atlas
extract_ts_roi = True
#Register volumetric roi to individual anat
register_vol_roi = False
#extract voxel-wise timeseries from rois
extract_ts_voxel = False
def find_eligble_subs():
'''
Pre-registration phase: Check which subjects have all scans
Create new participant list with eligible subjects
'''
raw_sub_list = pd.read_csv(f'{raw_func_dir}/participants.tsv', sep='\t')
#create new sub list
sub_list = pd.DataFrame(columns=raw_sub_list.columns.to_list() + ['ses'])
for sub in raw_sub_list['participant_id']:
#check if subject has all scans
anat_files = glob(f'{raw_anat_dir}/sub-{sub}/*/anat/*{anat_suf}.nii.gz')
func_files = glob(f'{raw_func_dir}/sub-{sub}/*/func/*{func_suf}.nii.gz')
#determine session number
ses = glob(f'{raw_anat_dir}/sub-{sub}/ses-*')
ses = ses[0].split('/')[-1]
#if subject has all scans, add to new list
if len(anat_files) > 0 and len(func_files) > 0:
#add subject and ses to new final_sub_list
sub_list = pd.concat([sub_list, raw_sub_list[raw_sub_list['participant_id']==sub]], ignore_index=True)
sub_list.loc[sub_list['participant_id']==sub, 'ses'] = ses
#append sub- to participant_id
sub_list['participant_id'] = 'sub-' + sub_list['participant_id']
#add columns for phase 1-4 as empty
sub_list['phase_1'] = ''
sub_list['phase_2'] = ''
sub_list['phase_3'] = ''
sub_list['phase_4'] = ''
#save new list
sub_list.to_csv(f'{out_dir}/{group_info.participants_file}.csv', index=False)
#print total number of subjects
print(f'Total usable of participants: {len(sub_list)}')
def launch_script(sub_list,script_name, analysis_name,pre_req='', atlas = ''):
'''
Launches preprocessing script and checks for errors
'''
#create new column if analysis_name doesn't exist
if analysis_name not in sub_list.columns:
sub_list[analysis_name] = ''
full_sub_list[analysis_name] = ''
#check if script has already been run and whether pre-reqs have been met
curr_subs = sub_list[sub_list[analysis_name]!=1]
#if pre-reqs is not None, check if pre-reqs have been met
if pre_req != '':
curr_subs = curr_subs[curr_subs[pre_req]==1]
for sub, ses in zip(curr_subs['participant_id'], curr_subs['ses']):
print(f'Running {analysis_name} for {sub}')
try:
#run script
bash_cmd = f'python {script_dir}/{script_name} {sub} {group} {atlas}'
subprocess.run(bash_cmd, check=True, shell=True)
#set analysis_name to 1
sub_list.loc[sub_list['participant_id']==sub, analysis_name] = 1
#update the full_sub_list and save it
#note: full_sub_list is kept outside of the function so its not overwritten
full_sub_list.update(sub_list)
#reset index
full_sub_list.reset_index(drop=True, inplace=True)
full_sub_list.to_csv(f'{group_info.sub_file}', index=False)
except Exception as e:
#open log file
log_file = open(f'{script_dir}/qc/preproc_log.txt', 'a')
#write error to log file
log_file.write(f'{time.strftime("%d/%m/%Y %H:%M:%S:")} Error in {script_name} for {sub}: {e}\n')
log_file.close()
#time it
start = time.time()
if find_eligible_subs:
'''
Pre-registration phase: Check which subjects have all scans
Create new participant list with eligible subjects
'''
find_eligble_subs()
if extract_brain:
'''
Extract brain
'''
launch_script(sub_list = sub_list,script_name='extract_brain.py',analysis_name=f'extract_brain')
if reg_phase1:
'''
Phase 1: Converts GIFTI files to surf
'''
launch_script(sub_list = sub_list,script_name='phase1_registration.py',analysis_name='phase_1',pre_req='extract_brain')
if reg_phase2:
'''
Phase 2: Write curv and sulc data to txt in matlab
Note: subject loop and error logging is done in matlab script
*If this crashes on a new dataset, check the directory in the matlab script
**MAYBE SKIPPING THIS
'''
print('Running phase 2 registration')
bash_cmd = "matlab.exe -batch \"run('fmri/phase2_registration.m')\""
subprocess.run(bash_cmd, check=True, shell=True)
#load updated sub list
#this needs to be reloaded because matlab script updates it
full_sub_list = pd.read_csv(f'{out_dir}/{participants_file}.csv')
#Matlab adds a bunch of nans to empty cells, so this deletes them
full_sub_list.replace(np.nan,'',inplace=True)
#save updated sub list
full_sub_list.to_csv(f'{out_dir}/{participants_file}.csv', index=False)
if reg_phase3:
'''
Phase 3 of registration pipeline: Creates final surfaces and registers to fsaverage
'''
launch_script(sub_list = sub_list,script_name='phase3_registration.py',analysis_name='phase_3',pre_req='phase_1')
if reg_phase4:
'''
Phase 4 of registration pipeline: Registers anat to EPI
'''
launch_script(sub_list = sub_list,script_name='phase4_registration.py',analysis_name='phase_4',pre_req='phase_3')
if register_atlas:
'''
Register atlas to individual anat
'''
launch_script(sub_list = sub_list,script_name='register_atlas.py',analysis_name=f'{atlas}_reg',pre_req='phase_4', atlas = atlas)
if split_atlas:
'''
Split atlas into individual rois
'''
launch_script(sub_list = sub_list,script_name='split_atlas.py',analysis_name=f'{atlas}_split',pre_req=f'{atlas}_reg', atlas = atlas)
if extract_ts_roi:
'''
Extract mean timeseries from each roi of atlas
'''
launch_script(sub_list = sub_list,script_name='extract_ts_roi.py',analysis_name=f'{atlas}_ts',pre_req=f'{atlas}_split', atlas = atlas)
if register_vol_roi:
'''
Register volumetric roi to individual anat
'''
launch_script(sub_list = sub_list,script_name='register_vol_roi.py',analysis_name=f'{roi}_reg',pre_req=f'extract_brain', atlas = roi)
if extract_ts_voxel:
'''
Extract voxel-wise timeseries from all voxels of an roi
*note: this is a very time and memory intensive if you have many ROIs
'''
launch_script(sub_list = sub_list,script_name='extract_ts_voxel.py',analysis_name=f'{roi}_ts',pre_req=f'{roi}_reg', atlas = roi)
#end time
end = time.time()
print(f'Total time: {(end-start)/60}')