forked from danikiyasseh/loading-physiological-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_ptb_ecg.py
124 lines (104 loc) · 4.16 KB
/
load_ptb_ecg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Dec 27 15:38:15 2019
@author: Dani Kiyasseh
"""
import os
import wfdb
import pickle
import numpy as np
from scipy import signal
#%%
# Root directory of the extracted PTB Diagnostic ECG Database.
# The path used to exist only as a commented-out, machine-specific literal,
# which left `basepath` undefined: the directory listing below and every
# function reading this global raised NameError. Point PTB_BASEPATH at a local
# copy of the dataset; the original author's location is kept as the fallback.
basepath = os.environ.get('PTB_BASEPATH','/home/scro3517/Desktop/ptb-diagnostic-ecg-database-1.0.0')
# Every sub-directory of the dataset root is treated as one patient.
patient_folders = sorted(folder for folder in os.listdir(basepath) if os.path.isdir(os.path.join(basepath,folder)))
# Skip the 'patient_data' output directory that save_arrays() creates under basepath.
patient_folders = [folder for folder in patient_folders if 'patient_data' not in folder]
# Number of (resampled) points per frame cut by obtain_arrays().
samples = 2500
# Lead names processed by the __main__ driver, one lead per run.
leads_list = ['i','ii','iii','avr','avl','avf','v1','v2','v3','v4','v5','v6']
#leads = ['ii']
# 'binary' makes obtain_arrays() keep only the two diagnoses it names; any
# other value keeps every label.
classification = 'binary'
def obtain_arrays(patient_folders,leads,classification):
    """Load PTB records and cut one lead per record into fixed-length frames.

    For every folder, each ``*.hea`` header found under ``basepath/folder``
    is read with ``wfdb.rdsamp``. The first channel (in record order) whose
    name appears in ``leads`` is resampled to half its length with
    ``scipy.signal.resample`` and split into consecutive, non-overlapping
    frames of ``samples`` points; a trailing partial frame is dropped.

    Relies on the module-level globals ``basepath`` and ``samples``.

    Args:
        patient_folders: iterable of folder names located under ``basepath``.
        leads: collection of channel names to look for. Only the first
            matching channel of a record is used, even if several match.
        classification: when equal to ``'binary'`` only records labelled
            ``'Myocardial infarction'`` or ``'Healthy control'`` are kept;
            any other value keeps every record.

    Returns:
        Tuple ``(inputs, outputs, all_labels)``:
        ``inputs`` maps folder name to an array of frames (one row per
        frame), ``outputs`` maps folder name to an array with the label of
        each frame, and ``all_labels`` is the flat list of frame labels over
        all folders. Folders that yield no frame are absent from both dicts.
    """
    inputs = dict()
    outputs = dict()
    all_labels = []
    binary_labels = ('Myocardial infarction','Healthy control')
    for folder in patient_folders:
        folderpath = os.path.join(basepath,folder)
        # wfdb expects the record name without extension; one record per header file.
        record_names = [fname.split('.hea')[0] for fname in os.listdir(folderpath) if '.hea' in fname]
        frames = []
        frame_labels = []
        for record_name in record_names:
            record = wfdb.rdsamp(os.path.join(folderpath,record_name))
            signals,fields = record[0],record[1]
            # Label is the text after ': ' in the fifth header comment
            # (presumably the "Reason for admission" line - confirm against the dataset).
            label = fields['comments'][4].split(': ')[1]
            # Filter before resampling so discarded records cost nothing.
            if classification == 'binary' and label not in binary_labels:
                continue
            lead_indices = np.where([ld in leads for ld in fields['sig_name']])[0]
            if len(lead_indices) == 0:
                # None of the requested leads is in this record; previously this
                # crashed with IndexError on the empty array's shape.
                continue
            # signals is indexed (sample, channel); only the first matching
            # channel ever reached the output, so only that one is resampled.
            lead_signal = signals[:,lead_indices[0]]
            lead_signal = signal.resample(lead_signal,len(lead_signal)//2)
            for nframe in range(len(lead_signal)//samples):
                frames.append(lead_signal[samples*nframe:samples*(nframe+1)])
                frame_labels.append(label)
                all_labels.append(label)
        # Only folders that produced at least one frame are reported.
        if frames:
            inputs[folder] = np.array(frames)
            outputs[folder] = np.array(frame_labels)
    return inputs,outputs,all_labels
#%%
def encode_outputs(outputs,all_labels):
    """Replace the class-name labels of every patient with integer codes.

    Codes are assigned by the sorted order of the distinct values in
    ``all_labels`` (the first name in sorted order becomes 0, the next 1, ...),
    which is the numbering the previous scikit-learn ``LabelEncoder`` based
    implementation produced.

    Args:
        outputs: dict mapping a patient identifier to a sequence of labels.
            It is modified in place.
        all_labels: iterable of every label that may occur in ``outputs``.

    Returns:
        The same ``outputs`` dict, each value replaced by an int64 array of
        codes.

    Raises:
        ValueError: if ``outputs`` contains a label absent from ``all_labels``.
    """
    # Lookup table: class name -> position in the sorted list of distinct names.
    code_of = {label: code for code,label in enumerate(sorted(set(all_labels)))}
    for patient_number,labels in outputs.items():
        try:
            codes = [code_of[label] for label in labels]
        except KeyError as err:
            raise ValueError('y contains previously unseen labels: %s' % (err,)) from err
        # Re-assigning an existing key while iterating is safe (dict size is unchanged).
        outputs[patient_number] = np.array(codes,dtype=np.int64)
    return outputs
#%%
def save_arrays(inputs,outputs,leads):
    """Pickle the frame and label dicts into a per-lead directory.

    The files ``ecg_signal_frames_ptb.pkl`` and ``ecg_signal_labels_ptb.pkl``
    are written to ``<basepath>/patient_data/leads_<leads>``, where
    ``<leads>`` is the string form of the ``leads`` argument. The directory
    is created when missing. Relies on the module-level global ``basepath``.

    Args:
        inputs: object pickled into the frames file.
        outputs: object pickled into the labels file.
        leads: value identifying the processed lead(s); used only to name the
            directory and in the confirmation message.
    """
    # (leads,) keeps the formatting identical for lists and avoids %-unpacking
    # if a tuple is ever passed.
    savepath = os.path.join(basepath,'patient_data','leads_%s' % (leads,))
    # Create the directory without touching the working directory: the former
    # os.chdir() probe changed the process cwd (breaking a relative basepath on
    # later calls) and its bare except hid unrelated errors.
    os.makedirs(savepath,exist_ok=True)
    # Save inputs and labels dicts for splitting later.
    with open(os.path.join(savepath,'ecg_signal_frames_ptb.pkl'),'wb') as f:
        pickle.dump(inputs,f)
    with open(os.path.join(savepath,'ecg_signal_labels_ptb.pkl'),'wb') as f:
        pickle.dump(outputs,f)
    print('Leads %s Saved!' % (leads,))
#%%
if __name__ == '__main__':
    # Run the load -> encode -> save pipeline once per lead; the single lead
    # name is wrapped in a list before being handed to the helpers.
    for lead_name in leads_list:
        leads = [lead_name]
        frames_by_patient,labels_by_patient,label_pool = obtain_arrays(patient_folders,leads,classification)
        encoded_labels = encode_outputs(labels_by_patient,label_pool)
        save_arrays(frames_by_patient,encoded_labels,leads)