forked from danikiyasseh/loading-physiological-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_cardiology_ecg.py
138 lines (118 loc) · 4.47 KB
/
load_cardiology_ecg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Thu Dec 19 10:19:19 2019
@author: Dani Kiyasseh
Loading Nature Medicine Cardiology Test Set (Ng)
"""
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import pickle
from scipy.signal import resample
#%%
# Absolute path to the dataset directory (hard-coded; adjust for your machine).
basepath = '/home/scro3517/Desktop/CARDIOL_MAY_2017'

# Framing parameters, expressed as sample counts.
# NOTE(review): `fs` is presumably the sampling rate in Hz; it is not
# referenced anywhere else in the visible script.
fs = 200
original_frame_length = 6000
samples_per_frame = 256
resampled_length = 2500

# Number of complete frames that fit in one recording, and the number of
# samples those complete frames span.
nframes = original_frame_length // samples_per_frame
samples_to_take_per_frame = nframes * samples_per_frame

# Every entry found directly under the dataset directory.
files = os.listdir(basepath)

# Unique patient ids: the text before the first underscore of every entry
# that is not a sub-directory.
non_directory_entries = filter(
    lambda entry: not os.path.isdir(os.path.join(basepath, entry)),
    files,
)
unique_patient_numbers = np.unique(
    [entry.split('_')[0] for entry in non_directory_entries]
)

# Labelling mode: 'binary' maps 'NSR' to 0 and everything else to 1 further
# down; any other value keeps the rhythm names.
classification = 'all' #all
#%%
# Build, for every patient, the list of resampled ECG frames and the rhythm
# label attached to each frame.
#   inputs[patient_number]  -> array of frames, each `resampled_length` long
#   outputs[patient_number] -> array with one label per kept frame
#   all_labels              -> every kept label, across all patients
inputs = dict()
outputs = dict()
all_labels = []
for patient_number in unique_patient_numbers:
    inputs[patient_number] = []
    outputs[patient_number] = []

    # Load the raw recording: first file whose name contains both the patient
    # id and 'ecg'. The file is read as a flat array of int16 samples
    # (original author's note: 6000x1).
    filename = [name for name in files if patient_number in name and 'ecg' in name][0]
    # Context manager so the handle is closed instead of leaking one open
    # file per patient.
    with open(os.path.join(basepath, filename), 'rb') as f:
        frame = np.fromfile(f, dtype=np.int16)

    # Load the annotation file: first file whose name contains both the
    # patient id and 'grp'.
    # NOTE(review): if several 'grp' files exist per patient, which one is
    # picked depends on the order returned by os.listdir -- confirm intent.
    group_label = [name for name in files if patient_number in name and 'grp' in name][0]
    with open(os.path.join(basepath, group_label)) as json_file:
        data = json.load(json_file)

    episodes = data['episodes']
    # Onsets are shifted by -1 (original author's note: Python indexing
    # starts at 0); offsets are used as exclusive upper bounds below.
    onsets = [episode['onset'] - 1 for episode in episodes]
    offsets = [episode['offset'] for episode in episodes]
    rhythms = [episode['rhythm_name'] for episode in episodes]

    for nframe in range(nframes):
        start_sample = nframe * samples_per_frame
        end_sample = start_sample + samples_per_frame
        mini_frame = frame[start_sample:end_sample]

        # Reset per frame so that a frame not covered by any episode can never
        # silently inherit the label of the previous frame (or of the
        # previous patient).
        mini_label = None
        # The frame takes the rhythm of the episode containing its FIRST
        # sample. If several episodes match, the last one wins, except that a
        # SUDDEN_BRADY match stops the search immediately.
        for onset, offset, rhythm in zip(onsets, offsets, rhythms):
            if onset <= start_sample < offset:
                mini_label = rhythm
                if mini_label == 'AVB_TYPE2':
                    mini_label = 'AVB'      # merged into the AVB class
                elif mini_label == 'AFL':
                    mini_label = 'AFIB'     # merged into the AFIB class
                elif mini_label == 'SUDDEN_BRADY':
                    break

        if mini_label is None:
            # No episode covers this frame's start sample: there is no label
            # to attach, so the frame is left out.
            continue
        if mini_label == 'SUDDEN_BRADY':  # don't record sudden brady
            continue

        # Resample the frame from samples_per_frame to resampled_length points.
        mini_frame = resample(mini_frame, resampled_length)

        # Binarize labels: 'NSR' -> 0, every other rhythm -> 1.
        if classification == 'binary':
            mini_label = 0 if mini_label == 'NSR' else 1

        all_labels.append(mini_label)
        inputs[patient_number].append(mini_frame)
        outputs[patient_number].append(mini_label)

    # Freeze the per-patient lists into arrays.
    inputs[patient_number] = np.array(inputs[patient_number])
    outputs[patient_number] = np.array(outputs[patient_number])
#%%
# Distinct labels in first-seen order. dict.fromkeys de-duplicates in a single
# pass while keeping insertion order, instead of testing list membership for
# every label.
unique_labels = list(dict.fromkeys(all_labels))

# Convert the labels (rhythm names, or 0/1 in binary mode) to integer class
# indices, using one encoder fitted on every label seen across all patients.
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
label_encoder.fit(unique_labels)
# Only values are replaced (no keys added or removed), so updating the dict
# while iterating over its items is safe.
for patient_number, labels in outputs.items():
    outputs[patient_number] = label_encoder.transform(labels)
#%%
# Output directory, one per classification mode, so files produced under
# different label schemes do not contaminate each other.
savepath = os.path.join(basepath,'patient_data','%s_classes' % classification)
# Create the directory (and any missing parents) if it is not there yet.
# exist_ok=True makes re-runs a no-op, while genuine failures (permissions, a
# regular file in the way) still raise instead of being swallowed by a bare
# `except:`. The working directory is intentionally left unchanged; the save
# paths below are built from `savepath`.
os.makedirs(savepath, exist_ok=True)
#%%
# Persist the per-patient dictionaries so they can be split into
# train/validation/test sets later. Frames are written first, then labels.
pickle_targets = (
    ('ecg_signal_frames_cardiology.pkl', inputs),
    ('ecg_signal_arrhythmia_labels_cardiology.pkl', outputs),
)
for pickle_name, payload in pickle_targets:
    with open(os.path.join(savepath, pickle_name), 'wb') as f:
        pickle.dump(payload, f)