-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_reader_2023.py
247 lines (205 loc) · 10.1 KB
/
data_reader_2023.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
import os
import mne
import numpy as np
from scipy.signal import butter, lfilter
import matplotlib.pyplot as plt
# import h5py
from scipy.signal import iirnotch, lfilter, filtfilt, resample
from scipy.fft import rfft, rfftfreq
def get_all_TUSZ_2023_session_paths(rootPath):
    """Collect the paths of all ``.edf`` recordings below a TUSZ-style tree.

    The directory layout walked is
    ``rootPath/<patient>/<patient_session>/<reference_type>/<recording>.edf``.
    Entries that are not directories at the patient, session or reference-type
    level (stray files such as a README) are skipped instead of crashing the
    walk.

    Args:
        rootPath: Directory whose immediate sub-directories are patients.

    Returns:
        A tuple ``(session_paths, all_patients, reference_type_count)``:

        * ``session_paths`` -- list of paths to every ``.edf`` file found.
        * ``all_patients`` -- names of the patient directories in ``rootPath``.
        * ``reference_type_count`` -- dict mapping each reference-type
          directory name to the number of session directories containing it.

    Raises:
        OSError: If ``rootPath`` (or a directory below it) cannot be listed.
    """
    session_paths = []
    reference_type_count = {}
    all_patients = [
        entry for entry in os.listdir(rootPath)
        if os.path.isdir(os.path.join(rootPath, entry))
    ]
    for patient in all_patients:
        patient_dir = os.path.join(rootPath, patient)
        for patient_session in os.listdir(patient_dir):
            session_dir = os.path.join(patient_dir, patient_session)
            if not os.path.isdir(session_dir):
                continue
            for reference_type in os.listdir(session_dir):
                reference_dir = os.path.join(session_dir, reference_type)
                if not os.path.isdir(reference_dir):
                    continue
                reference_type_count[reference_type] = (
                    reference_type_count.get(reference_type, 0) + 1
                )
                for file_name in os.listdir(reference_dir):
                    # Keep the real file name: rebuilding it from
                    # split('.')[0] breaks names containing extra dots.
                    if file_name.endswith('.edf'):
                        session_paths.append(os.path.join(reference_dir, file_name))
    return session_paths, all_patients, reference_type_count
def get_channels_from_raw(raw):
    """Build a 20-derivation bipolar montage from referential EEG channels.

    Each output row is ``montage_list_1[k] - montage_list_2[k]``, i.e. the
    chains FP1-F7, F7-T3, T3-T5, T5-O1, FP2-F8, F8-T4, T4-T6, T6-O2, T3-C3,
    C3-CZ, CZ-C4, C4-T4, FP1-F3, F3-C3, C3-P3, P3-O1, FP2-F4, F4-C4, C4-P4
    and P4-O2.

    Args:
        raw: Object exposing ``ch_names`` (list of channel names) and
            ``get_data(picks=...)`` -- presumably an ``mne.io.Raw``; confirm
            against the caller.

    Returns:
        ``(flag_wrong, signals)``. On success ``flag_wrong`` is ``False`` and
        ``signals`` is the element-wise difference of the two channel
        selections. If a required channel is missing or the data cannot be
        read, ``(True, 0)`` is returned after printing a diagnostic.
    """
    # First electrode of every bipolar pair.
    montage_list_1 = ["EEG FP1-REF", "EEG F7-REF", "EEG T3-REF", "EEG T5-REF",
                      "EEG FP2-REF", "EEG F8-REF", "EEG T4-REF", "EEG T6-REF",
                      "EEG T3-REF", "EEG C3-REF", "EEG CZ-REF",
                      "EEG C4-REF", "EEG FP1-REF", "EEG F3-REF",
                      "EEG C3-REF", "EEG P3-REF", "EEG FP2-REF", "EEG F4-REF",
                      "EEG C4-REF", "EEG P4-REF"]
    # Second electrode of every bipolar pair (subtracted from the first).
    montage_list_2 = ["EEG F7-REF", "EEG T3-REF", "EEG T5-REF", "EEG O1-REF",
                      "EEG F8-REF", "EEG T4-REF", "EEG T6-REF", "EEG O2-REF",
                      "EEG C3-REF", "EEG CZ-REF", "EEG C4-REF",
                      "EEG T4-REF", "EEG F3-REF", "EEG C3-REF",
                      "EEG P3-REF", "EEG O1-REF", "EEG F4-REF", "EEG C4-REF",
                      "EEG P4-REF", "EEG O2-REF"]
    try:
        # The name lookup lives inside the try so that a recording lacking one
        # of the channels is reported through the flag, like any read failure.
        montage_indices_1 = [raw.ch_names.index(ch) for ch in montage_list_1]
        montage_indices_2 = [raw.ch_names.index(ch) for ch in montage_list_2]
        signals_1 = raw.get_data(picks=montage_indices_1)
        signals_2 = raw.get_data(picks=montage_indices_2)
    except Exception as err:
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate instead of being reported as a bad recording.
        print('Something is wrong when reading channels of the raw EEG signal', repr(err))
        return True, 0
    return False, signals_1 - signals_2
def butter_bandpass(lowcut, highcut, fs, order=3):
    """Design a Butterworth band-pass filter.

    Args:
        lowcut: Lower cut-off frequency, in the same unit as ``fs``.
        highcut: Upper cut-off frequency, in the same unit as ``fs``.
        fs: Sampling frequency of the signal to be filtered.
        order: Order passed to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` numerator/denominator coefficient arrays.
    """
    nyquist = 0.5 * fs
    # Without an explicit `fs`, butter() takes band edges as fractions of Nyquist.
    normalized_band = [edge / nyquist for edge in (lowcut, highcut)]
    return butter(order, normalized_band, btype='band')
def butter_bandpass_filter(data, lowcut, highcut, fs, order=3):
    """Band-pass filter ``data`` with a Butterworth filter run forward and backward.

    Args:
        data: Signal to filter; filtering runs along the last axis
            (``filtfilt`` default).
        lowcut: Lower cut-off frequency.
        highcut: Upper cut-off frequency.
        fs: Sampling frequency of ``data``.
        order: Order of the Butterworth design.

    Returns:
        The filtered signal, same shape as ``data``.
    """
    coefficients = butter_bandpass(lowcut, highcut, fs, order=order)
    # filtfilt applies the filter in both directions, unlike a single lfilter pass.
    return filtfilt(*coefficients, data)
def _slice_one_label(signals, thisFS, this_label, segment_interval, overlap_ratio, noise_threshold):
    """Return the clean, full-length windows cut from one ``(start, stop, type)`` label."""
    # Convert times to integer sample indices so that float label times,
    # a float sampling rate or a float window length do not break
    # range() / slicing.
    start = int(round(this_label[0] * thisFS))
    stop = int(round(this_label[1] * thisFS))
    window = int(round(segment_interval * thisFS))
    step = int(segment_interval * (1 - overlap_ratio) * thisFS)

    windows = []
    for begin in range(start, stop, step):
        end = begin + window
        if end > stop:
            # The remaining tail of the label is shorter than one window.
            break
        channels = []
        for channel in signals:
            chunk = channel[begin:end]
            if len(chunk) < window:
                # The recording ends before the label does.
                break
            if np.max(np.abs(chunk)) > noise_threshold:
                # Amplitude artifact: reject the whole window.
                break
            channels.append(chunk)
        else:
            # Reached only when no channel was rejected.
            if channels:
                windows.append(np.array(channels))
    return windows


def slice_signals_into_binary_segments(signals, thisFS, labels, segment_interval, seizure_types,
                                       seizure_overlapping_ratio, noise_threshold=500 / 10**6):
    """Slice labelled EEG into fixed-length windows for background-vs-seizure use.

    Labels whose type is ``'bckg'`` go to class 0, every other label goes to
    class 1.

    Args:
        signals: Sequence of per-channel 1-D arrays (or a 2-D array with one
            row per channel).
        thisFS: Sampling frequency of ``signals``.
        labels: Iterable of ``(start, stop, type)`` entries; ``start``/``stop``
            are multiplied by ``thisFS`` to obtain sample indices.
        segment_interval: Window length, in the same time unit as the labels.
        seizure_types: Only its length is used, to size the returned list
            (never fewer than two entries).
        seizure_overlapping_ratio: Per-class overlap fraction between
            consecutive windows; index 0 is background, index 1 is seizure.
        noise_threshold: A window is discarded when any channel's absolute
            amplitude exceeds this value. Default ``500 / 10**6`` --
            presumably 500 microvolts if the signals are in volts; confirm
            against the caller.

    Returns:
        ``segments`` where ``segments[c]`` holds, for every label of class
        ``c``, a list of ``(n_channels, window)`` arrays.

    Raises:
        ValueError: If the overlap ratio makes the window step zero.
    """
    # Indices 0 and 1 are always addressed, so guarantee both exist.
    segments = [[] for _ in range(max(2, len(seizure_types)))]
    for this_label in labels:
        label_index = 0 if this_label[2] == 'bckg' else 1
        segments[label_index].append(
            _slice_one_label(signals, thisFS, this_label, segment_interval,
                             seizure_overlapping_ratio[label_index], noise_threshold)
        )
    return segments


def slice_signals_into_multiclass_segments(signals, thisFS, labels, segment_interval, seizure_types,
                                           seizure_overlapping_ratio, noise_threshold=500 / 10**6):
    """Slice labelled EEG into fixed-length windows grouped by seizure type.

    A label's class is its position in ``seizure_types``; labels whose type is
    not listed are reported on stdout and skipped.

    Args:
        signals: Sequence of per-channel 1-D arrays (or a 2-D array with one
            row per channel).
        thisFS: Sampling frequency of ``signals``.
        labels: Iterable of ``(start, stop, type)`` entries; ``start``/``stop``
            are multiplied by ``thisFS`` to obtain sample indices.
        segment_interval: Window length, in the same time unit as the labels.
        seizure_types: Ordered list of the label types to keep.
        seizure_overlapping_ratio: Per-class overlap fraction between
            consecutive windows, indexed like ``seizure_types``.
        noise_threshold: A window is discarded when any channel's absolute
            amplitude exceeds this value (default ``500 / 10**6``).

    Returns:
        ``segments`` where ``segments[c]`` holds, for every label of class
        ``c``, a list of ``(n_channels, window)`` arrays.

    Raises:
        ValueError: If the overlap ratio makes the window step zero.
    """
    segments = [[] for _ in range(len(seizure_types))]
    for this_label in labels:
        if this_label[2] not in seizure_types:
            print('Seizure type not included: ', this_label[2])
            continue
        label_index = seizure_types.index(this_label[2])
        segments[label_index].append(
            _slice_one_label(signals, thisFS, this_label, segment_interval,
                             seizure_overlapping_ratio[label_index], noise_threshold)
        )
    return segments
def plot_signal_in_frequency(signal, filtered_signal, sample_rate):
    """Show the magnitude spectra of a signal and its filtered version side by side.

    Args:
        signal: Original 1-D signal.
        filtered_signal: Filtered signal; plotted against the frequency axis
            derived from ``len(signal)``.
        sample_rate: Sampling frequency used to build the frequency axis.

    Returns:
        None. A two-panel matplotlib figure is displayed with ``plt.show()``.
    """
    # (panel title, one-sided FFT) for the left and right subplot.
    panels = (
        ('Original Signal', rfft(signal)),
        ('Filtered Signal', rfft(filtered_signal)),
    )
    # Frequencies matching the rfft output bins.
    frequency_axis = rfftfreq(len(signal), 1/sample_rate)

    plt.figure(figsize=(12, 6))
    for position, (heading, spectrum) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(frequency_axis, np.abs(spectrum))
        plt.title(heading)
        plt.xlabel('Frequency (Hz)')
        plt.ylabel('Magnitude')
    plt.tight_layout()
    plt.show()
def make_a_filtered_plot_for_comparison(signals, filtered_signals, thisFS, maximum_samples=200,
                                        channel_index=5, output_path='filtered_signal_plot.png'):
    """Save a time-domain plot comparing one channel before and after filtering.

    Args:
        signals: Unfiltered data, indexable as ``signals[channel_index]``.
        filtered_signals: Filtered data, indexable the same way.
        thisFS: Sampling frequency, used to convert sample numbers to time.
        maximum_samples: Number of leading samples to plot; ``-1`` or ``None``
            plots the whole channel. Clamped to the samples actually available.
        channel_index: Channel to plot.
        output_path: File the figure is written to.

    Returns:
        None. The figure is written to ``output_path`` and then closed.

    Raises:
        IndexError: If ``channel_index`` is out of range for either input.
    """
    noisy = np.asarray(signals[channel_index])
    filtered = np.asarray(filtered_signals[channel_index])

    # Never ask for more samples than exist, otherwise x and y lengths differ
    # and matplotlib refuses to plot.
    n_samples = min(len(noisy), len(filtered))
    if maximum_samples is not None and maximum_samples != -1:
        n_samples = min(n_samples, maximum_samples)

    # Sample k occurs at k / fs seconds.
    t = np.arange(n_samples) / thisFS

    fig = plt.figure()
    try:
        plt.plot(t, noisy[:n_samples], label='Noisy signal')
        plt.plot(t, filtered[:n_samples], label='Filtered signal')
        plt.grid(True)
        plt.axis('tight')
        plt.legend(loc='upper left')
        plt.savefig(output_path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    return
def resample_data_in_each_channel(signals, thisFS, resampleFS):
    """Resample every channel from ``thisFS`` to ``resampleFS``.

    Args:
        signals: Iterable of 1-D channels (arrays or plain sequences); the
            channels may have different lengths.
        thisFS: Current sampling frequency.
        resampleFS: Target sampling frequency.

    Returns:
        A list with one resampled array per input channel, each holding
        ``int(len(channel) * resampleFS / thisFS)`` samples.
    """
    sigResampled = []
    for sig in signals:
        # Multiply before dividing: len/thisFS*resampleFS can land just below
        # the exact integer (29/100*100 == 28.999...) and lose one sample when
        # truncated.
        num = int(len(sig) * resampleFS / thisFS)
        sigResampled.append(resample(sig, num))
    return sigResampled