-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathread_mat_files.py
80 lines (60 loc) · 2.66 KB
/
read_mat_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import scipy.io as sio
from sklearn.decomposition import PCA
import numpy as np
def get_shm_multi_class_data(
        mat_path='C:/Users/tsdev/Desktop/SHM DATA/4dof_features.mat',
        num_classes=17):
    """Load the multi-class SHM features and one-hot encode their labels.

    The ``.mat`` file must contain ``multi_class_feats`` (one row per sample)
    and ``labels``, whose first column holds the class of each sample.
    Features and labels are shuffled together (using NumPy's global random
    state) because the stored data is ordered by class.

    Parameters
    ----------
    mat_path : str
        Location of the ``.mat`` file. Defaults to the path the original
        script used.
    num_classes : int
        Width of the one-hot encoding. Defaults to 17.

    Returns
    -------
    x_data : numpy.ndarray
        Shuffled features, shape ``(n_samples, n_features)``.
    y_data_one_hot : numpy.ndarray
        One-hot labels, shape ``(n_samples, num_classes)``; row ``i`` has a
        one in column ``label_i - 1``.

    Raises
    ------
    IndexError
        If a label is larger than ``num_classes``.
    """
    mat_contents = sio.loadmat(mat_path)
    x_data = mat_contents['multi_class_feats']
    y_data = mat_contents['labels'][:, 0]
    n_samples, n_features = x_data.shape

    # Put the labels in an extra trailing column so a single in-place row
    # shuffle keeps every sample aligned with its label.
    shuffled_data = np.empty([n_samples, n_features + 1])
    shuffled_data[:, :n_features] = x_data
    shuffled_data[:, n_features] = y_data
    np.random.shuffle(shuffled_data)

    x_data = shuffled_data[:, :n_features]
    labels = shuffled_data[:, n_features].astype(int)

    # Labels are treated as 1-based: label 1 -> column 0, and so on.
    # NOTE(review): a label of 0 would wrap around to the last column
    # (negative index) rather than fail - confirm the data never contains 0.
    y_data_one_hot = np.zeros((n_samples, num_classes))
    y_data_one_hot[np.arange(n_samples), labels - 1] = 1
    return x_data, y_data_one_hot
def get_shm_two_class_data(
        normalise_x=True,
        mat_path='C:/Users/tsdev/Desktop/SHM DATA/4dof_features.mat'):
    """Load the two-class SHM features with their labels, shuffled.

    The ``.mat`` file must contain ``two_class_feats`` (one row per sample)
    and ``labels``, whose second column holds the class of each sample.
    Features and labels are shuffled together (using NumPy's global random
    state) because the stored data is ordered by class. After shuffling, the
    first 400 samples are inspected to make sure both classes are reasonably
    represented.

    Parameters
    ----------
    normalise_x : bool
        When true, the features are negated and divided by 100.
    mat_path : str
        Location of the ``.mat`` file. Defaults to the path the original
        script used.

    Returns
    -------
    x_data : numpy.ndarray
        Shuffled (optionally rescaled) features, shape
        ``(n_samples, n_features)``.
    y_data : numpy.ndarray
        Matching labels, shape ``(n_samples, 1)``.

    Raises
    ------
    ValueError
        If either of the two classes makes up less than 40% of the first 400
        shuffled samples, including the case where only one class is present.
    """
    mat_contents = sio.loadmat(mat_path)
    x_data = mat_contents['two_class_feats']
    labels = mat_contents['labels'][:, 1]
    n_samples, n_features = x_data.shape

    # Put the labels in an extra trailing column so a single in-place row
    # shuffle keeps every sample aligned with its label.
    shuffled_data = np.empty([n_samples, n_features + 1])
    shuffled_data[:, :n_features] = x_data
    shuffled_data[:, n_features] = labels
    np.random.shuffle(shuffled_data)

    x_data = shuffled_data[:, :n_features]
    y_data = shuffled_data[:, n_features:]  # keeps the (n_samples, 1) shape

    # Sanity-check the shuffle on the leading samples. np.unique returns one
    # count per class actually present, so fewer than two counts means a
    # class is missing entirely - the most imbalanced case - and must be
    # reported as an imbalance rather than crash on a missing index.
    shuffle_check = y_data[0:400, :]
    _, counts = np.unique(shuffle_check, return_counts=True)
    imbalanced = counts.size < 2
    if not imbalanced:
        percentages = counts[:2] / len(shuffle_check) * 100
        imbalanced = bool((percentages < 40).any())
    if imbalanced:
        raise ValueError('Imbalanced classes due to shuffle, please re-initialise')

    if normalise_x:
        # The raw feature values are large and negative, which causes issues
        # during optimisation; flip the sign and scale them down.
        x_data = -x_data / 100
    return x_data, y_data
def main():
    """Smoke-test both loaders using their default arguments.

    Loads the two-class data and checks that its labels come back as a single
    column, then loads the multi-class data and prints the shape of its
    feature matrix.
    """
    _, two_class_labels = get_shm_two_class_data()
    assert two_class_labels.shape[1] == 1

    multi_class_features, _ = get_shm_multi_class_data()
    print(multi_class_features.shape)


# Run the smoke test only when this file is executed as a script.
if __name__ == "__main__":
    main()