-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdata_setup.py
132 lines (100 loc) · 4.45 KB
/
data_setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import numpy as np
import pickle
import os
"""
Includes functions to load the RML2016 dataset and load it as numpt arrays and other data manipulation.
"""
# A function which loads the RML2016 data as bytes
def load_data(path:str) -> dict:
"""
A function to load the RML2016 dataset as bytes
Args:
path : is a string where the file is located
Returns:
the dataset as a dictionary
"""
with open(path , 'rb') as f:
data = pickle.load(f, encoding= "bytes")
return data
# Function to get the modulation names from the RML2016 dataset
def modulation_name_list(data : dict) -> list:
"""
A funtion which recieves a dict and retunrs the the key namse as list of bytes!
Args:
data : its a dict, which is loaded using pickle.load
Returns:
A list of modulation names which are in byte not str
"""
list_of_modulation = []
for key in data.keys():
list_of_modulation.append(key[0]) # append the first element because the keys are in (mod , snr) format
return list(set(list_of_modulation))
# Function to filter the data on desired modulations
def filter_on_modulation(data : dict , keys_to_exclude : list = None, keys_to_inculde : list = None) -> dict:
"""
A function to exclude the modulation from the main RML data
Args:
data : which a dict
keys_to_exclude : its a list of bytes, becasue the keys from the original data is loaded as bytes
Returns:
return another dict without the modulations specified
"""
if keys_to_inculde:
filtered_data = {key: value for key , value in data.items() if key[0] in keys_to_inculde}
else :
filtered_data = {key: value for key , value in data.items() if key[0] not in keys_to_exclude}
return filtered_data
def filter_on_snr(data:dict , snr_range:tuple):
filtered_data = {key: value for key , value in data.items() if key[1] in range(snr_range[0],snr_range[1])}
return filtered_data
def groupe_labels(labels : list, keys: list, target_label):
labels_copy = np.copy(labels)
for index in range(len(labels_copy)):
if labels_copy[index,0] in keys:
labels_copy[index,0] = target_label
return labels_copy
# A function to create splits of train and test split for RML2016 dataset
def create_train_test_splits(data : dict , ratio : int) -> np.array:
"""
This function recieves the RML dataset and splits each modulation with respect to raito into
train and test numpy arrays
Args:
data : which is RML2016
ratio : is an int which uses to split each modulation
Returns:
4 splits of np.array of train , test and their respective targets, ecah target is (mod_type , snr)
"""
data_copy = data.copy()
train_data = []
train_labels = []
test_data = []
test_labels = []
train_test_ratio = ratio
for items in data_copy:
mod , snr = items
num_train_samples = int(data_copy[items].shape[0]* (1 - train_test_ratio))
num_test_samples = int(data_copy[items].shape[0] - num_train_samples )
train_data.append(data_copy[items][:num_train_samples])
train_labels.append([[mod, snr]] * num_train_samples)
test_data.append(data_copy[items][num_train_samples:])
test_labels.append([[mod, snr]] * num_test_samples)
train_data_np = np.array(train_data).reshape((-1,2,128))
test_data_np = np.array(test_data).reshape((-1,2,128))
train_labels_np = np.array(train_labels).reshape((-1,2))
test_labels_np = np.array(test_labels).reshape((-1,2))
return train_data_np , train_labels_np , test_data_np , test_labels_np
# Function to load data as numpy arrays with their labels
def load_digital_mods(data: dict, ratio : int) -> np.array:
"""
This function first filter outs the RML2016 data on analogue modulation
then returns splits of train and test data with their labels as numpy arrays.
Args:
data : which is the loaded RML2016
ratio : the ratio which to split train and test data
Returns:
train and test data and their lables and the label to one hot converter
"""
filtered_data = filter_on_modulation(data = data , keys_to_exclude = [b'AM-SSB', b'WBFM', b'AM-DSB'])
filterd_mods = modulation_name_list(filtered_data)
train_data , train_label , test_data , test_label = create_train_test_splits(filtered_data , ratio)
return train_data , train_label , test_data , test_label, filterd_mods