'''
Author: David Kaplan
Advisor: Stephen Penny
'''
from keras.models import Sequential
from keras.layers import Dense, Dropout
import keras
import numpy as np
import matplotlib.pyplot as plt
import logging
import copy
import math
from residnet.data_processing import wrappers, transforms, metrics
# `neural_network` (used in ex6) is assumed to live alongside the other
# comparison methods; adjust the import path to match the package layout.
from residnet.comparison_methods import classification, interpolation, neural_network
from residnet import constants, visualization, util, datasets

logging.basicConfig(format = constants.LOGGING_FORMAT, level = logging.INFO)

def ex1():
    '''Load raw HYCOM data and parse it from scratch, then save.
    Here we only parse 2 days' worth of data from 2005.
    '''
    prep_data = wrappers.DataPreprocessing(
        name = 'sample',
        years = ['2005'],
        denorm_local = False,
        num_days = 2)
    prep_data.parse_data()
    prep_data.save()
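
# Note: `prep_data.save()` above is what ex2 loads below; the
# 'output/datapreprocessing/sample_denormLocalFalse_res6' path there reflects
# the name, denorm_local, and resolution settings used in ex1.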

def ex2():
    '''Load preprocessed data and create a training DataWrapper.
    '''
    # Load the preprocessed data from `ex1`
    d = wrappers.DataPreprocessing.load('output/datapreprocessing/sample_denormLocalFalse_res6')

    # Create an index dictionary. This should be of the form:
    # key -> float, where sum(values) <= 1.0. The values represent what
    # proportion of the data to put into the 'key'th set.
    idxs = {'training': 0.9, 'validation': 0.1}

    # 'split' means that we split the data based on a probability, split_dict
    # gives the proportion for each set, and randomize = True means the
    # indices are shuffled beforehand.
    idxs = d.split_data_idxs('split', randomize = True, split_dict = idxs)

    # Make the training data based on the indices calculated in the previous
    # step. Only include the corners of 'temp' in the input, and the truth
    # for temp as the output.
    training = d.make_array(idxs = idxs['training'], input_keys = ['temp'],
        output_key = 'temp')

    # Normalize both the input and output data
    training.normalize()

    # Save the DataWrapper
    training.save(filename = 'output/datawrapper/sample_training_data.pkl')
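
# A minimal sketch of the index-dictionary contract described in ex2: the
# values are proportions summing to <= 1.0, and `split_data_idxs` turns them
# into disjoint index sets. The shuffling and sizes below are illustrative,
# not the library's actual implementation.
def ex2_split_sketch(n = 100):
    split = {'training': 0.9, 'validation': 0.1}
    cut = int(n * split['training'])
    shuffled = np.random.permutation(n)
    return {'training': shuffled[:cut], 'validation': shuffled[cut:]}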

def ex3():
    '''Get the bilinear interpolation error on the training data.
    '''
    # Load the training data we produced in `ex2`
    training = wrappers.DataWrapper.load('output/datawrapper/sample_training_data.pkl')

    # Denormalize the data so we can do non-ML interpolation with it
    training.denormalize()

    # Since the object is self-consistent, denormalizing again is a no-op
    # (the data is already denormalized). It is designed to fail gracefully.
    training.denormalize()

    # Produce the bilinear interpolation error for each subgrid. Deep-copy
    # the data before passing it in so the original is not corrupted.
    bilin_error = transforms.InterpolationErrorRegression(
        src = copy.deepcopy(training),
        func = interpolation.bilinear,
        cost = metrics.Error,
        output_size = training.res ** 2)

    # The output was set as `y_true` of `bilin_error`.
    # Get the bias, mean RMSE, and RMSE std.
    RMSE = np.apply_along_axis(
        metrics.RMSE,
        axis = 1,
        arr = bilin_error.y_true)
    print('Bilinear interpolation results:')
    print('\tbias: {}'.format(np.mean(bilin_error.y_true)))
    print('\tmean RMSE: {}'.format(np.mean(RMSE)))
    print('\tstd RMSE: {}'.format(np.std(RMSE)))
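
# For reference, a minimal sketch of the per-subgrid RMSE computed in ex3,
# assuming each row of `y_true` holds the flattened interpolation error of
# one subgrid. This mirrors what `np.apply_along_axis(metrics.RMSE, ...)`
# computes row by row.
def ex3_rmse_sketch(row):
    return math.sqrt(np.mean(np.square(row)))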

def ex4():
    '''Produce a 2-element one-hot array that classifies each subgrid by
    whether its bilinear interpolation error is above or below a threshold.
    '''
    # Load the training data we produced in `ex2` and denormalize
    training = wrappers.DataWrapper.load('output/datawrapper/sample_training_data.pkl')
    training.denormalize()

    # This function encapsulates the entire process. `threshold` is the
    # cutoff between the classes: if the error is greater than the threshold
    # (0.1), the subgrid falls in one category; if it is less, it falls in
    # the other.
    out = transforms.InterpolationErrorClassification(
        src = training,
        func = interpolation.bilinear,
        cost = metrics.RMSE,
        threshold = 0.1)

    # `out` is now a `wrappers.DataWrapper` object where the input is the
    # same as `training` and the output is a one-hot array based on the
    # bilinear interpolation error. We can feed this object to a
    # classification method for training, etc.
    return out
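
# A minimal sketch of feeding the one-hot DataWrapper from ex4 into a
# classifier, using the Keras imports at the top of this file. The layer
# sizes, optimizer, and epoch count are assumptions, not a tuned
# configuration from the project.
def ex4_classifier_sketch(out):
    model = Sequential()
    model.add(Dense(32, activation = 'relu', input_shape = (out.X.shape[1],)))
    model.add(Dropout(0.25))
    model.add(Dense(2, activation = 'softmax'))
    model.compile(optimizer = 'adam', loss = 'categorical_crossentropy',
        metrics = ['accuracy'])
    model.fit(out.X, out.y_true, epochs = 1, batch_size = 32)
    return model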

def ex5():
    '''This example produces a picture of the first day: the base temperature
    data on the left and the RMSE of a bilinear interpolation on the right.
    '''
    training = wrappers.DataWrapper.load('output/datawrapper/sample_training_data.pkl')
    training.denormalize()
    fig = plt.figure()
    ax1 = fig.add_subplot(1,2,1)

    # Produce the 2D map of the first day
    basemap = transforms.mapify(
        loc_to_idx = training.loc_to_idx,
        arr = training.y_true,
        year = 2005,
        day = 1,
        res = training.res,
        classification = False)
    ax1 = visualization.map(
        ax = ax1,
        arr = basemap,
        title = 'Original data')

    # Make the bilinear error data
    bilin_error = transforms.InterpolationErrorRegression(
        src = copy.deepcopy(training),
        func = interpolation.bilinear,
        cost = metrics.Error,
        output_size = training.res ** 2)

    # Produce the 2D map of the bilinear error for the first day
    errmap = transforms.mapify(
        loc_to_idx = bilin_error.loc_to_idx,
        arr = bilin_error.y_true,
        year = 2005,
        day = 1,
        res = bilin_error.res,
        classification = False)
    ax2 = visualization.map(
        ax = fig.add_subplot(1,2,2),
        arr = errmap,
        title = 'Bilinear error')
    plt.show()
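
# To keep the ex5 figure instead of only displaying it, the standard
# matplotlib call works (the filename is just an example):
#     fig.savefig('ex5_bilinear_error.png', dpi = 150)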

def ex6():
    '''Train an example interpolation network and evaluate it on test data.
    '''
    # Preprocessing flag; False matches the setting used in ex1
    denorm_local = False

    # Make the (training, validation, testing) sets
    training_data = wrappers.DataPreprocessing.load(
        'output/datapreprocessing/training_data_denormLocal{}_res6/'.format(denorm_local))
    training_data.normalize()

    # Split the training data into training and validation sets
    idxs = training_data.split_data_idxs(
        division_format = 'split',
        split_dict = {'training': 0.85, 'validation': 0.15},
        randomize = True)
    train_src = training_data.make_array(output_key = 'temp', idxs = idxs['training'])
    val_src = training_data.make_array(output_key = 'temp', idxs = idxs['validation'])

    # Make the testing data
    testing_data = wrappers.DataPreprocessing.load(
        'output/datapreprocessing/testing_data_denormLocal{}_res6/'.format(denorm_local))
    testing_data.normalize()
    test_src = testing_data.make_array(output_key = 'temp')

    # Make and train the network
    nn = neural_network.experiment_interpolation_network('nn_lores')
    nn.fit(
        training_data = train_src,
        num_epochs = 1,
        validation_data = val_src)
    # fig = nn.visualize_training()
    # figname = 'nn_lores_training.png'
    # fig.savefig(figname)
    # nn.save(filename = nn_model_save_loc)

    # Predict on the test inputs and denormalize the predictions
    y_pred = nn.predict(in_data = test_src.X)
    y_pred = transforms.Denormalize_arr(
        arr = y_pred, avg = test_src.norm_data[:,0],
        norm = test_src.norm_data[:,1])
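
    # A hedged follow-up: score the denormalized predictions the same way ex3
    # scores bilinear interpolation. Assumes `test_src.y_true` lines up
    # row-for-row with the network's predictions.
    truth = transforms.Denormalize_arr(
        arr = test_src.y_true, avg = test_src.norm_data[:,0],
        norm = test_src.norm_data[:,1])
    rmse = np.apply_along_axis(metrics.RMSE, axis = 1, arr = y_pred - truth)
    print('Mean test RMSE: {}'.format(np.mean(rmse)))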

def debug_bicubic():
    '''Sanity-check the bicubic interpolation pipeline on one day of data.
    '''
    src = wrappers.DataPreprocessing(
        name = 'sample',
        years = ['2005'],
        denorm_local = False,
        num_days = 1)
    src.parse_data()
    src = src.make_array(
        input_keys = ['temp'], output_key = 'temp')
    print('truth\n', src.y_true[0])

    src = transforms.makeBicubicArrays(copy.deepcopy(src))
    bicubic = transforms.InterpolationErrorRegression(
        src = src,
        func = interpolation.bicubic,
        cost = metrics.Error,
        output_size = src.res ** 2)
    res_in = 6
    corner_idxs = util.gen_corner_idxs(res_in)
    print('bicubic error\n', bicubic.y_true[0].reshape(6,6))
    print('\nin bicubic\n', bicubic.X[0])
    print('\nbicubic output {}\n'.format(
        transforms.interpolate_grid(
            input_grid = bicubic.X[0],
            res = bicubic.res,
            interp_func = interpolation.bicubic).reshape(6,6)))
    print('\nnorm data\n', bicubic.norm_data[0])
    print('\nbicubic RMSE (first subgrid)\n', metrics.RMSE(bicubic.y_true[0]))
    a = np.apply_along_axis(
        metrics.RMSE,
        axis = 1,
        arr = bicubic.y_true)
    print('Mean RMSE bicubic: {}'.format(np.mean(a)))
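
if __name__ == '__main__':
    # Convenience runner (an assumption, not part of the original workflow):
    # ex1 parses and saves the raw data, ex2 builds the training DataWrapper,
    # and ex3-ex5 depend on ex2's saved output.
    ex1()
    ex2()
    ex3()
    ex4()
    ex5()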