# EngineeringMath_week14.py
import numpy as np
def f(w, x, y):
    """Scalar objective f(w) = (w*x - y)**2."""
    return (w * x - y) ** 2

def gradient(w, x, y):
    """df/dw = 2*(w*x - y)*x."""
    return 2 * (w * x - y) * x
def unconstrained_optimization(w, x, y, lr=1e-3):
    # Plain gradient descent; stop once the improvement in f falls below 1e-9.
    fx = f(w, x, y)
    while True:
        w_new = w - lr * gradient(w, x, y)
        fx_new = f(w_new, x, y)
        print("f(%f) = %f, f(%f) = %f" % (w, fx, w_new, fx_new))
        if fx - fx_new < 1e-9:
            break
        w = w_new
        fx = fx_new
def task_1():
    '''
    min_w (w*x - y)**2
    x = 1.5, y = 0.5
    w0 = 0.8
    gradient: 2*(w*x - y)*x = 4.5*w - 1.5
    iteration: w_{k+1} = w_k - alpha_k * gradient
    '''
    w0, x, y = 0.8, 1.5, 0.5
    unconstrained_optimization(w0, x, y)
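
# Sanity check (an addition, not in the original file): task_1 has a closed-form
# minimizer, w* = y/x = 0.5/1.5 = 1/3 with f(w*) = 0, so gradient descent should
# converge toward w = 1/3. The helper name `task_1_closed_form` is hypothetical.
def task_1_closed_form():
    x, y = 1.5, 0.5
    w_star = y / x
    print('closed-form w* = %f, f(w*) = %f' % (w_star, f(w_star, x, y)))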
#############################################################
def affine_forward(x, w):
    '''
    Inputs:
    - x: Input data, of shape (N, D) -- here (4, 3)
    - w: Weights, of shape (D, M)
    Returns:
    - out: Output, of shape (N, M)
    - cache: Tuple of (x, w), saved for the backward pass
    '''
    out = x.dot(w)
    cache = (x, w)
    return out, cache
def affine_backward(dout, cache):
    """
    Inputs:
    - dout: Upstream derivative, of shape (N, M)
    - cache: Tuple of:
      - x: Input data, of shape (N, D)
      - w: Weights, of shape (D, M)
    Returns:
    - dx: Gradient with respect to x, of shape (N, D)
    - dw: Gradient with respect to w, of shape (D, M)
    """
    x, w = cache
    dx = dout.dot(w.T)
    dw = x.T.dot(dout)
    return dx, dw
def loss_func(x, y):
    # Sum-of-squares loss and its gradient with respect to x.
    loss = np.sum((x - y) ** 2)
    dloss = 2 * (x - y)
    return loss, dloss
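
# A minimal sketch (added, not from the original file): verify affine_backward
# against centered finite differences on random data. `grad_check_affine` and
# the eps value are hypothetical choices; the scalar checked is sum(out * dout),
# whose exact gradient with respect to w is the dw that affine_backward returns.
def grad_check_affine(n=4, d=3, m=2, eps=1e-6):
    x = np.random.randn(n, d)
    w = np.random.randn(d, m)
    dout = np.random.randn(n, m)
    _, cache = affine_forward(x, w)
    _, dw = affine_backward(dout, cache)
    dw_num = np.zeros_like(w)
    for i in range(d):
        for j in range(m):
            w[i, j] += eps
            plus = np.sum(affine_forward(x, w)[0] * dout)
            w[i, j] -= 2 * eps
            minus = np.sum(affine_forward(x, w)[0] * dout)
            w[i, j] += eps  # restore original weight
            dw_num[i, j] = (plus - minus) / (2 * eps)
    print('max |dw - dw_num| = %e' % np.max(np.abs(dw - dw_num)))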
class TwoLayerNet(object):
    '''
    Two affine layers with no activation in between, so the network as a
    whole is linear. Biases are handled by prepending a column of ones to
    the layer input, which is why W2 has hidden_dim + 1 rows.
    '''
    def __init__(self, input_dim=3, hidden_dim=2, output_dim=1, weight_scale=1e-3):
        self.params = {}
        self.params['W1'] = weight_scale * np.random.randn(input_dim, hidden_dim)
        self.params['W2'] = weight_scale * np.random.randn(hidden_dim + 1, output_dim)

    def loss(self, X, y=None):
        W1, W2 = self.params['W1'], self.params['W2']
        X = np.insert(X, 0, values=1, axis=1)        # prepend bias column of ones
        out1, cache1 = affine_forward(X, W1)
        out1 = np.insert(out1, 0, values=1, axis=1)  # bias column for the second layer
        out2, cache2 = affine_forward(out1, W2)
        output = out2
        if y is None:  # test time: just forward
            return output
        # train time: forward & backward
        loss, dloss = loss_func(output, y)
        grads = {}
        dout2, grads['W2'] = affine_backward(dloss, cache2)
        dout2 = dout2[:, 1:]  # drop the gradient of the constant bias column
        dout1, grads['W1'] = affine_backward(dout2, cache1)
        return loss, grads

    def output_params(self):
        print('w1:\n', self.params['W1'])
        print('w2:\n', self.params['W2'])
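
# A hedged sketch (added, not from the original file): check the model's
# analytic gradients numerically. `grad_check_model` is a hypothetical helper;
# it perturbs each entry of W1/W2 in place and compares d(loss)/d(weight)
# from centered differences against the backward pass.
def grad_check_model(eps=1e-6):
    X = np.random.randn(4, 2)
    y = np.random.randn(4, 1)
    model = TwoLayerNet()
    _, grads = model.loss(X, y)
    for name, w in model.params.items():
        num = np.zeros_like(w)
        it = np.nditer(w, flags=['multi_index'])
        while not it.finished:
            idx = it.multi_index
            old = w[idx]
            w[idx] = old + eps
            plus, _ = model.loss(X, y)
            w[idx] = old - eps
            minus, _ = model.loss(X, y)
            w[idx] = old  # restore original weight
            num[idx] = (plus - minus) / (2 * eps)
            it.iternext()
        print('%s: max |analytic - numeric| = %e'
              % (name, np.max(np.abs(grads[name] - num))))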
def task_2(num_iterations=2000, learning_rate=1e-3):
    # XOR data; since the model is purely linear (no activation), it cannot
    # fit XOR exactly, so the loss plateaus rather than reaching zero.
    X = np.array([
        [0, 0],
        [0, 1],
        [1, 0],
        [1, 1],
    ])
    y = np.array([[0], [1], [1], [0]])
    model = TwoLayerNet()
    for t in range(num_iterations):
        loss, grads = model.loss(X, y)
        for p, w in model.params.items():
            dw = grads[p]
            w -= learning_rate * dw  # in-place (full-batch) gradient descent step
        if t % (num_iterations // 30) == 0:
            print('loss = %f' % loss)
    # model.output_params()
    print(model.loss(X))  # y=None: forward pass only, prints final predictions
def main():
    # task_1()
    task_2()

if __name__ == '__main__':
    main()