from_scratch.py
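"""Fit y = x ** 3 with a tiny neural network built from scratch with numpy:
one input, three softplus hidden units, and one linear output, trained by
hand-derived gradient descent on the sum of squared residuals."""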
import numpy as np
import matplotlib.pyplot as plt
import random
# choose learning rate
learning_rate = 0.001
# create fake data
x = np.arange(-1, 1, 0.01)
y = x ** 3
# initialize weights & biases
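# (w1-w3 and b1-b3 map the input to the three hidden units;
#  w4-w6 and b4 combine the hidden activations into the output)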
w1 = random.normalvariate(0, 1)
w2 = random.normalvariate(0, 1)
w3 = random.normalvariate(0, 1)
w4 = random.normalvariate(0, 1)
w5 = random.normalvariate(0, 1)
w6 = random.normalvariate(0, 1)
b1 = 0
b2 = 0
b3 = 0
b4 = 0
# choose an activation function
activation_func = lambda x: np.log(1 + np.e ** x)
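# (this is the softplus function; the nonlinearity is what lets a weighted
#  sum of three hidden units approximate the cubic y = x ** 3)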
# pre-node function
f1 = lambda x: w1 * x + b1
f2 = lambda x: w2 * x + b2
f3 = lambda x: w3 * x + b3
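# (these lambdas read w1-w3 and b1-b3 from module scope on every call,
#  so they automatically use the updated values after each gradient step)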
# first run through network
y_pred = activation_func(f1(x)) * w4 + activation_func(f2(x)) * w5 + activation_func(f3(x)) * w6 + b4
# simple loss function (sum of squared residuals)
def loss_func(y, y_pred):
    return sum((y - y_pred) ** 2)
epochs = 50000
loss_history = []
for epoch in range(epochs):
    # save the loss for the current prediction
    loss_history.append(loss_func(y, y_pred))
    # gradient descent: derivative of the sum of squared residuals w.r.t. each parameter;
    # np.e ** f(x) / (1 + np.e ** f(x)) is the derivative of the softplus activation (the sigmoid)
    dSSR_w1 = (-2 * (y - y_pred) * w4 * x * np.e ** f1(x) / (1 + np.e ** f1(x))).sum()
    dSSR_w2 = (-2 * (y - y_pred) * w5 * x * np.e ** f2(x) / (1 + np.e ** f2(x))).sum()
    dSSR_w3 = (-2 * (y - y_pred) * w6 * x * np.e ** f3(x) / (1 + np.e ** f3(x))).sum()
    dSSR_w4 = (-2 * (y - y_pred) * activation_func(f1(x))).sum()
    dSSR_w5 = (-2 * (y - y_pred) * activation_func(f2(x))).sum()
    dSSR_w6 = (-2 * (y - y_pred) * activation_func(f3(x))).sum()
    dSSR_b1 = (-2 * (y - y_pred) * w4 * np.e ** f1(x) / (1 + np.e ** f1(x))).sum()
    dSSR_b2 = (-2 * (y - y_pred) * w5 * np.e ** f2(x) / (1 + np.e ** f2(x))).sum()
    dSSR_b3 = (-2 * (y - y_pred) * w6 * np.e ** f3(x) / (1 + np.e ** f3(x))).sum()
    dSSR_b4 = (-2 * (y - y_pred)).sum()
    # step sizes (each gradient scaled by the learning rate)
    step_size_dSSR_w1 = dSSR_w1 * learning_rate
    step_size_dSSR_w2 = dSSR_w2 * learning_rate
    step_size_dSSR_w3 = dSSR_w3 * learning_rate
    step_size_dSSR_w4 = dSSR_w4 * learning_rate
    step_size_dSSR_w5 = dSSR_w5 * learning_rate
    step_size_dSSR_w6 = dSSR_w6 * learning_rate
    step_size_dSSR_b1 = dSSR_b1 * learning_rate
    step_size_dSSR_b2 = dSSR_b2 * learning_rate
    step_size_dSSR_b3 = dSSR_b3 * learning_rate
    step_size_dSSR_b4 = dSSR_b4 * learning_rate
    # updating weights & biases (step against the gradient)
    w1 = w1 - step_size_dSSR_w1
    w2 = w2 - step_size_dSSR_w2
    w3 = w3 - step_size_dSSR_w3
    w4 = w4 - step_size_dSSR_w4
    w5 = w5 - step_size_dSSR_w5
    w6 = w6 - step_size_dSSR_w6
    b1 = b1 - step_size_dSSR_b1
    b2 = b2 - step_size_dSSR_b2
    b3 = b3 - step_size_dSSR_b3
    b4 = b4 - step_size_dSSR_b4
    # run values through the network with the updated parameters
    y_pred = activation_func(f1(x)) * w4 + activation_func(f2(x)) * w5 + activation_func(f3(x)) * w6 + b4
# plot result
plt.figure(figsize=(10, 10))
plt.plot(x, y, label='Data')
plt.plot(x, y_pred, '--', label='Neural Network')
plt.legend(frameon=False)
plt.savefig('scratch_output.jpg')
# plot loss
plt.figure(figsize=(10, 10))
plt.plot(range(epochs), loss_history)
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.savefig('scratch_loss.jpg')