engine.py
import math

class Val:
    """Stores a scalar value (data) and its gradient."""

    def __init__(self, data, _children=()):
        self.data = data
        self.grad = 0
        self._prev = set(_children)    # child nodes, used to build the graph for the topological sort in backward()
        self._backward = lambda: None  # backward is defined only for the operations used in ANNs

    def __repr__(self):
        return f"Val(data={self.data}, grad={self.grad})"

    def __add__(self, other):
        other = other if isinstance(other, Val) else Val(other)
        out = Val(self.data + other.data, (self, other))
        def _backward():
            # ∂L/∂x = (∂L/∂out) * (∂out/∂x): out.grad is the upstream gradient, 1.0 the local gradient
            self.grad += 1.0 * out.grad
            other.grad += 1.0 * out.grad
        out._backward = _backward
        return out

    def __mul__(self, other):
        other = other if isinstance(other, Val) else Val(other)
        out = Val(self.data * other.data, (self, other))
        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out

    def relu(self):
        out = Val(0 if self.data < 0 else self.data, (self,))
        def _backward():
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward
        return out

    def exp(self):
        out = Val(math.exp(self.data), (self,))
        def _backward():
            self.grad += out.data * out.grad
        out._backward = _backward
        return out

    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only int/float exponents are supported"
        out = Val(self.data**other, (self,))
        def _backward():
            self.grad += (other * self.data**(other - 1)) * out.grad
        out._backward = _backward
        return out

    def tanh(self):
        out = Val(math.tanh(self.data), (self,))
        def _backward():
            self.grad += (1 - out.data**2) * out.grad
        out._backward = _backward
        return out

    def __truediv__(self, other):
        # delegate to __mul__ and __pow__ so the division stays in the graph and gradients flow through it
        return self * other**-1

    def __rmul__(self, other):
        # handles `number * Val` by reusing __mul__, which also records the children
        return self * other

    def __radd__(self, other):
        # handles `number + Val` by reusing __add__
        return self + other

    def backward(self):
        # topological sort of the graph built through _prev (orders the nodes left to right)
        topo_order = []
        visited = set()
        def build_order(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_order(child)
                topo_order.append(v)
        build_order(self)
        # walk the nodes in reverse order, applying the chain rule to accumulate each gradient
        self.grad = 1
        for v in reversed(topo_order):
            v._backward()
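
# --- A minimal usage sketch (not part of the original engine.py) ---
# Builds a small expression with Val, runs backward(), and notes the expected
# gradients; the variable names a, b, c, loss below are illustrative only.
if __name__ == "__main__":
    a = Val(2.0)
    b = Val(-3.0)
    c = a * b + a        # c = 2*(-3) + 2 = -4
    loss = c.tanh()
    loss.backward()
    # By the chain rule: dloss/dc = 1 - tanh(c)**2, dc/da = b + 1, dc/db = a
    print(a, b, c, loss)  # grads are populated after backward()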