# iris.py
import argparse

import numpy as np
import polycirc
import polycirc.ast
from polycirc import diagram_to_function, rdiff, ir, optic, learner

from examples.util import load_iris, model_accuracy

# This demo solves the 3-class classification problem of the Iris dataset,
# which has 4-dimensional inputs.
INPUT_SIZE = 4
OUTPUT_SIZE = 3

# The model is a single fully-connected layer, so it has 4*3 = 12 parameters
# to be learned.
NPARAM = OUTPUT_SIZE * INPUT_SIZE

# We'll be training in fixed-point representation with 10 bits of precision.
NBITS = 10  # number of fixed-point bits to use
ONE = 1 << NBITS  # the number 1, encoded in fixed point with NBITS fractional bits
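
# For intuition, here's a plain-Python reference for this encoding. These
# helpers are purely illustrative (they aren't used by the circuits below): a
# real value x is stored as the integer round(x * 2**NBITS), so 0.5 becomes
# 512, and a product of two encoded values must be renormalised by shifting
# right by NBITS.
def to_fixed(x: float) -> int:
    return round(x * ONE)

def from_fixed(n: int) -> float:
    return n / ONE

def fixed_mul(a: int, b: int) -> int:
    # (a * b) is scaled by 2^(2*NBITS); shift once to get back to 2^NBITS.
    return (a * b) >> NBITS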

################################################################################
# Update/Displacement maps
#
# Start by defining "update" and "displacement" maps.
# - For mathematical background, see this paper: https://arxiv.org/abs/2103.01931
# - For simpler examples of update/displacement maps that work for
#   floating-point data, see polycirc.learner.

# A gradient-descent-like update using fixed-point arithmetic.
def fixed_gd(lr: int):
    def fixed_gd_inner(p: int):
        # TODO: define "scale" and "constant" diagrams in polycirc.ir?
        fwd = ir.copy(p)
        scale_shift = ir.shrc(NBITS, p) >> ir.scale(lr, p) >> ir.shrc(NBITS, p)
        rev = (ir.identity(p) @ scale_shift) >> ir.sub(p)
        return optic.make_optic(fwd, rev, residual=ir.obj(p))
    return fixed_gd_inner
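
# Pointwise, the reverse map above amounts to this sketch (an illustrative
# reading, assuming ir.shrc(n, -) is a right shift by n and ir.scale(c, -)
# multiplies by the constant c; this function is not used by the circuit):
def _fixed_gd_reference(p_i: int, g_i: int, lr: int) -> int:
    return p_i - (((g_i >> NBITS) * lr) >> NBITS)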

# Custom loss which clips model output into the range [0, 1] before comparing
# with one-hot encoded labels.
def cliploss(b: int):
    f = ir.clip(-ONE, ONE, b) >> ir.addc(ONE, b) >> ir.shrc(1, b)
    fwd = ir.shrc(NBITS, b) >> ir.copy(b) >> (ir.identity(b) @ f)
    rev = ir.sub(b)
    return optic.make_optic(fwd, rev, residual=ir.obj(b))
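
# Again as an illustrative sketch (assuming ir.clip saturates its input into
# [-ONE, ONE]), the squashing map f above reads pointwise as below: outputs
# land in [0, ONE], i.e. fixed-point [0, 1], which is what makes them
# comparable with the one-hot labels.
def _squash_reference(z_i: int) -> int:
    return (min(max(z_i, -ONE), ONE) + ONE) >> 1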

################################################################################
# Constructing the model circuit

def build_model():
    # 'model_circuit' is a circuit with m*n + m inputs and n outputs, which
    # performs a matrix multiplication: we're learning a simple single-layer
    # neural network, with the sigmoid-like squashing provided by the loss.
    model_circuit = ir.mat_mul(OUTPUT_SIZE, INPUT_SIZE)

    # DIFFERENTIABILITY
    # =================
    # Reverse-differentiate model_circuit using the optic algorithm described
    # in "Data-Parallel Algorithms for String Diagrams"
    # (see https://arxiv.org/abs/2305.01041).
    f = rdiff(model_circuit)  # : (4*3 + 4) + 3 → 3 + (4*3 + 4)

    # make_learner turns a model into a learner using a choice of update
    # (optimiser) and displacement (loss).
    u = fixed_gd(ONE >> 7)(NPARAM)  # learning rate of 1/2^7
    d = cliploss(OUTPUT_SIZE)       # the clipping loss defined above
    step_circuit = learner.make_learner(f, u, d, NPARAM, INPUT_SIZE)

    # model_circuit takes a matrix (n*m values) and a vector (m values) and
    # produces another vector (n values); step_circuit computes the same
    # vector *plus a parameter update*.
    return model_circuit, step_circuit
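
# NOTE (an assumption about the learner convention, matching how train()
# slices the result below): the step circuit's flattened outputs are
# (model output, updated parameters, updated input), so the new parameters
# occupy indices [OUTPUT_SIZE : OUTPUT_SIZE + NPARAM].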

################################################################################
# Training

def main():
    parser = argparse.ArgumentParser(description='polycirc Iris classification demo')
    subparsers = parser.add_subparsers(dest="command", required=True, help="Available commands")

    # Subparser for the 'train' command
    train_parser = subparsers.add_parser("train", help="train model")
    train_parser.add_argument('--iris-data', default='data/iris.csv')

    # Subparser for the 'print' command
    subparsers.add_parser("print", help="print model circuit as python")

    args = parser.parse_args()
    match args.command:
        case "train":
            return train(args.iris_data)
        case "print":
            # Print the forward and step circuits of the model as python functions.
            fwd, step = build_model()
            print(polycirc.ast.diagram_to_ast(fwd, 'predict'))
            print(polycirc.ast.diagram_to_ast(step, 'step'))

def train(iris_data):
    # Load data from CSV.
    print("loading data...")
    x, y = load_iris(iris_data, scale=ONE)

    # Initialize parameters to zero.
    p = np.zeros(NPARAM, dtype=int).tolist()

    # Compile the forward (predict) and gradient (step) passes of the circuit
    # into python functions.
    predict_circuit, step_circuit = build_model()
    predict = diagram_to_function(predict_circuit)
    step = diagram_to_function(step_circuit)

    N = len(x)
    NUM_ITER = N * 60

    # Iterate through the data in a (deterministic) schedule which interleaves
    # the three classes; this speeds up training a lot.
    q = np.arange(N)
    q[0::3] = np.arange(50)
    q[1::3] = np.arange(50) + 50
    q[2::3] = np.arange(50) + 100
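    # For example, the first six indices are 0, 50, 100, 1, 51, 101: one
    # example from each class in turn. (This assumes the CSV stores the 50
    # examples of each class contiguously, as in the standard Iris ordering.)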

    # Do a single step of SGD-like training per example.
    # NOTE: we call tolist() on numpy values to get python ints, but this
    # isn't strictly necessary.
    # NOTE: 'step' produces model output, new parameters, and new data; we
    # only need the new parameters.
    for j in range(NUM_ITER):
        i = q[j % N]
        p = step(*p, *x[i].tolist(), *y[i].tolist())[OUTPUT_SIZE:OUTPUT_SIZE + NPARAM]

    # Report train accuracy.
    # NOTE: we don't bother with a test or holdout set: this is just a demo of
    # differentiability of the IR.
    print('final parameters', p)
    print("predicting...")
    acc = model_accuracy(predict, p, x, y)
    print(f'accuracy: {100 * acc}%')

if __name__ == "__main__":
    main()
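
# Usage (a sketch; the exact invocation depends on how the repository is laid
# out, since this file imports from examples.util):
#
#   python -m examples.iris train --iris-data data/iris.csv
#   python -m examples.iris print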