// cpu_forwarded_main.cpp
// Verilator-driven tests for the cpu_forwarded design.
#include <iostream>
#include "verilator_common.h"
#include "verilated.h"
#include "Vcpu_forwarded.h"
#define MEM cpu_forwarded__DOT__memory_stage__DOT__main_mem__DOT__mem
using namespace std;
// Blanks the simulated main memory with no-ops and then installs a five-word
// program at addresses 0-4 that compares two immediates with NEQ and stores
// the comparison result to memory.
//
//   mem_depth  number of memory words (starting at 0) overwritten with no-ops
//   top        Verilated model whose main memory is written
void load_neq_program(const int mem_depth, Vcpu_forwarded* const top) {
  auto& mem = top->MEM;

  // Fill first, so every word outside the program is a no-op.
  for (int addr = 0; addr < mem_depth; ++addr) {
    const uint32_t filler = tiny_CPU_no_op();
    mem[addr] = filler;
  }

  // res 1101
  mem[0] = tiny_CPU_load_immediate(100, 12);          // reg12 <- 100
  mem[1] = tiny_CPU_load_immediate(4, 13);            // reg13 <- 4
  mem[2] = tiny_CPU_binop(TINY_CPU_NEQ, 12, 13, 14);  // reg14 <- reg12 NEQ reg13
  mem[3] = tiny_CPU_load_immediate(58, 20);           // reg20 <- 58 (store address)
  mem[4] = tiny_CPU_store(14, 20);                    // mem[58] <- reg14
}
void test_neq_alu(const int argc, char** argv) {
Vcpu_forwarded* top = new Vcpu_forwarded();
load_neq_program(2048, top);
cout << "Testing neq" << endl;
RESET(top);
int n_cycles = 40;
for (int i = 0; i < n_cycles; i++) {
HIGH_CLOCK(top);
cout << "At " << i << " instruction type is = " << (int) top->current_instruction_type_dbg << ", PC = " << (int) top->PC_value << endl;
}
cout << "top->MEM[58] = " << ((int)top->MEM[58]) << endl;
assert(top->MEM[58] == 1);
top->final();
}
// Blanks the simulated main memory with no-ops and then installs a six-word
// program at addresses 0-5: four immediate loads, a store of reg0 to the
// address held in reg1, and a load back from that address.
//
//   mem_depth  number of memory words (starting at 0) overwritten with no-ops
//   top        Verilated model whose main memory is written
void load_multiload_store_program(const int mem_depth, Vcpu_forwarded* const top) {
  auto& mem = top->MEM;

  // Fill first, so every word outside the program is a no-op.
  for (int addr = 0; addr < mem_depth; ++addr) {
    const uint32_t filler = tiny_CPU_no_op();
    mem[addr] = filler;
  }

  mem[0] = tiny_CPU_load_immediate(0, 0);      // reg0 <- 0
  mem[1] = tiny_CPU_load_immediate(1000, 1);   // reg1 <- 1000
  mem[2] = tiny_CPU_load_immediate(0, 26);     // reg26 <- 0, loop count
  mem[3] = tiny_CPU_load_immediate(100, 25);   // reg25 <- 100, loop bound
  mem[4] = tiny_CPU_store(0, 1);               // mem[1000] = 0
  mem[5] = tiny_CPU_load(1, 2);                // reg2 <- mem[1000]
}
// Blanks the simulated main memory with no-ops and then installs a program
// that increments mem[1000] once per loop iteration. The loop body starts at
// address 5 and is repeated while the loop counter (reg26) differs from the
// loop bound (reg25, loaded with 3).
//
//   mem_depth  number of memory words (starting at 0) overwritten with no-ops
//   top        Verilated model whose main memory is written
void load_loop_program(const int mem_depth, Vcpu_forwarded* const top) {
  auto& mem = top->MEM;

  // Fill first, so every word outside the program is a no-op.
  for (int addr = 0; addr < mem_depth; ++addr) {
    const uint32_t filler = tiny_CPU_no_op();
    mem[addr] = filler;
  }

  // Setup
  mem[0] = tiny_CPU_load_immediate(0, 0);      // reg0 <- 0
  mem[1] = tiny_CPU_load_immediate(1000, 1);   // reg1 <- 1000
  mem[2] = tiny_CPU_load_immediate(0, 26);     // reg26 <- 0, loop count
  mem[3] = tiny_CPU_load_immediate(3, 25);     // reg25 <- 3, loop bound
  mem[4] = tiny_CPU_store(0, 1);               // mem[1000] = 0

  // Loop body (entered at address 5)
  mem[5] = tiny_CPU_load(1, 2);                         // reg2 <- mem[1000]
  mem[6] = tiny_CPU_load_immediate(1, 3);               // reg3 <- 1
  mem[7] = tiny_CPU_binop(TINY_CPU_ADD, 2, 3, 2);       // reg2 <- reg2 + 1
  mem[8] = tiny_CPU_store(2, 1);                        // mem[1000] <= reg2
  mem[9] = tiny_CPU_binop(TINY_CPU_ADD, 26, 3, 26);     // reg26 <- reg26 + 1
  mem[10] = tiny_CPU_binop(TINY_CPU_NEQ, 25, 26, 27);   // reg27 <- reg25 NEQ reg26
  mem[11] = tiny_CPU_load_immediate(5, 9);              // reg9 <- 5 (jump target)
  mem[12] = tiny_CPU_jump(27, 9);                       // if loop count != loop bound jump to 5
  // Disabled probe, kept for debugging whether jumps are being executed:
  //mem[13] = tiny_CPU_store(0, 1);
}
void test_increment_loop(const int argc, char** argv) {
Vcpu_forwarded* top = new Vcpu_forwarded();
load_loop_program(2048, top);
cout << "Testing increment loop" << endl;
RESET(top);
// Cycles needed to get to MEM[1000] = K
// Startup cycles + (N_STAGES*loop_length*K)
int K = 3;
// int N_STAGES = 5;
// int startup_instructions = 5;
// int loop_length = 8; // TODO: Set correctly
int n_cycles = 80; //N_STAGES*(startup_instructions + loop_length*K);
for (int i = 0; i < n_cycles; i++) {
HIGH_CLOCK(top);
cout << "At " << i << " instruction type is = " << (int) top->current_instruction_type_dbg << ", PC = " << (int) top->PC_value << endl;
cout << "top->MEM[1000] = " << ((int)top->MEM[1000]) << endl;
cout << "---------------------------------------------------------------" << endl;
if (i > 0) {
assert(top->PC_value > 0);
}
}
cout << "top->MEM[1000] = " << ((int)top->MEM[1000]) << endl;
assert(top->MEM[1000] == K);
top->final();
}
// Blanks the simulated main memory with no-ops and then installs a
// three-word program at addresses 0-2 that loads two immediates and stores
// the first one to the address held in the second.
//
//   mem_depth  number of memory words (starting at 0) overwritten with no-ops
//   top        Verilated model whose main memory is written
void load_load_store_program(const int mem_depth, Vcpu_forwarded* const top) {
  auto& mem = top->MEM;

  // Fill first, so every word outside the program is a no-op.
  for (int addr = 0; addr < mem_depth; ++addr) {
    const uint32_t filler = tiny_CPU_no_op();
    mem[addr] = filler;
  }

  mem[0] = tiny_CPU_load_immediate(5, 0);      // reg0 <- 5
  mem[1] = tiny_CPU_load_immediate(1000, 1);   // reg1 <- 1000
  mem[2] = tiny_CPU_store(0, 1);               // mem[1000] = 5
}
void test_load_store_program(const int argc, char** argv) {
cout << "Testing load immediate then storing it back" << endl;
Vcpu_forwarded* top = new Vcpu_forwarded();
load_load_store_program(2048, top);
RESET(top);
HIGH_CLOCK(top);
// First instruction is load_immediate
cout << "Current instruction type = " << (int) top->current_instruction_type_dbg << endl;
assert(top->current_instruction_type_dbg == TINY_CPU_INSTRUCTION_LOAD_IMMEDIATE);
HIGH_CLOCK(top);
int n_cycles = 10;
for (int i = 0; i < n_cycles; i++) {
HIGH_CLOCK(top);
cout << "At " << i << " instruction type is = " << (int) top->current_instruction_type_dbg << ", PC = " << (int) top->PC_value << endl;
cout << "---------------------------------------------------------------" << endl;
}
cout << "top->MEM[1000] = " << ((int)top->MEM[1000]) << endl;
assert(top->MEM[1000] == 5);
top->final();
}
void test_multiload_store_program(const int argc, char** argv) {
cout << "Testing multiple loads then store back, then load" << endl;
Vcpu_forwarded* top = new Vcpu_forwarded();
load_multiload_store_program(2048, top);
RESET(top);
HIGH_CLOCK(top);
// First instruction is load_immediate
cout << "Current instruction type = " << (int) top->current_instruction_type_dbg << endl;
assert(top->current_instruction_type_dbg == TINY_CPU_INSTRUCTION_LOAD_IMMEDIATE);
HIGH_CLOCK(top);
int n_cycles = 10;
// Q: How many cycles are needed to increment 2 times?
for (int i = 0; i < n_cycles; i++) {
HIGH_CLOCK(top);
cout << "At " << i << " instruction type is = " << (int) top->current_instruction_type_dbg << ", PC = " << (int) top->PC_value << endl;
cout << "---------------------------------------------------------------" << endl;
}
cout << "top->MEM[1000] = " << ((int)top->MEM[1000]) << endl;
assert(top->MEM[1000] == 0);
top->final();
}
// Blanks the simulated main memory with no-ops and then installs a six-word
// program at addresses 0-5 in which each arithmetic instruction consumes a
// register written by the instruction(s) immediately before it, followed by
// a store to address 123.
//
//   mem_depth  number of memory words (starting at 0) overwritten with no-ops
//   top        Verilated model whose main memory is written
void load_forwarded_arith_program(const int mem_depth, Vcpu_forwarded* const top) {
  auto& mem = top->MEM;

  // Fill first, so every word outside the program is a no-op.
  for (int addr = 0; addr < mem_depth; ++addr) {
    const uint32_t filler = tiny_CPU_no_op();
    mem[addr] = filler;
  }

  mem[0] = tiny_CPU_load_immediate(8, 0);             // r0 <- 8
  mem[1] = tiny_CPU_load_immediate(1000, 1);          // r1 <- 1000
  mem[2] = tiny_CPU_binop(TINY_CPU_ADD, 0, 1, 2);     // r2 <- r0 + r1
  mem[3] = tiny_CPU_binop(TINY_CPU_MUL, 1, 2, 3);     // r3 <- r1 MUL r2
  mem[4] = tiny_CPU_load_immediate(123, 1);           // r1 <- 123
  mem[5] = tiny_CPU_store(0, 1);                      // mem[123] <- r0
}
// Instr pairs:
// binop binop
// loadimm binop
// binop loadimm
// store binop
// store loadimm
// store jump
// Note: When an immediate is loaded there is no RAW dependence between the
// load_imm and a following arithmetic instruction, because the load is
// already resolved when the load_imm is decoded.
// Q: Is there a way to have processor designs expressed as loads and stores,
// in a sequential programming model and then have the design compiled down to
// circuits that preserve the sequential programming model?
void test_forwarded_arith(const int argc, char** argv) {
cout << "Testing arithmetic result forwarding" << endl;
Vcpu_forwarded* top = new Vcpu_forwarded();
load_forwarded_arith_program(2048, top);
RESET(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
HIGH_CLOCK(top);
cout << "top->MEM[123] = " << ((int)top->MEM[1000]) << endl;
assert(top->MEM[123] == ((8 + 1000) + 1000));
top->final();
}
// What is the next step? I guess doing branch prediction would be nice?
// Entry point: runs every CPU test in a fixed order and prints a success
// banner once all of them have returned.
int main(const int argc, char** argv) {
  using TestCase = void (*)(int, char**);

  // Order matters only for the log output; it matches the original sequence.
  const TestCase suite[] = {
      test_forwarded_arith,
      test_multiload_store_program,
      test_load_store_program,
      test_neq_alu,
      test_increment_loop,
  };

  for (const TestCase run_test : suite) {
    run_test(argc, argv);
  }

  cout << "$$$$ CPU Forwarded tests passed" << endl;
  return 0;
}