-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcachesim.cpp
331 lines (310 loc) · 10.6 KB
/
cachesim.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
#include <cassert>
#include <tuple>
#include "cachesim.hpp"
bool vipt;
// L1 cache
L1* l1;
// TLB
TLB* tlb;
// HWIVPT
HWIVPT* pt;
// global access to config (used for stat calculation)
sim_config_t* global_config;
void pipt_access(char rw, uint64_t addr, sim_stats_t* stats);
void vipt_access(char rw, uint64_t addr, sim_stats_t* stats);
void tag_compare(char rw,
uint64_t idx,
uint64_t tag,
Set* currentSet,
sim_stats_t* stats,
bool vipt);
std::tuple<uint64_t, uint64_t> get_vpn_and_page_offset(uint64_t addr);
void flush_l1(sim_stats_t* stats);
void deallocate_resources();
/**
* The use of virtually indexed physically tagged caches limits
* the total number of sets you can have.
* If the user selected configuration is invalid for VIPT
* Update config->s to reflect the minimum value for S (log2 number of
* ways) If the user selected configuration is valid do not modify it.
*/
void legalize_s(sim_config_t* config) {
if (config->s < config->c - config->p) {
config->s = config->c - config->p;
}
}
/**
* Subroutine for initializing the cache simulator. You many add and initialize
* any global or heap variables as needed.
*/
void sim_setup(sim_config_t* config) {
global_config = config;
vipt = config->vipt;
if (vipt) {
tlb = new TLB(config->t);
pt = new HWIVPT(config->m);
l1 = new L1(config->c, config->b, config->s);
} else {
l1 = new L1(config->c, config->b, config->s);
}
}
/**
* Subroutine that simulates the cache one trace event at a time.
*/
void sim_access(char rw, uint64_t addr, sim_stats_t* stats) {
if (vipt) {
vipt_access(rw, addr, stats);
} else {
pipt_access(rw, addr, stats);
}
}
/**
* Subroutine for cleaning up any outstanding memory operations and calculating
* overall statistics such as miss rate or average access time.
*/
void sim_finish(sim_stats_t* stats) {
deallocate_resources();
stats->hit_ratio_l1 = (double)stats->hits_l1 / (double)stats->accesses_l1;
stats->miss_ratio_l1 = 1.0 - stats->hit_ratio_l1;
double S = global_config->s * 0.2;
if (vipt) {
stats->hit_ratio_tlb =
(double)stats->hits_tlb / (double)stats->accesses_tlb;
stats->miss_ratio_tlb = 1.0 - stats->hit_ratio_tlb;
stats->hit_ratio_hw_ivpt =
(double)stats->hits_hw_ivpt / (double)stats->accesses_hw_ivpt;
stats->miss_ratio_hw_ivpt = 1.0 - stats->hit_ratio_hw_ivpt;
double hwivpt_penalty =
(1.0 + HW_IVPT_ACCESS_TIME_PER_M * global_config->m) *
DRAM_ACCESS_PENALTY;
double tag_compare_time = L1_TAG_COMPARE_TIME_CONST + S;
double hit_time =
L1_ARRAY_LOOKUP_TIME_CONST + stats->hit_ratio_tlb * tag_compare_time +
stats->miss_ratio_tlb *
(hwivpt_penalty + tag_compare_time * stats->hit_ratio_hw_ivpt);
stats->avg_access_time =
hit_time + (stats->miss_ratio_l1 * DRAM_ACCESS_PENALTY);
} else {
double hit_time_l1 =
L1_ARRAY_LOOKUP_TIME_CONST + L1_TAG_COMPARE_TIME_CONST + S;
stats->avg_access_time =
hit_time_l1 + (stats->miss_ratio_l1 * DRAM_ACCESS_PENALTY);
}
}
void vipt_access(char rw, uint64_t addr, sim_stats_t* stats) {
stats->accesses_tlb++;
auto [vpn, pageOffset] = get_vpn_and_page_offset(addr);
// array lookup
uint64_t idx = l1->getIdx(addr);
auto currentSet = l1->sets[idx];
// set buffer was populated
stats->array_lookups_l1++;
TLBEntry* tlbEntry;
// tlb access
for (uint64_t i = 0; i < tlb->nEntries; i++) {
tlbEntry = tlb->entries[i];
if (tlbEntry->valid && tlbEntry->vpn == vpn) {
// valid and tag matched
stats->hits_tlb++;
// entry was just accessed, becomes the MRU
tlb->lru->setMRU(0, i);
// the i-th entry should be now the MRU
assert(i == tlb->lru->getMRUNode(0)->indexOfBlockOrEntry);
auto ppn = tlbEntry->ppn;
// tlb hit, proceed with l1 tag compare
stats->tag_compares_l1++;
stats->accesses_l1++;
tag_compare(rw, idx, ppn, currentSet, stats, true);
// return (optimization) as no more work is needed
return;
}
}
// tlb miss
stats->misses_tlb++;
// look for translation in hwivpt
stats->accesses_hw_ivpt++;
HWIVPTEntry* pte;
// hwivpt access
for (uint64_t i = 0; i < pt->nEntries; i++) {
pte = pt->entries[i];
if (pte->valid && pte->vpn == vpn) {
// valid and tag matched
stats->hits_hw_ivpt++;
// entry was just accessed, becomes the MRU
pt->lru->setMRU(0, i);
// the i-th entry should be now the MRU
assert(i == pt->lru->getMRUNode(0)->indexOfBlockOrEntry);
// replace LRU entry in the TLB, becomes the MRU
auto mruNode = tlb->lru->convertLRUToMRU(0);
// this tlb entry should be the MRU
assert(mruNode->indexOfBlockOrEntry ==
tlb->lru->getMRUNode(0)->indexOfBlockOrEntry);
auto mruEntry = tlb->entries[mruNode->indexOfBlockOrEntry];
// mark it as valid (in case it was a compulsory miss)
mruEntry->valid = 1;
// install the translation from the HWIVPT
mruEntry->vpn = pte->vpn;
mruEntry->ppn = pte->ppn;
// proceed to access the cache
stats->accesses_l1++;
tag_compare(rw, idx, mruEntry->ppn, currentSet, stats, true);
// return (optimization) as no more work is needed
return;
}
}
// hwivpt miss
stats->misses_hw_ivpt++;
// flush l1 cache & reset its LRU status
flush_l1(stats);
// replace LRU entry in HWIVPT with new VPN (PPN stays as is)
// LRU entry becomes the MRU
auto pteIndex = pt->lru->convertLRUToMRU(0)->indexOfBlockOrEntry;
auto newPte = pt->entries[pteIndex];
// mark it as valid (in case it was a compulsory miss)
newPte->valid = 1;
// install translation
newPte->vpn = vpn;
// replace LRU entry in the TLB with new mapping
auto tlbeIndex = tlb->lru->convertLRUToMRU(0)->indexOfBlockOrEntry;
auto tlbe = tlb->entries[tlbeIndex];
// mark it as valid (in case it was a compulsory miss)
tlbe->valid = 1;
// install the translation from the HWIVPT
tlbe->vpn = vpn;
tlbe->ppn = newPte->ppn;
// proceed to access the cache
stats->accesses_l1++;
tag_compare(rw, idx, newPte->ppn, currentSet, stats, true);
}
std::tuple<uint64_t, uint64_t> get_vpn_and_page_offset(uint64_t addr) {
uint64_t pageOffset = addr & (0xffffffffffffffff >> (64 - global_config->p));
uint64_t vpn = addr >> global_config->p;
return {vpn, pageOffset};
};
void flush_l1(sim_stats_t* stats) {
Block* block;
// invalidate all blocks and writeback dirty ones
for (uint64_t i = 0; i < l1->numSets; i++) {
for (uint64_t j = 0; j < l1->blocksPerSet; j++) {
block = l1->sets[i]->blocks[j];
// mark block as invalid
block->valid = 0;
// if block was dirty: clear bit & write back
if (block->dirty) {
block->dirty = 0;
stats->cache_flush_writebacks++;
}
}
}
// reset LRU status of L1
delete l1->lru;
l1->lru = new LRU(l1->numSets, l1->blocksPerSet);
}
void tag_compare(char rw,
uint64_t idx,
uint64_t tag,
Set* currentSet,
sim_stats_t* stats,
bool vipt) {
for (uint64_t blockIdx = 0; blockIdx < l1->blocksPerSet; blockIdx++) {
// get block
auto block = currentSet->blocks[blockIdx];
// if valid and tag match, then we have a hit
if (block->valid && block->tag == tag) {
stats->hits_l1++;
if (rw == WRITE) {
// if its a write hit, as we are following a
// WBWA policy, just set the dirty bit to true
block->dirty = true;
stats->writes++;
} else {
stats->reads++;
}
// block is now the MRU as it was just accessed
l1->lru->setMRU(idx, blockIdx);
// now this block should be the MRU (head of list)
assert(blockIdx == l1->lru->getMRUNode(idx)->indexOfBlockOrEntry);
// return because there is nothing more to do for this access
return;
}
}
// if no block matched the search, we have a miss, so increase counter
stats->misses_l1++;
// get LRU block to evict and move it to MRU pos
auto lruNode = l1->lru->convertLRUToMRU(idx);
auto lruBlockIdx = lruNode->indexOfBlockOrEntry;
auto lruBlock = currentSet->blocks[lruBlockIdx];
if (lruBlock->dirty) {
// if it's dirty, then write it back to mem
stats->writebacks_l1++;
// once the block is written back, it's no longer dirty
lruBlock->dirty = false;
if (vipt) {
// update LRU status of corresponding HWIVTPT entry
// since it was written back to main memory
// since `tag == ppn`, we can index the HWIVPT with the tag
// to find the entry in O(1)
pt->lru->setMRU(0, lruBlock->tag);
// now this entry should be the MRU
assert(lruBlock->tag == pt->lru->getMRUNode(0)->indexOfBlockOrEntry);
}
}
// set it's valid bit, just in case this was a cold-start miss
lruBlock->valid = true;
if (rw == WRITE) {
// if its a write, then the block is immediately dirty (WBWA write-policy)
lruBlock->dirty = true;
stats->writes++;
} else {
stats->reads++;
}
if (vipt) {
// since we have fetched the block from memory
// to read/write from/to it, we have accessed the
// corresponding frame in mem, so we need to update
// the LRU status of the corresponding HWIVPT entry
pt->lru->setMRU(0, tag);
// now this entry should be the MRU
assert(tag == pt->lru->getMRUNode(0)->indexOfBlockOrEntry);
}
// update the tag
lruBlock->tag = tag;
// now this block should be the MRU
assert(lruBlockIdx == l1->lru->getMRUNode(idx)->indexOfBlockOrEntry);
}
void pipt_access(char rw, uint64_t addr, sim_stats_t* stats) {
// we just accessed the cache, so increase counter
stats->accesses_l1++;
// extract tag from address
uint64_t tag = l1->getTag(addr);
// extract index from address
uint64_t idx = l1->getIdx(addr);
// find the set for this address
auto currentSet = l1->sets[idx];
// we just did an array lookup, so increase counter
stats->array_lookups_l1++;
// do a `parallel' tag comparison to find a block that matches
tag_compare(rw, idx, tag, currentSet, stats, false);
}
void deallocate_resources() {
delete l1;
if (vipt) {
delete tlb;
delete pt;
}
}
// clang-format off
/*
GDB:
gdb ./cachesim
PIPT:
run -c 10 -b 6 -s 4 < traces/gcc.trace
VIPT:
run -v -c 15 -b 7 -s 1 -p 14 -t 7 -m 18 < traces/gcc.trace
VALGRIND:
PIPT:
valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./cachesim -c 10 -b 6 -s 4 < traces/gcc.trace
VIPT:
valgrind --leak-check=full --show-leak-kinds=all --track-origins=yes ./cachesim -v -c 15 -b 7 -s 1 -p 14 -t 7 -m 18 < traces/gcc.trace
*/
// clang-format on