-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathN.cfg
412 lines (366 loc) · 12.4 KB
/
N.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
##############################################################################################
# This is our "best guess" knob file for a Nehalem-class (45nm) Intel Core i7
# processor. We make no claims as to the accuracy or correctness of these
# settings. There is no support for modeling SMT cores, LLC cache inclusion,
# or a variety of other microarchitectural features, so use this at your own
# risk. It is **your** responsibility to understand what you are modeling and
# simulating!
##############################################################################################
# Global settings about the system and the simulation.
system_cfg {
# Simulation-wide keys: RNG seed, core count, heartbeat, trace/output files and
# power modeling. The nested sections below configure DVFS, the scheduler,
# profiling and ignored code.
seed = 1 # Random number generator seed
num_cores = 1 # Number of cores in the system.
heartbeat_interval = 10000 # Print out simulator heartbeat every x cycles.
ztrace_file_prefix = "ztrace" # Zesto trace filename prefix.
# Power modeling is switched off in this file; power_rtp_interval and
# power_rtp_file are left at 0 and "" respectively.
simulate_power = false # Simulate power.
power_rtp_interval = 0 # uncore cycles between power computations.
cache_miss_sample_parameter = 0 # Interval between sampling cache misses.
power_rtp_file = "" # Runtime power file.
output_redir = "sim.out" # Redirect simulator output.
dvfs_cfg {
# DVFS controller configuration.
# NOTE(review): "none" presumably disables DVFS - confirm.
config = "none"
# Re-evaluate voltage/freq choice every X cycles.
interval = 0
}
# OS scheduler and core allocator.
scheduler_cfg {
scheduler_tick = 0 # Scheduler refresh in cycles.
allocator = "gang:1" # Core allocation algorithm.
allocator_opt_target = "throughput" # Core allocation optimization target.
speedup_model = "linear" # Core allocation speedup model.
}
# Profiling of selected code regions. Both symbol lists are empty in this file.
profiling_cfg {
# File (name prefix) for profiling results.
file_prefix = ""
# Symbol/instruction at which to start profiling. Format is
# symbol_name(+offset).
start = {}
# Symbol/instruction at which to stop profiling. If empty, the exit points of
# the matching "start" entry are used.
stop = {}
}
# Functions to replace and individual instructions to ignore. Both lists are
# empty in this file.
ignore_cfg {
# Names of functions to replace.
funcs = {}
# Individual instructions to ignore. Format is either an exact PC in hex or
# symbol_name(+offset), like the profiling start parameters.
pcs = {}
}
}
# Core configuration.
core_cfg {
# Pipeline model.
pipeline_model = "DPM"
# CPU clock frequency (presumably MHz, like the uncore clock settings - confirm).
core_clock = 3200.0
# Instruction fetch settings.
fetch_cfg {
# Front end: instruction queue, L1 instruction cache (with its prefetcher and
# ITLB), branch predictors, byte queue and predecode pipe.
# Size of instruction queue (macro ops), placed between predecode and
# decode.
instruction_queue_size = 18
# Caches consist of the cache itself, a TLB, a prefetcher, and a coherency
# controller.
icache_cfg icache {
# General cache settings - size, associativity, line size, etc.
# NOTE(review): if the leading numeric fields are sets:ways:line-size, this is
# 128 x 4 x 64 B = 32 KB - confirm against the cache config parser.
config = "IL1:128:4:64:4:64:2:C:8"
# Cache coherency controller configuration.
coherency_controller = "none"
# Enable cache miss sampling.
sample_misses = false
# Instruction prefetcher attached to the icache.
iprefetch_cfg inst_pf {
config = {"nextline"} # 1st-level icache prefetcher configuration
on_miss_only = true # icache prefetch on miss only
fifosize = 8 # Prefetch FIFO size (TODO: units?)
buffer = 0 # Prefetch buffer size.
filter = 0 # Prefetch filter size.
filter_reset = 65536 # Prefetch filter reset interval (cycles).
# Prefetch threshold - only prefetch if MSHR occupancy is less than
# this.
threshold = 4
# Maximum instruction prefetch requests outstanding in the MSHR.
max_outstanding_requests = 2
# Sampling interval (cycles) for prefetch control. 0 = no PF controller.
watermark_sampling_interval = 100
# Minimum watermark - always prefetch if lower than this.
watermark_min = 0.1
# Maximum watermark - never prefetch if above this.
watermark_max = 0.3
}
itlb_cfg itlb {
# Instruction TLB (ITLB) configuration.
config = "ITLB:128:4:1:2:L:5"
# Coherency controller.
coherency_controller = "none"
}
}
# Branch prediction: direction predictor(s), BTB, indirect BTB and return
# address stack.
branch_pred_cfg {
# Direction predictor configuration(s). A single predictor is listed here, so
# no fusion algorithm is selected below.
type = {"tage:TAGE5:5:2048:512:9:6:75"}
# Fusion algorithm for hybrid 2nd-level bpred.
fusion = "none"
# Branch target buffer configuration.
btb = "btac:BTB:512:4:8:l"
# Indirect branch target buffer configuration.
ibtb = "2levbtac:iBTB:1:8:1:128:4:8:l"
# Return address stack predictor configuration.
ras = "multistack:RAS:8:8"
# Additional latency from branch-exec to jeclear.
jump_exec_delay = 1
}
# Byte queue sizing.
byte_queue_cfg {
# Number of entries.
size = 3
# Bytes per line.
line_size = 16
}
# Predecode pipe shape.
predecode_cfg {
# Number of stages in the predecode pipe.
depth = 2
# Width of predecode pipeline (macro-ops)
width = 6
}
}
decode_cfg {
# Decode stage: pipeline shape, per-decoder uop limits, the uop queue and the
# uop fusion rules.
# Pipeline depth in stages.
depth = 2
# Width of pipeline in macro-ops.
width = 4
# Stage of branch agen ("targetstage").
branch_agen_stage = 1
# Maximum branches decoded per cycle.
branch_decode_limit = 1
# Maximum uops generated for each decoder (e.g., 4 1 1). The list has four
# entries here, in line with width = 4 above.
decoder_max_uops = {4, 1, 1, 1}
# Latency to access micro-code sequencer.
ucode_sequencer_latency = 0
# Number of entries in uop queue.
uop_queue_size = 24
# Enable/disable uop fusion rules.
uop_fusion_cfg {
# Fuse the load op with the next computation op.
load_comp_op = true
# Fuse the load op with the next fp op.
fpload_comp_op = true
# Store address generate - store op.
sta_std = true
# Load-store op fusion.
load_op_store = false
}
}
# Alloc = dispatch
alloc_cfg {
# Allocation (dispatch) stage: pipeline shape and misprediction recovery.
# Pipeline depth (stages).
depth = 1
# Pipeline width (uops).
width = 4
# Use drain-flush after a misprediction.
use_drain_flush = true
}
exec_cfg {
# Execute stage: issue width and scheduling knobs, load/store queue sizes, the
# L1 data cache and L2 cache, the RingCache repeater, and the execution units
# with their port bindings.
# Maximum issues from RS per cycle (equal to num exec ports). The exeu
# port_binding lists below use port numbers 0 through 5.
width = 6
# Number of cycles for payload RAM access (schedule to exec delay).
payload_depth = 2
# Enable heuristic tornado breaker.
enable_tornado_breaker = true
# Enable load issue throttling on partial matches.
enable_partial_throttle = true
# Latency to forward results to FP cluster (cycles).
fp_forward_penalty = 0
# Memory dependence predictor configuration.
mem_dep_pred_config = "lwt:LWT:8192:999999"
# Number of reservation station entries.
rs_size = 36
# Number of load queue entries.
loadq_size = 36
# Number of store queue entries.
storeq_size = 24
# L1 data cache, with its TLBs and prefetcher.
dcache_cfg dcache {
# General cache settings.
# NOTE(review): if the leading numeric fields are sets:ways:line-size, this is
# 64 x 8 x 64 B = 32 KB - confirm against the cache config parser.
config = "DL1:64:8:64:8:64:2:C:W:B:16:8:C"
# MSHR configuration.
# NOTE(review): the L2 and LLC sections use "RPWB"; the different letter order
# here may be intentional - confirm.
mshr_cmd = "RWPB"
# Cache coherency controller configuration.
coherency_controller = "none"
# Enable cache miss sampling.
sample_misses = false
# Data TLB.
dtlb_cfg dtlb {
config = "DTLB:256:4:1:2:L:8"
coherency_controller = "none"
}
# d2tlb is presumably a second-level data TLB (confirm); its config is "none"
# in this file.
d2tlb_cfg d2tlb {
config = "none"
coherency_controller = "none"
}
dprefetch_cfg data_pf {
# 1st-level dcache prefetcher configuration. Two prefetchers are listed, each
# as its own quoted string.
config = {"IP:256:12:13:6", "nextline"}
on_miss_only = true # dcache prefetch on miss only
fifosize = 8 # Prefetch FIFO size (TODO: units?)
buffer = 0 # Prefetch buffer size.
filter = 0 # Prefetch filter size.
filter_reset = 65536 # Prefetch filter reset interval (cycles).
threshold = 4 # Prefetch threshold.
# Maximum data prefetch requests outstanding in the MSHR.
max_outstanding_requests = 2
# Sampling interval (cycles) for prefetch control. 0 = no PF controller.
watermark_sampling_interval = 100
# Minimum watermark - always prefetch if lower than this.
watermark_min = 0.1
# Maximum watermark - never prefetch if above this.
watermark_max = 0.3
}
}
# Second-level cache (named DL2 in its config string) and its prefetcher.
l2cache_cfg L2 {
# General cache settings.
# NOTE(review): if the leading numeric fields are sets:ways:line-size, this is
# 512 x 8 x 64 B = 256 KB - confirm against the cache config parser.
config = "DL2:512:8:64:8:64:2:C:W:B:16:8:C"
# MSHR configuration.
mshr_cmd = "RPWB"
# Cache coherency controller configuration.
coherency_controller = "const:75"
# Enable cache miss sampling.
sample_misses = false
l2prefetch_cfg l2_pf {
# L2 prefetcher configuration. Two prefetchers are listed, each as its own
# quoted string.
config = {"IP:256:12:13:6", "nextline"}
# NOTE(review): this key sits in the L2 prefetcher section; the trailing
# "dcache" wording looks copied from the L1 data prefetcher section.
on_miss_only = true # dcache prefetch on miss only
fifosize = 8 # Prefetch FIFO size (TODO: units?)
buffer = 0 # Prefetch buffer size.
filter = 0 # Prefetch filter size.
filter_reset = 65536 # Prefetch filter reset interval (cycles).
threshold = 4 # Prefetch threshold.
# Maximum L2 prefetch requests outstanding in the MSHR.
max_outstanding_requests = 2
# Sampling interval (cycles) for prefetch control. 0 = no PF controller.
watermark_sampling_interval = 100
# Minimum watermark - always prefetch if lower than this.
watermark_min = 0.1
# Maximum watermark - never prefetch if above this.
watermark_max = 0.3
}
}
# RingCache settings.
repeater_cfg {
# RingCache configuration (originally in zesto-repeater).
config = "none"
# Send request to L1 in parallel with the repeater.
request_dl1 = false
}
# Execution units. Every exeu section carries the same three keys as int_alu:
# execution latency, issue rate, and the exec ports the unit is bound to.
exeu int_alu {
latency = 1 # Execution latency.
rate = 1 # Issue rate.
port_binding = {0, 1, 5} # Port bindings.
}
exeu jump {
latency = 1
rate = 1
port_binding = {5}
}
exeu int_mul {
latency = 3
rate = 1
port_binding = {1}
}
exeu int_div {
latency = 24
rate = 16
port_binding = {0}
}
exeu shift {
latency = 1
rate = 1
port_binding = {0, 5}
}
exeu fp_alu {
latency = 3
rate = 1
port_binding = {1}
}
exeu fp_mul {
latency = 5
rate = 2
port_binding = {0}
}
exeu fp_div {
latency = 32
rate = 32
port_binding = {0}
}
exeu fp_cplx {
latency = 58
rate = 58
port_binding = {0}
}
# Load, store-address and store-data units each have a dedicated port
# (2, 3 and 4) that no other unit in this file binds to.
exeu ld {
latency = 1
rate = 1
port_binding = {2}
}
exeu st_agen {
latency = 1
rate = 1
port_binding = {3}
}
exeu st_data {
latency = 1
rate = 1
port_binding = {4}
}
# LEA = load effective address.
exeu lea {
latency = 1
rate = 1
port_binding = {1}
}
exeu magic {
latency = 1
rate = 1
port_binding = {0}
}
}
# Commit stage.
commit_cfg {
rob_size = 128 # Number of ROB entries.
commit_width = 4 # Maximum uops committed per cycle.
# NOTE(review): 0 presumably means "no separate limit" rather than a cap of
# zero branches per cycle - confirm against the simulator.
commit_branches = 0 # Maximum branches committed per cycle.
}
} # End of core cfg.
# Last level cache, FSB, DRAM, etc.
uncore_cfg {
llccache_cfg llc {
# Last-level cache and its prefetcher.
# General cache settings - size, associativity, line size, etc.
# NOTE(review): if the leading numeric fields are sets:ways:line-size, this is
# 8192 x 16 x 64 B = 8 MB - confirm against the cache config parser.
config = "LLC:8192:16:64:16:64:9:L:W:B:16:1:8:C"
# Cache coherency controller configuration.
coherency_controller = "const:75"
mshr_cmd = "RPWB" # MSHR configuration.
clock = 1600 # Cache clock frequency (MHz).
# Enable cache miss sampling.
sample_misses = false
llcprefetch_cfg llc_pf {
# NOTE(review): this list holds ONE string containing two space-separated
# specs ("IP:..." and "stream:..."). The L1 data and L2 prefetcher lists in
# core_cfg give each prefetcher its own quoted string, so the stream spec may
# not be picked up here - confirm how the simulator parses list entries.
config = {"IP:256:12:13:6 stream:12:4"} # last-level cache prefetcher configuration
on_miss_only = false # LLC prefetch on miss only
fifosize = 8 # Prefetch FIFO size (TODO: units?)
buffer = 0 # Prefetch buffer size.
filter = 0 # Prefetch filter size.
filter_reset = 65536 # Prefetch filter reset interval (cycles).
# Prefetch threshold - only prefetch if MSHR occupancy is less than
# this.
threshold = 4
# Maximum LLC prefetch requests outstanding in the MSHR.
max_outstanding_requests = 2
# Sampling interval (cycles) for prefetch control. 0 = no PF controller.
watermark_sampling_interval = 2000
# Minimum watermark - always prefetch if lower than this.
watermark_min = 0.1
# Maximum watermark - never prefetch if above this.
watermark_max = 0.4
}
}
fsb_cfg {
# Front-side bus: width, data rate and clock.
width = 8 # FSB bus width (Bytes).
ddr = true # FSB double pumped data.
clock = 800.0 # FSB bus clock frequency (MHz).
magic = false # FSB unlimited bandwidth.
}
dram_cfg {
# Memory controller and DRAM device model.
memory_controller_config = "simple:16:1"
dram_config = "simplesdram:4:4:35:11.25:11.25:11.25:11.25:64"
# Based on Samsung K4B510446E-ZCH0
# 512-Mb, DDR3-1600 9-9-9
#
# NOTE(review): the timings listed below (45.0 / 15.0 ns) do not appear in
# dram_config above, which carries 35 and 11.25 instead. 11.25 ns is 9 cycles
# at 800 MHz, which matches the "9-9-9" grade, so this list looks stale.
# Confirm the field order of dram_config and correct whichever side is wrong.
#
# t_RAS = 45.0ns
# t_RCD = 15.0ns
# t_CAS = 15.0ns
# t_WR = 15.0ns
# t_RP = 15.0ns
}
} # End of uncore configs.