19
19
20
20
# Properties by which to differentiate results per commit:
21
21
KEY_PROPERTIES = [
22
- "cpu_info", "gpu_info", "backends", "n_gpu_layers", "model_filename", "model_type", "n_batch", "n_ubatch",
23
- "embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v", "use_mmap", "no_kv_offload",
24
- "split_mode" , "main_gpu" , "tensor_split" , "flash_attn" , "n_prompt" , "n_gen"
22
+ "cpu_info", "gpu_info", "backends", "n_gpu_layers", "tensor_buft_overrides", "model_filename", "model_type",
23
+ "n_batch", "n_ubatch", "embeddings", "cpu_mask", "cpu_strict", "poll", "n_threads", "type_k", "type_v",
24
+ "use_mmap", "no_kv_offload", "split_mode", "main_gpu", "tensor_split", "flash_attn", "n_prompt", "n_gen", "n_depth"
25
25
]
26
26
27
27
# Properties that are boolean and are converted to Yes/No for the table:
30
30
# Header names for the table:
31
31
PRETTY_NAMES = {
32
32
"cpu_info" : "CPU" , "gpu_info" : "GPU" , "backends" : "Backends" , "n_gpu_layers" : "GPU layers" ,
33
- "model_filename" : "File" , "model_type" : "Model" , "model_size" : "Model size [GiB]" ,
34
- "model_n_params" : "Num. of par." , "n_batch" : "Batch size" , "n_ubatch" : "Microbatch size" ,
35
- "embeddings": "Embeddings", "cpu_mask": "CPU mask", "cpu_strict": "CPU strict", "poll": "Poll",
36
- "n_threads": "Threads", "type_k": "K type", "type_v": "V type", "split_mode": "Split mode", "main_gpu": "Main GPU",
37
- "no_kv_offload": "NKVO", "flash_attn": "FlashAttention", "tensor_split": "Tensor split", "use_mmap": "Use mmap",
33
+ "tensor_buft_overrides": "Tensor overrides", "model_filename": "File", "model_type": "Model", "model_size": "Model size [GiB]",
34
+ "model_n_params" : "Num. of par." , "n_batch" : "Batch size" , "n_ubatch" : "Microbatch size" , "embeddings" : "Embeddings" ,
35
+ "cpu_mask": "CPU mask", "cpu_strict": "CPU strict", "poll": "Poll", "n_threads": "Threads", "type_k": "K type", "type_v": "V type",
36
+ "use_mmap": "Use mmap", "no_kv_offload": "NKVO", "split_mode": "Split mode", "main_gpu": "Main GPU", "tensor_split": "Tensor split",
37
+ "flash_attn" : "FlashAttention" ,
38
38
}
39
39
40
40
DEFAULT_SHOW = ["model_type" ] # Always show these properties by default.
@@ -281,12 +281,12 @@ def get_rows(properties):
281
281
The returned rows are unique in terms of property combinations.
282
282
"""
283
283
select_string = ", " .join (
284
- [f"tb.{ p } " for p in properties ] + ["tb.n_prompt" , "tb.n_gen" , "AVG(tb.avg_ts)" , "AVG(tc.avg_ts)" ])
284
+ [f"tb.{ p } " for p in properties ] + ["tb.n_prompt" , "tb.n_gen" , "tb.n_depth" , " AVG(tb.avg_ts)" , "AVG(tc.avg_ts)" ])
285
285
equal_string = " AND " .join (
286
286
[f"tb.{ p } = tc.{ p } " for p in KEY_PROPERTIES ] + [
287
287
f"tb.build_commit = '{hexsha8_baseline}'", f"tc.build_commit = '{hexsha8_compare}'"]
288
288
)
289
- group_order_string = ", " .join ([f"tb.{ p } " for p in properties ] + ["tb.n_gen" , "tb.n_prompt" ])
289
+ group_order_string = ", " .join ([f"tb.{ p } " for p in properties ] + ["tb.n_gen" , "tb.n_prompt" , "tb.n_depth" ])
290
290
query = (f"SELECT { select_string } FROM test tb JOIN test tc ON { equal_string } "
291
291
f"GROUP BY { group_order_string } ORDER BY { group_order_string } ;" )
292
292
return cursor .execute (query ).fetchall ()
@@ -309,7 +309,7 @@ def get_rows(properties):
309
309
rows_full = get_rows (KEY_PROPERTIES )
310
310
properties_different = []
311
311
for i , kp_i in enumerate (KEY_PROPERTIES ):
312
- if kp_i in DEFAULT_SHOW or kp_i == "n_prompt" or kp_i == "n_gen" :
312
+ if kp_i in DEFAULT_SHOW or kp_i in [ "n_prompt" , "n_gen" , "n_depth" ] :
313
313
continue
314
314
for row_full in rows_full :
315
315
if row_full [i ] != rows_full [0 ][i ]:
@@ -340,17 +340,20 @@ def get_rows(properties):
340
340
341
341
table = []
342
342
for row in rows_show :
343
- n_prompt = int (row [- 4 ])
344
- n_gen = int (row [- 3 ])
343
+ n_prompt = int (row [- 5 ])
344
+ n_gen = int (row [- 4 ])
345
+ n_depth = int (row [- 3 ])
345
346
if n_prompt != 0 and n_gen == 0 :
346
347
test_name = f"pp{ n_prompt } "
347
348
elif n_prompt == 0 and n_gen != 0 :
348
349
test_name = f"tg{ n_gen } "
349
350
else :
350
351
test_name = f"pp{ n_prompt } +tg{ n_gen } "
352
+ if n_depth != 0 :
353
+ test_name = f"{ test_name } @d{ n_depth } "
351
354
# Regular columns test name avg t/s values Speedup
352
355
# VVVVVVVVVVVVV VVVVVVVVV VVVVVVVVVVVVVV VVVVVVV
353
- table .append (list (row [:- 4 ]) + [test_name ] + list (row [- 2 :]) + [float (row [- 1 ]) / float (row [- 2 ])])
356
+ table .append (list (row [:- 5 ]) + [test_name ] + list (row [- 2 :]) + [float (row [- 1 ]) / float (row [- 2 ])])
354
357
355
358
# Some a-posteriori fixes to make the table contents prettier:
356
359
for bool_property in BOOL_PROPERTIES :
@@ -376,7 +379,7 @@ def get_rows(properties):
376
379
for gns in GPU_NAME_STRIP :
377
380
row_table [ip ] = row_table [ip ].replace (gns , "" )
378
381
379
- gpu_names = row_table [ip ].split ("/ " )
382
+ gpu_names = row_table [ip ].split (", " )
380
383
num_gpus = len (gpu_names )
381
384
all_names_the_same = len (set (gpu_names )) == 1
382
385
if len (gpu_names ) >= 2 and all_names_the_same :
0 commit comments