Skip to content

Commit ec1424e

Browse files
Add block_seq_stride flag (#692)
Add `block_seq_stride` flag

Co-authored-by: Rob Suderman <[email protected]>
1 parent 77ca02f commit ec1424e

File tree

1 file changed

+7
-0
lines changed

1 file changed

+7
-0
lines changed

sharktank/sharktank/examples/export_paged_llm_v1.py

+7
Original file line number | Diff line number | Diff line change
@@ -45,6 +45,12 @@ def main():
4545
type=lambda arg: [int(bs) for bs in arg.split(",")],
4646
default="4",
4747
)
48+
parser.add_argument(
49+
"--block-seq-stride",
50+
help="Block sequence stride for paged KV cache, must divide evenly into the context length",
51+
type=int,
52+
default="16",
53+
)
4854
parser.add_argument(
4955
"--verbose",
5056
help="Include verbose logging",
@@ -76,6 +82,7 @@ def main():
7682
static_tables=False, # Rely on the compiler for hoisting tables.
7783
kv_cache_type="direct" if args.bs == [1] else "paged",
7884
attention_kernel=args.attention_kernel,
85+
block_seq_stride=args.block_seq_stride,
7986
)
8087
llama_config.fake_quant = args.fake_quant
8188

0 commit comments

Comments
 (0)