File tree Expand file tree Collapse file tree 2 files changed +4
-2
lines changed Expand file tree Collapse file tree 2 files changed +4
-2
lines changed Original file line number Diff line number Diff line change @@ -706,7 +706,8 @@ def forward(
706
706
use_paged_context_fmha = (
707
707
metadata .runtime_features .chunked_prefill
708
708
or metadata .runtime_features .cache_reuse
709
- or metadata .runtime_features .has_speculative_draft_tokens ) if metadata .runtime_features else False
709
+ or metadata .runtime_features .has_speculative_draft_tokens
710
+ ) if metadata .runtime_features else False
710
711
711
712
if use_paged_context_fmha and self .has_fp8_kv_cache :
712
713
# NOTE: W4A8_AWQ can be included too, exclude for now since
Original file line number Diff line number Diff line change @@ -556,7 +556,8 @@ def no_cuda_graph():
556
556
resource_manager = resource_manager )
557
557
torch .cuda .synchronize ()
558
558
559
- def _set_up_attn_metadata (self , kv_cache_manager : KVCacheManager ,
559
+ def _set_up_attn_metadata (self ,
560
+ kv_cache_manager : KVCacheManager ,
560
561
is_dummy_forward : bool = False ):
561
562
# is_dummy_forward is used to indicate whether the forward is
562
563
# a dummy forward for memory estimation OR
You can’t perform that action at this time.
0 commit comments