From 02ff2777951fbbba25227630d1e45581aaa1487e Mon Sep 17 00:00:00 2001
From: sanyalington
Date: Thu, 26 Sep 2024 17:00:10 +0000
Subject: [PATCH] re-enable avoid torch slice fix when chunked prefill is disabled

---
 vllm/attention/backends/rocm_flash_attn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py
index bb6ba9cb37ed9..b793ebf46d173 100644
--- a/vllm/attention/backends/rocm_flash_attn.py
+++ b/vllm/attention/backends/rocm_flash_attn.py
@@ -573,7 +573,7 @@ def forward(
                 else:
                     out = output
                 ops.paged_attention_rocm(
-                    output[num_prefill_tokens:],
+                    out,
                     exp_sums,
                     max_logits,
                     tmp_output,