From 295715d8505774062211f80133a12ad2f95a3f86 Mon Sep 17 00:00:00 2001
From: grimoire
Date: Fri, 24 Nov 2023 13:18:45 +0800
Subject: [PATCH 1/2] fix

---
 .../llama/flash_attention2/flash_fwd_launch_template.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/turbomind/models/llama/flash_attention2/flash_fwd_launch_template.h b/src/turbomind/models/llama/flash_attention2/flash_fwd_launch_template.h
index 4a94da08b2..9c5acbcd6a 100644
--- a/src/turbomind/models/llama/flash_attention2/flash_fwd_launch_template.h
+++ b/src/turbomind/models/llama/flash_attention2/flash_fwd_launch_template.h
@@ -14,7 +14,13 @@
 template
 __global__ void flash_fwd_kernel(Flash_fwd_params params)
 {
+
+#if __CUDA_ARCH__ >= 800
     flash::compute_attn(params);
+#else
+// TODO: support flash attention2 on sm<80
+    assert(false);
+#endif
 }
 
 template

From a81bde3f365cd45aefc9022b594d7e6c6af72128 Mon Sep 17 00:00:00 2001
From: grimoire
Date: Fri, 24 Nov 2023 16:11:46 +0800
Subject: [PATCH 2/2] fix lint

---
 .../models/llama/flash_attention2/flash_fwd_launch_template.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/turbomind/models/llama/flash_attention2/flash_fwd_launch_template.h b/src/turbomind/models/llama/flash_attention2/flash_fwd_launch_template.h
index 9c5acbcd6a..2c23bd7cb7 100644
--- a/src/turbomind/models/llama/flash_attention2/flash_fwd_launch_template.h
+++ b/src/turbomind/models/llama/flash_attention2/flash_fwd_launch_template.h
@@ -18,7 +18,7 @@ __global__ void flash_fwd_kernel(Flash_fwd_params params)
 #if __CUDA_ARCH__ >= 800
     flash::compute_attn(params);
 #else
-// TODO: support flash attention2 on sm<80
+    // TODO: support flash attention2 on sm<80
     assert(false);
 #endif
 }
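
For reference, a minimal standalone sketch of the __CUDA_ARCH__ guard pattern these commits apply. The toy kernel guarded_kernel and its launch below are illustrative assumptions, not part of the patched file; they only show how the same kernel source compiles the guarded fast path on sm_80 and newer while falling back to a device-side assert on older architectures.

// Minimal sketch (not from the patch): __CUDA_ARCH__ is defined only during
// the device compilation pass, with the value of the target virtual
// architecture, so each sm target compiles exactly one branch of the #if.
#include <cassert>

__global__ void guarded_kernel(int* out)  // hypothetical toy kernel
{
#if __CUDA_ARCH__ >= 800
    // Compiled only for sm_80 and newer: the supported path.
    out[threadIdx.x] = static_cast<int>(threadIdx.x);
#else
    // Older architectures: fail loudly instead of running unsupported code.
    assert(false);
#endif
}

int main()
{
    int* d_out = nullptr;
    cudaMalloc(&d_out, 32 * sizeof(int));
    guarded_kernel<<<1, 32>>>(d_out);
    cudaDeviceSynchronize();
    cudaFree(d_out);
    return 0;
}

Building this with, for example, nvcc -gencode arch=compute_75,code=sm_75 compiles the assert branch, which mirrors the fallback behavior the patch gives flash_fwd_kernel on pre-sm_80 GPUs.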