diff --git a/src/turbomind/kernels/attention/decoding.cu b/src/turbomind/kernels/attention/decoding.cu index dde7dc5301..1b04b7d4eb 100644 --- a/src/turbomind/kernels/attention/decoding.cu +++ b/src/turbomind/kernels/attention/decoding.cu @@ -86,7 +86,7 @@ void dispatchDecoding(const AttentionParams& params) return false; }; - auto dispatch_head_num = [&](auto arch) { + auto dispatch_head_dim = [&](auto arch) { if (params.size_per_head == 128) { return dispatch_kv(arch, std::integral_constant{}); } @@ -98,15 +98,15 @@ void dispatchDecoding(const AttentionParams& params) auto dispatch = [&]() { if (params.arch >= 80) { - return dispatch_head_num(arch::Sm80{}); + return dispatch_head_dim(arch::Sm80{}); } if constexpr (!std::is_same_v) { if (params.arch == 75) { - return dispatch_head_num(arch::Sm75{}); + return dispatch_head_dim(arch::Sm75{}); } else if (params.arch >= 70) { - return dispatch_head_num(arch::Sm70{}); + return dispatch_head_dim(arch::Sm70{}); } }