Skip to content

Commit

Permalink
fix comments
Browse files: browse the repository at this point in the history.
  • Loading branch information
irexyc committed Nov 6, 2024
1 parent eaa217d commit 16f0059
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/turbomind/kernels/attention/decoding.cu
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ void dispatchDecoding(const AttentionParams<T>& params)
return false;
};

- auto dispatch_head_num = [&](auto arch) {
+ auto dispatch_head_dim = [&](auto arch) {
if (params.size_per_head == 128) {
return dispatch_kv(arch, std::integral_constant<int, 128>{});
}
Expand All @@ -98,15 +98,15 @@ void dispatchDecoding(const AttentionParams<T>& params)

auto dispatch = [&]() {
if (params.arch >= 80) {
- return dispatch_head_num(arch::Sm80{});
+ return dispatch_head_dim(arch::Sm80{});
}

if constexpr (!std::is_same_v<T, nv_bfloat16>) {
if (params.arch == 75) {
- return dispatch_head_num(arch::Sm75{});
+ return dispatch_head_dim(arch::Sm75{});
}
else if (params.arch >= 70) {
- return dispatch_head_num(arch::Sm70{});
+ return dispatch_head_dim(arch::Sm70{});
}
}

Expand Down

0 comments on commit 16f0059

Please sign in to comment.