Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
q10 committed Nov 25, 2024
1 parent 2cac703 commit fa24249
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
2 changes: 1 addition & 1 deletion fbgemm_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ if(NOT FBGEMM_CPU_ONLY)
add_subdirectory(experimental/gemm)
endif()

if(NOT FBGEMM_CPU_ONLY AND NOT USE_ROCM)
if(NOT FBGEMM_CPU_ONLY)
# TODO: Re-enable gen_ai for ROCm once ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp
# lands into latest ROCm
add_subdirectory(experimental/gen_ai)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/literals.hpp"

#if (defined(USE_ROCM) && ROCM_VERSION >= 60300)
// NOTE: This source is currently only available in the `develop` branch of CK
// https://github.com/ROCm/composable_kernel
#include "ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp"
#endif

// Define commonly used types.
template <ck::index_t... Is>
Expand All @@ -42,6 +46,7 @@ using PassThrough = ck::tensor_operation::element_wise::PassThrough;

namespace fbgemm_gpu {

#if (defined(USE_ROCM) && ROCM_VERSION >= 60300)
template <
int BLOCK_SIZE,
int MBLOCK,
Expand Down Expand Up @@ -269,4 +274,20 @@ at::Tensor f8f8bf16_blockwise(
}
}

#else

/// Fallback definition of f8f8bf16_blockwise, compiled only when the
/// `defined(USE_ROCM) && ROCM_VERSION >= 60300` guard on the real
/// implementation evaluates to false.
///
/// The CK header device_gemm_multiple_d_xdl_cshuffle_v3_ab_scale.hpp is
/// included under that same guard, so the kernel cannot be built here. This
/// stub keeps the symbol defined and reports the problem at call time.
///
/// All arguments are ignored; the signature (including the default block
/// sizes) is the one declared by this stub's original definition.
///
/// @throws std::runtime_error always.
at::Tensor f8f8bf16_blockwise(
    at::Tensor XQ,
    at::Tensor WQ,
    at::Tensor x_scale,
    at::Tensor w_scale,
    int64_t block_m = 128,
    int64_t block_n = 128,
    int64_t block_k = 128) {
  // The guard also fails when USE_ROCM is not defined at all, so the message
  // names the operator and the requirement instead of asserting that an
  // older ROCm release is installed.
  throw std::runtime_error(
      "f8f8bf16_blockwise is unavailable: this build does not target "
      "ROCm >= 6.3"); // requires ROCm>=6.3
}

#endif

} // namespace fbgemm_gpu

0 comments on commit fa24249

Please sign in to comment.