Skip to content

Commit

Permalink
Address comments
Browse files Browse the repository at this point in the history
  • Loading branch information
aacostadiaz committed Nov 15, 2024
1 parent e1f85c4 commit 0bf92ff
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 4 deletions.
1 change: 0 additions & 1 deletion cmake/FindDPCPP.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ if(NOT "${DPCPP_SYCL_ARCH}" STREQUAL "")
if("${DPCPP_SYCL_TARGET}" STREQUAL "nvptx64-nvidia-cuda")
list(APPEND DPCPP_FLAGS "-Xsycl-target-backend")
list(APPEND DPCPP_FLAGS "--cuda-gpu-arch=${DPCPP_SYCL_ARCH}")
-list(APPEND DPCPP_FLAGS "-fgpu-inline-threshold=1000000;")
list(APPEND DPCPP_COMPILE_ONLY_FLAGS; "-mllvm;-enable-global-offset=false;")
endif()
endif()
Expand Down
2 changes: 1 addition & 1 deletion include/cutlass/detail/helper_macros.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@

#if defined(CUTLASS_ENABLE_SYCL)
#define CUTLASS_HOST
-#define CUTLASS_GLOBAL __attribute__((always_inline)) inline
+#define CUTLASS_GLOBAL
#define CUTLASS_SHARED
#else
#define CUTLASS_HOST __host__
Expand Down
4 changes: 2 additions & 2 deletions include/cutlass/device_kernel.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,10 @@ void Kernel2(typename Operator::Params params) {

/// Generic CUTLASS kernel template.
template <typename Operator>
-CUTLASS_GLOBAL
#if defined(CUTLASS_ENABLE_SYCL)
-void device_kernel(typename Operator::Params const& params, sycl::local_ptr<char> smem) {
+void device_kernel(typename Operator::Params const params, sycl::local_ptr<char> smem) {
#else
+CUTLASS_GLOBAL
#ifdef __CUDACC__
// Enclosing this in __CUDACC__ suppresses MSVC warnings.
__launch_bounds__(Operator::MaxThreadsPerBlock, Operator::MinBlocksPerMultiprocessor)
Expand Down

0 comments on commit 0bf92ff

Please sign in to comment.