File tree: 2 files changed (+4 -2 lines changed)
tools/util/include/cutlass/util/reference/device
2 files changed (+4 -2 lines changed)
Original file line number | Diff line number | Diff line change
@@ -38,7 +38,7 @@ find_library(DPCPP_LIB_DIR NAMES sycl sycl6 PATHS "${DPCPP_BIN_DIR}/../lib")
38
38
39
39
add_library (DPCPP::DPCPP INTERFACE IMPORTED )
40
40
41
- set (DPCPP_FLAGS "-fsycl;-fno-sycl-id-queries-fit-in-int; " )
41
+ set (DPCPP_FLAGS "-fsycl;" )
42
42
set (DPCPP_COMPILE_ONLY_FLAGS "" )
43
43
44
44
if (NOT "${DPCPP_SYCL_TARGET} " STREQUAL "" )
Original file line number | Diff line number | Diff line change
30
30
**************************************************************************************************/
31
31
#pragma once
32
32
33
+ #include <limits>
33
34
#include <stdexcept>
34
35
#include "cutlass/cutlass.h"
35
36
#include "cutlass/util/reference/device/kernel/tensor_foreach.h"
@@ -133,7 +134,8 @@ struct BlockForEach {
133
134
#if defined (CUTLASS_ENABLE_SYCL)
134
135
// TODO: query the queue for block size
135
136
block_size = 128 ;
136
- grid_size = cute::ceil_div (capacity, block_size);
137
+ // Ensure global range doesn't overflow int
138
+ grid_size = std::min (capacity, static_cast <size_t >(std::numeric_limits<int >::max ())) / block_size;
137
139
#else
138
140
// if grid_size or block_size are zero, query occupancy using the CUDA Occupancy API
139
141
cudaError_t result = cudaOccupancyMaxPotentialBlockSize (
You can’t perform that action at this time.
0 commit comments