Raise MSVC warning level from /W3 to /W4 #2342

15 changes: 11 additions & 4 deletions cmake/helpers.cmake
@@ -108,18 +108,25 @@ function(add_ur_target_compile_options name)
elseif(MSVC)
target_compile_options(${name} PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/MP> # clang-cl.exe does not support /MP
/W3
/W4
/wd4456 # Disable: declaration of 'identifier' hides previous local declaration
/wd4457 # Disable: declaration of 'identifier' hides function parameter
/wd4458 # Disable: declaration of 'identifier' hides class member
/wd4459 # Disable: declaration of 'identifier' hides global declaration
/MD$<$<CONFIG:Debug>:d>
/GS
/DWIN32_LEAN_AND_MEAN
/DNOMINMAX
)

if(UR_DEVELOPER_MODE)
target_compile_definitions(${name} PRIVATE
# _CRT_SECURE_NO_WARNINGS used mainly because of getenv
# C4267: The compiler detected a conversion from size_t to a smaller type.
_CRT_SECURE_NO_WARNINGS
)

if(UR_DEVELOPER_MODE)
target_compile_options(${name} PRIVATE
/WX /GS /D_CRT_SECURE_NO_WARNINGS /wd4267
/WX /GS
)
endif()
endif()
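For context on the four new suppressions: at /W4, MSVC reports C4456-C4459 whenever a declaration shadows another one. A hypothetical snippet (not taken from the UR sources) that would trigger each of them:

```cpp
// Hypothetical shadowing patterns; names are illustrative only.
int gTotal = 0; // global declaration

struct Accumulator {
  int total = 0; // class member

  int add(int value) {
    int total = value; // C4458: declaration of 'total' hides class member
    {
      int total = 0;     // C4456: declaration of 'total' hides previous local declaration
      int value = total; // C4457: declaration of 'value' hides function parameter
      (void)value;
    }
    return total;
  }
};

int readGlobal() {
  int gTotal = 1; // C4459: declaration of 'gTotal' hides global declaration
  return gTotal;
}
```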
7 changes: 7 additions & 0 deletions examples/collector/collector.cpp
@@ -25,7 +25,14 @@
#include <string_view>

#include "ur_api.h"

#ifdef _MSC_VER
#pragma warning(disable : 4245)
#endif
#include "xpti/xpti_trace_framework.h"
#ifdef _MSC_VER
#pragma warning(default : 4245)
#endif

constexpr uint16_t TRACE_FN_BEGIN =
static_cast<uint16_t>(xpti::trace_point_type_t::function_with_args_begin);
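The disable/default pair above resets C4245 to its default state after the include. A closely related idiom, sketched here only for illustration and not as part of this change, is push/pop, which restores whatever state the warning had before the header:

```cpp
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4245) // signed/unsigned conversion inside the third-party header
#endif
#include "xpti/xpti_trace_framework.h"
#ifdef _MSC_VER
#pragma warning(pop)
#endif
```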
2 changes: 1 addition & 1 deletion include/ur_api.h
@@ -426,7 +426,7 @@ typedef struct ur_physical_mem_handle_t_ *ur_physical_mem_handle_t;
///////////////////////////////////////////////////////////////////////////////
#ifndef UR_BIT
/// @brief Generic macro for enumerator bit masks
#define UR_BIT(_i) (1 << _i)
#define UR_BIT(_i) (1U << _i)
#endif // UR_BIT

///////////////////////////////////////////////////////////////////////////////
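The yml change below mirrors this macro update. The unsigned literal matters once high bits are used; a minimal illustration (values hypothetical) of why `1U` is the safer form for enumerator bit masks:

```cpp
#include <cstdint>

#define UR_BIT_EXAMPLE(_i) (1U << _i) // unsigned literal, as in this PR

// With a plain 'int' literal, 1 << 31 overflows a signed 32-bit int
// (undefined behaviour before C++20), and converting the resulting negative
// constant to an unsigned flags type draws signed/unsigned warnings such as
// C4245 at /W4. The unsigned form stays well-defined:
uint32_t flags = UR_BIT_EXAMPLE(31); // 0x80000000u
```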
2 changes: 1 addition & 1 deletion scripts/core/common.yml
@@ -134,7 +134,7 @@ name: "$x_physical_mem_handle_t"
type: macro
desc: "Generic macro for enumerator bit masks"
name: "$X_BIT( _i )"
value: "( 1 << _i )"
value: "( 1U << _i )"
--- #--------------------------------------------------------------------------
type: enum
desc: "Defines Return/Error codes"
7 changes: 4 additions & 3 deletions source/adapters/cuda/CMakeLists.txt
@@ -97,15 +97,16 @@ if (UR_ENABLE_TRACING)
get_target_property(XPTI_SRC_DIR xpti SOURCE_DIR)
set(XPTI_PROXY_SRC "${XPTI_SRC_DIR}/xpti_proxy.cpp")
endif()
target_compile_definitions(${TARGET_NAME} PRIVATE
add_library(cuda-xpti-proxy STATIC ${XPTI_PROXY_SRC})
target_compile_definitions(cuda-xpti-proxy PRIVATE
XPTI_ENABLE_INSTRUMENTATION
XPTI_STATIC_LIBRARY
)
target_include_directories(${TARGET_NAME} PRIVATE
target_include_directories(cuda-xpti-proxy PRIVATE
${XPTI_INCLUDES}
${CUDA_CUPTI_INCLUDE_DIR}
)
target_sources(${TARGET_NAME} PRIVATE ${XPTI_PROXY_SRC})
target_link_libraries(${TARGET_NAME} PRIVATE cuda-xpti-proxy)
endif()

if (CUDA_cupti_LIBRARY)
26 changes: 13 additions & 13 deletions source/adapters/cuda/command_buffer.cpp
@@ -242,7 +242,7 @@ static ur_result_t enqueueCommandBufferFillHelper(
if ((PatternSize == 1) || (PatternSize == 2) || (PatternSize == 4)) {
CUDA_MEMSET_NODE_PARAMS NodeParams = {};
NodeParams.dst = DstPtr;
NodeParams.elementSize = PatternSize;
NodeParams.elementSize = static_cast<unsigned int>(PatternSize);
NodeParams.height = N;
NodeParams.pitch = PatternSize;
NodeParams.width = 1;
@@ -508,12 +508,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
auto &ArgIndices = hKernel->getArgIndices();
CUDA_KERNEL_NODE_PARAMS NodeParams = {};
NodeParams.func = CuFunc;
NodeParams.gridDimX = BlocksPerGrid[0];
NodeParams.gridDimY = BlocksPerGrid[1];
NodeParams.gridDimZ = BlocksPerGrid[2];
NodeParams.blockDimX = ThreadsPerBlock[0];
NodeParams.blockDimY = ThreadsPerBlock[1];
NodeParams.blockDimZ = ThreadsPerBlock[2];
NodeParams.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
NodeParams.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
NodeParams.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
NodeParams.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
NodeParams.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
NodeParams.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
NodeParams.sharedMemBytes = LocalSize;
NodeParams.kernelParams = const_cast<void **>(ArgIndices.data());

@@ -1397,12 +1397,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp(
CUDA_KERNEL_NODE_PARAMS &Params = KernelCommandHandle->Params;

Params.func = CuFunc;
Params.gridDimX = BlocksPerGrid[0];
Params.gridDimY = BlocksPerGrid[1];
Params.gridDimZ = BlocksPerGrid[2];
Params.blockDimX = ThreadsPerBlock[0];
Params.blockDimY = ThreadsPerBlock[1];
Params.blockDimZ = ThreadsPerBlock[2];
Params.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
Params.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
Params.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
Params.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
Params.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
Params.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);
Params.sharedMemBytes = KernelCommandHandle->Kernel->getLocalSize();
Params.kernelParams =
const_cast<void **>(KernelCommandHandle->Kernel->getArgIndices().data());
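All of the grid/block and elementSize casts in this file follow the same pattern: CUDA driver-API structs use `unsigned int` fields while UR work sizes are `size_t`, so the implicit 64-bit-to-32-bit narrowing is flagged (C4267/C4244) at /W4. A minimal, hypothetical illustration:

```cpp
#include <cstddef>

struct LaunchDims {
  unsigned int gridDimX; // stand-in for a CUDA_KERNEL_NODE_PARAMS field
};

void fillDims(LaunchDims &dims, std::size_t blocksPerGrid) {
  // dims.gridDimX = blocksPerGrid;                         // C4267 at /W4
  dims.gridDimX = static_cast<unsigned int>(blocksPerGrid); // explicit, warning-free
}
```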
4 changes: 2 additions & 2 deletions source/adapters/cuda/device.cpp
@@ -1153,7 +1153,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGet(ur_platform_handle_t hPlatform,

try {
if (pNumDevices) {
*pNumDevices = NumDevices;
*pNumDevices = static_cast<uint32_t>(NumDevices);
}

if (ReturnDevices && phDevices) {
@@ -1236,7 +1236,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceCreateWithNativeHandle(
ur_result_t UR_APICALL urDeviceGetGlobalTimestamps(ur_device_handle_t hDevice,
uint64_t *pDeviceTimestamp,
uint64_t *pHostTimestamp) {
CUevent Event;
CUevent Event{};
ScopedContext Active(hDevice);

if (pDeviceTimestamp) {
45 changes: 25 additions & 20 deletions source/adapters/cuda/enqueue.cpp
@@ -160,7 +160,7 @@ void guessLocalWorkSize(ur_device_handle_t Device, size_t *ThreadsPerBlock,
int MinGrid, MaxBlockSize;
UR_CHECK_ERROR(cuOccupancyMaxPotentialBlockSize(
&MinGrid, &MaxBlockSize, Kernel->get(), NULL, Kernel->getLocalSize(),
MaxBlockDim[0]));
static_cast<int>(MaxBlockDim[0])));

roundToHighestFactorOfGlobalSizeIn3d(ThreadsPerBlock, GlobalSizeNormalized,
MaxBlockDim, MaxBlockSize);
@@ -208,7 +208,7 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
MaxWorkGroupSize = Device->getMaxWorkGroupSize();

if (ProvidedLocalWorkGroupSize) {
auto IsValid = [&](int Dim) {
auto IsValid = [&](size_t Dim) {
if (ReqdThreadsPerBlock[Dim] != 0 &&
LocalWorkSize[Dim] != ReqdThreadsPerBlock[Dim])
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
@@ -217,7 +217,8 @@ setKernelParams([[maybe_unused]] const ur_context_handle_t Context,
LocalWorkSize[Dim] > MaxThreadsPerBlock[Dim])
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;

if (LocalWorkSize[Dim] > Device->getMaxWorkItemSizes(Dim))
if (LocalWorkSize[Dim] >
Device->getMaxWorkItemSizes(static_cast<int>(Dim)))
return UR_RESULT_ERROR_INVALID_WORK_GROUP_SIZE;
// Checks that local work sizes are a divisor of the global work sizes
// which includes that the local work sizes are neither larger than
@@ -489,9 +490,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(

auto &ArgIndices = hKernel->getArgIndices();
UR_CHECK_ERROR(cuLaunchKernel(
CuFunc, BlocksPerGrid[0], BlocksPerGrid[1], BlocksPerGrid[2],
ThreadsPerBlock[0], ThreadsPerBlock[1], ThreadsPerBlock[2], LocalSize,
CuStream, const_cast<void **>(ArgIndices.data()), nullptr));
CuFunc, static_cast<unsigned int>(BlocksPerGrid[0]),
static_cast<unsigned int>(BlocksPerGrid[1]),
static_cast<unsigned int>(BlocksPerGrid[2]),
static_cast<unsigned int>(ThreadsPerBlock[0]),
static_cast<unsigned int>(ThreadsPerBlock[1]),
static_cast<unsigned int>(ThreadsPerBlock[2]), LocalSize, CuStream,
const_cast<void **>(ArgIndices.data()), nullptr));

if (LocalSize != 0)
hKernel->clearLocalSize();
@@ -657,12 +662,12 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunchCustomExp(
auto &ArgIndices = hKernel->getArgIndices();

CUlaunchConfig launch_config;
launch_config.gridDimX = BlocksPerGrid[0];
launch_config.gridDimY = BlocksPerGrid[1];
launch_config.gridDimZ = BlocksPerGrid[2];
launch_config.blockDimX = ThreadsPerBlock[0];
launch_config.blockDimY = ThreadsPerBlock[1];
launch_config.blockDimZ = ThreadsPerBlock[2];
launch_config.gridDimX = static_cast<unsigned int>(BlocksPerGrid[0]);
launch_config.gridDimY = static_cast<unsigned int>(BlocksPerGrid[1]);
launch_config.gridDimZ = static_cast<unsigned int>(BlocksPerGrid[2]);
launch_config.blockDimX = static_cast<unsigned int>(ThreadsPerBlock[0]);
launch_config.blockDimY = static_cast<unsigned int>(ThreadsPerBlock[1]);
launch_config.blockDimZ = static_cast<unsigned int>(ThreadsPerBlock[2]);

launch_config.sharedMemBytes = LocalSize;
launch_config.hStream = CuStream;
@@ -1075,8 +1080,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferFill(
break;
}
default: {
UR_CHECK_ERROR(commonMemSetLargePattern(Stream, patternSize, size,
pPattern, DstDevice));
UR_CHECK_ERROR(
commonMemSetLargePattern(Stream, static_cast<uint32_t>(patternSize),
size, pPattern, DstDevice));
break;
}
}
@@ -1108,7 +1114,6 @@ static size_t imageElementByteSize(CUDA_ARRAY_DESCRIPTOR ArrayDesc) {
return 4;
default:
detail::ur::die("Invalid image format.");
return 0;
}
}

@@ -1212,7 +1217,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageRead(
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));

int ElementByteSize = imageElementByteSize(ArrayDesc);
int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));

size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
@@ -1285,7 +1290,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageWrite(
CUDA_ARRAY_DESCRIPTOR ArrayDesc;
UR_CHECK_ERROR(cuArrayGetDescriptor(&ArrayDesc, Array));

int ElementByteSize = imageElementByteSize(ArrayDesc);
int ElementByteSize = static_cast<int>(imageElementByteSize(ArrayDesc));

size_t ByteOffsetX = origin.x * ElementByteSize * ArrayDesc.NumChannels;
size_t BytesToCopy = ElementByteSize * ArrayDesc.NumChannels * region.width;
@@ -1364,7 +1369,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemImageCopy(
UR_ASSERT(SrcArrayDesc.NumChannels == DstArrayDesc.NumChannels,
UR_RESULT_ERROR_INVALID_MEM_OBJECT);

int ElementByteSize = imageElementByteSize(SrcArrayDesc);
int ElementByteSize = static_cast<int>(imageElementByteSize(SrcArrayDesc));

size_t DstByteOffsetX =
dstOrigin.x * ElementByteSize * SrcArrayDesc.NumChannels;
@@ -1549,8 +1554,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueUSMFill(
CuStream));
break;
default:
commonMemSetLargePattern(CuStream, patternSize, size, pPattern,
(CUdeviceptr)ptr);
commonMemSetLargePattern(CuStream, static_cast<uint32_t>(patternSize),
size, pPattern, (CUdeviceptr)ptr);
break;
}
if (phEvent) {
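The dropped `return 0;` after `detail::ur::die(...)` in imageElementByteSize is the unreachable-code case: assuming the termination helper is (or behaves as) `[[noreturn]]`, any statement after it triggers C4702 at /W4. A hedged sketch of the pattern, with a hypothetical `die`:

```cpp
#include <cstddef>

[[noreturn]] void die(const char *message); // assumption: the helper never returns

std::size_t imageElementByteSizeSketch(int numChannels) {
  switch (numChannels) {
  case 1:
    return 1;
  case 4:
    return 4;
  default:
    die("Invalid image format.");
    // return 0; // unreachable once die() is [[noreturn]] -> C4702 at /W4
  }
}
```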
5 changes: 3 additions & 2 deletions source/adapters/cuda/image.cpp
@@ -284,8 +284,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urUSMPitchedAllocExp(
ur_result_t Result = UR_RESULT_SUCCESS;
try {
ScopedContext Active(hDevice);
UR_CHECK_ERROR(cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch,
widthInBytes, height, elementSizeBytes));
UR_CHECK_ERROR(
cuMemAllocPitch((CUdeviceptr *)ppMem, pResultPitch, widthInBytes,
height, static_cast<unsigned int>(elementSizeBytes)));
} catch (ur_result_t error) {
Result = error;
} catch (...) {
4 changes: 2 additions & 2 deletions source/adapters/cuda/kernel.cpp
@@ -203,8 +203,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urKernelSuggestMaxCooperativeGroupCountExp(

int MaxNumActiveGroupsPerCU{0};
UR_CHECK_ERROR(cuOccupancyMaxActiveBlocksPerMultiprocessor(
&MaxNumActiveGroupsPerCU, hKernel->get(), localWorkSize,
dynamicSharedMemorySize));
&MaxNumActiveGroupsPerCU, hKernel->get(),
static_cast<int>(localWorkSize), dynamicSharedMemorySize));
detail::ur::assertion(MaxNumActiveGroupsPerCU >= 0);
// Handle the case where we can't have all SMs active with at least 1 group
// per SM. In that case, the device is still able to run 1 work-group, hence
8 changes: 4 additions & 4 deletions source/adapters/cuda/kernel.hpp
@@ -97,8 +97,8 @@ struct ur_kernel_handle_t_ {
}
ParamSizes[Index] = Size;
// calculate the insertion point on the array
size_t InsertPos = std::accumulate(std::begin(ParamSizes),
std::begin(ParamSizes) + Index, 0);
size_t InsertPos = std::accumulate(
std::begin(ParamSizes), std::begin(ParamSizes) + Index, size_t{0});
// Update the stored value for the argument
std::memcpy(&Storage[InsertPos], Arg, Size);
Indices[Index] = &Storage[InsertPos];
@@ -152,8 +152,8 @@ struct ur_kernel_handle_t_ {
const args_index_t &getIndices() const noexcept { return Indices; }

uint32_t getLocalSize() const {
return std::accumulate(std::begin(OffsetPerIndex),
std::end(OffsetPerIndex), 0);
return static_cast<uint32_t>(std::accumulate(
std::begin(OffsetPerIndex), std::end(OffsetPerIndex), size_t{0}));
}
} Args;

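Both accumulate changes in this header hinge on the same detail: `std::accumulate` deduces its result type from the initial value, so an `int` literal `0` sums `size_t` elements through `int`, and converting the result back draws C4267 (and can truncate). A small sketch with hypothetical data:

```cpp
#include <cstddef>
#include <numeric>
#include <vector>

std::size_t totalParamBytes(const std::vector<std::size_t> &paramSizes) {
  // return std::accumulate(paramSizes.begin(), paramSizes.end(), 0);           // accumulates as int
  return std::accumulate(paramSizes.begin(), paramSizes.end(), std::size_t{0}); // accumulates as size_t
}
```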
4 changes: 2 additions & 2 deletions source/adapters/cuda/program.cpp
@@ -148,8 +148,8 @@ ur_result_t ur_program_handle_t_::buildProgram(const char *BuildOptions) {
}

UR_CHECK_ERROR(cuModuleLoadDataEx(&Module, static_cast<const void *>(Binary),
Options.size(), Options.data(),
OptionVals.data()));
static_cast<unsigned int>(Options.size()),
Options.data(), OptionVals.data()));

BuildStatus = UR_PROGRAM_BUILD_STATUS_SUCCESS;

2 changes: 1 addition & 1 deletion source/adapters/cuda/usm.cpp
@@ -325,7 +325,7 @@ umf_result_t USMMemoryProvider::initialize(ur_context_handle_t Ctx,

enum umf_result_t USMMemoryProvider::alloc(size_t Size, size_t Align,
void **Ptr) {
auto Res = allocateImpl(Ptr, Size, Align);
auto Res = allocateImpl(Ptr, Size, static_cast<uint32_t>(Align));
if (Res != UR_RESULT_SUCCESS) {
getLastStatusRef() = Res;
return UMF_RESULT_ERROR_MEMORY_PROVIDER_SPECIFIC;
18 changes: 10 additions & 8 deletions source/adapters/level_zero/CMakeLists.txt
@@ -89,18 +89,19 @@ if(UR_BUILD_ADAPTER_L0)
endif()

# TODO: fix level_zero adapter conversion warnings
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(ur_adapter_level_zero PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244 /wd4267>
)

set_target_properties(ur_adapter_level_zero PROPERTIES
VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
SOVERSION "${PROJECT_VERSION_MAJOR}"
)

if (WIN32)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800)
if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero PRIVATE LINKER:/DEPENDENTLOADFLAG:0x800)
endif()

target_link_libraries(ur_adapter_level_zero PRIVATE
@@ -183,18 +184,19 @@ if(UR_BUILD_ADAPTER_L0_V2)
target_compile_definitions(ur_adapter_level_zero_v2 PUBLIC UR_ADAPTER_LEVEL_ZERO_V2)

# TODO: fix level_zero adapter conversion warnings
# C4267: The compiler detected a conversion from size_t to a smaller type.
target_compile_options(ur_adapter_level_zero_v2 PRIVATE
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244>
$<$<CXX_COMPILER_ID:MSVC>:/wd4805 /wd4244 /wd4100 /wd4267>
)

set_target_properties(ur_adapter_level_zero_v2 PROPERTIES
VERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH}"
SOVERSION "${PROJECT_VERSION_MAJOR}"
)

if (WIN32)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800)
if(CMAKE_CXX_COMPILER_LINKER_ID MATCHES MSVC)
# 0x800: Search for the DLL only in the System32 folder
target_link_options(ur_adapter_level_zero_v2 PUBLIC LINKER:/DEPENDENTLOADFLAG:0x800)
endif()

target_link_libraries(ur_adapter_level_zero_v2 PRIVATE
2 changes: 0 additions & 2 deletions source/adapters/level_zero/adapter.cpp
@@ -514,7 +514,5 @@ ur_result_t urAdapterGetInfo(ur_adapter_handle_t, ur_adapter_info_t PropName,
default:
return UR_RESULT_ERROR_INVALID_ENUMERATION;
}

return UR_RESULT_SUCCESS;
}
} // namespace ur::level_zero
2 changes: 1 addition & 1 deletion source/adapters/level_zero/context.cpp
@@ -759,7 +759,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList(
// queue's map to hold the fence and other associated command
// list information.
auto &QGroup = Queue->getQueueGroup(UseCopyEngine);
uint32_t QueueGroupOrdinal;
uint32_t QueueGroupOrdinal = 0;
auto &ZeCommandQueue = ForcedCmdQueue
? *ForcedCmdQueue
: QGroup.getZeQueue(&QueueGroupOrdinal);
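Initializing `QueueGroupOrdinal` addresses MSVC's potentially-uninitialized-local analysis (C4701 at /W4): the variable is filled through an out-parameter on only one branch of the conditional that follows. A hypothetical reduction of the pattern:

```cpp
#include <cstdint>

void getZeQueue(uint32_t *ordinal); // assumed helper that writes *ordinal

uint32_t pickOrdinal(bool forcedQueue) {
  uint32_t ordinal = 0; // initialized up front, as in the diff
  if (!forcedQueue) {
    getZeQueue(&ordinal); // only this branch assigns through the pointer
  }
  return ordinal; // without '= 0', /W4 can report C4701 here
}
```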