Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cuda 12.0 Failed build #136

Open
Maxfashko opened this issue Apr 18, 2024 · 3 comments
Open

Cuda 12.0 Failed build #136

Maxfashko opened this issue Apr 18, 2024 · 3 comments

Comments

@Maxfashko
Copy link

#16 42.54 [ 86%] Building CUDA object CMakeFiles/pplcv_static.dir/src/ppl/cv/cuda/sepfilter2d.cu.o
#16 42.72 /opt/ppl.cv/src/ppl/cv/cuda/norm.hpp(687): warning #173-D: floating-point value does not fit in required integral type
#16 42.72 partial_mins[threadIdx_x] = 3.40282346638528859811704183484516925e+38F;
#16 42.72 ^
#16 42.72 detected during instantiation of "void ppl::cv::cuda::MinMaxKernel(const Tsrc *, int, int, int, int, const ppl::cv::cuda::uchar *, int, int, Tdst *) [with Tsrc=ppl::cv::cuda::uchar, Tdst=long long]" at line 207 of /opt/ppl.cv/src/ppl/cv/cuda/normalize.cu
#16 42.72
#16 42.72 Remark: The warnings can be suppressed with "-diag-suppress <warning-number>"
#16 42.72
#16 42.72 /opt/ppl.cv/src/ppl/cv/cuda/norm.hpp(832): warning #173-D: floating-point value does not fit in required integral type
#16 42.72 partial_mins[threadIdx_x] = 3.40282346638528859811704183484516925e+38F;
#16 42.72 ^
#16 42.72 detected during instantiation of "void ppl::cv::cuda::MinMaxKernel(const Tsrc *, int, int, int, int, const ppl::cv::cuda::uchar *, int, int, Tdst *) [with Tsrc=ppl::cv::cuda::uchar, Tdst=long long]" at line 207 of /opt/ppl.cv/src/ppl/cv/cuda/normalize.cu
#16 42.72
#16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(32): error: texture is not a template
#16 42.77 static texture<uchar, 0x02,
#16 42.77 ^
#16 42.77
#16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(34): error: texture is not a template
#16 42.77 static texture<uchar4, 0x02,
#16 42.77 ^
#16 42.77
#16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(36): error: texture is not a template
#16 42.77 static texture<float, 0x02,
#16 42.77 ^
#16 42.77
#16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(126): error: no instance of overloaded function "tex2D" matches the argument list
#16 42.77 argument types are: (<error-type>, float, float)
#16 42.77 float value = tex2D(uchar_c1_ref, coordinate_x, coordinate_y);
#16 42.77 ^
#16 42.77
#16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(133): error: no instance of overloaded function "tex2D" matches the argument list
#16 42.77 argument types are: (<error-type>, float, float)
#16 42.77 float4 value = tex2D(uchar_c4_ref, coordinate_x, coordinate_y);
#16 42.77 ^
#16 42.77
#16 42.77 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(158): error: no instance of overloaded function "tex2D" matches the argument list
#16 42.77 argument types are: (<error-type>, float, float)
#16 42.77 float value = tex2D(float_c1_ref, coordinate_x, coordinate_y);
#16 42.77 ^
#16 42.77
#16 42.78 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(767): error: no instance of overloaded function "tex2D" matches the argument list
#16 42.78 argument types are: (<error-type>, float, float)
#16 42.78 float value = tex2D(uchar_c1_ref, float_x, float_y);
#16 42.78 ^
#16 42.78
#16 42.78 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(774): error: no instance of overloaded function "tex2D" matches the argument list
#16 42.78 argument types are: (<error-type>, float, float)
#16 42.78 float4 value = tex2D(uchar_c4_ref, float_x, float_y);
#16 42.78 ^
#16 42.78
#16 42.78 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(808): error: no instance of overloaded function "tex2D" matches the argument list
#16 42.78 argument types are: (<error-type>, float, float)
#16 42.78 float value = tex2D(float_c1_ref, float_x, float_y);
#16 42.78 ^
#16 42.78
#16 42.79 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1069): error: identifier "cudaBindTexture2D" is undefined
#16 42.79 code = cudaBindTexture2D(0, uchar_c1_ref, src, desc, src_cols, src_rows,
#16 42.79 ^
#16 42.79
#16 42.79 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1085): error: identifier "cudaBindTexture2D" is undefined
#16 42.79 code = cudaBindTexture2D(0, uchar_c4_ref, src, desc, src_cols, src_rows,
#16 42.79 ^
#16 42.79
#16 42.79 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1162): error: identifier "cudaBindTexture2D" is undefined
#16 42.79 code = cudaBindTexture2D(0, uchar_c1_ref, src, desc, src_cols, src_rows,
#16 42.79 ^
#16 42.79
#16 42.79 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1179): error: identifier "cudaBindTexture2D" is undefined
#16 42.79 code = cudaBindTexture2D(0, uchar_c4_ref, src, desc, src_cols, src_rows,
#16 42.79 ^
#16 42.79
#16 42.80 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1264): error: identifier "cudaBindTexture2D" is undefined
#16 42.80 code = cudaBindTexture2D(0, float_c1_ref, src, desc, src_cols, src_rows,
#16 42.80 ^
#16 42.80
#16 42.80 /opt/ppl.cv/src/ppl/cv/cuda/resize.cu(1341): error: identifier "cudaBindTexture2D" is undefined
#16 42.80 code = cudaBindTexture2D(0, float_c1_ref, src, desc, src_cols, src_rows,
#16 42.80 ^
#16 42.80
#16 42.81 15 errors detected in the compilation of "/opt/ppl.cv/src/ppl/cv/cuda/resize.cu".
#16 42.84 make[2]: *** [CMakeFiles/pplcv_static.dir/build.make:585: CMakeFiles/pplcv_static.dir/src/ppl/cv/cuda/resize.cu.o] Error 2
#16 42.84 make[2]: *** Waiting for unfinished jobs....
#16 44.18 /opt/ppl.cv/src/ppl/cv/cuda/norm.hpp(687): warning #173-D: floating-point value does not fit in required integral type
#16 44.18 partial_mins[threadIdx_x] = 3.40282346638528859811704183484516925e+38F;

@jimurk
Copy link
Collaborator

jimurk commented May 20, 2024

Some operators in ppl.cv use texture reference APIs, which are no longer supported in CUDA 12. You can remove that code and use the existing ordinary CUDA implementations instead, since the texture-reference paths were used for performance.

@pieris98
Copy link

I've made this PR. As mentioned in the PR, so far I have only confirmed that it builds on my setup (RTX 3090, compute capability sm_86, CUDA toolkit 12.2); I haven't tested it for correctness yet.
To make the CMake build work in my case, I also had to hard-code my sm_86 architecture and comment out some older SM compute capabilities that are apparently not supported by CUDA 12 and caused errors. This is not perfect, but it did the job. Here's my cuda.cmake file:

# Load the shared CUDA helper module that ships with the hpcc dependency.
# NOTE(review): presumably this module locates the CUDA toolkit and provides the
# CUDA_* variables and the hpcc_cuda_* helper used below -- confirm in hpcc.
include(${HPCC_DEPS_DIR}/hpcc/cmake/cuda-common.cmake)

# Opt-in: the hpcc helper is invoked only when PPLCV_USE_MSVC_STATIC_RUNTIME is
# truthy.
# NOTE(review): judging by its name the helper switches CUDA objects to the
# static MSVC runtime -- confirm against its definition.
if(PPLCV_USE_MSVC_STATIC_RUNTIME)
    hpcc_cuda_use_msvc_static_runtime()
endif()

# Assemble the nvcc -gencode flags for the detected CUDA toolkit and append
# them to CMAKE_CUDA_FLAGS.
#
# Each architecture is gated on the toolkit versions whose nvcc accepts it, so
# the same file configures on old and new toolkits without hand-editing:
#   * sm_35 / sm_37 (Kepler) were removed in CUDA 12.0.
#   * sm_50 .. sm_72 (Maxwell, Pascal, Volta) were removed in CUDA 13.0.
#   * sm_86 requires CUDA >= 11.1 and sm_87 requires CUDA >= 11.4.
#
# CUDA_VERSION_MAJOR / CUDA_VERSION_MINOR must already be set when this runs
# (presumably by the cuda-common module included above -- confirm).
set(_NVCC_FLAGS )

# Kepler: rejected by nvcc from CUDA 12.0 on ("Unsupported gpu architecture").
if(CUDA_VERSION_MAJOR VERSION_LESS "12")
    string(APPEND _NVCC_FLAGS " -gencode arch=compute_35,code=sm_35")
    string(APPEND _NVCC_FLAGS " -gencode arch=compute_37,code=sm_37")
endif()

# Maxwell / Pascal / Volta: available up to and including the CUDA 12.x series.
if(CUDA_VERSION_MAJOR VERSION_LESS "13")
    string(APPEND _NVCC_FLAGS " -gencode arch=compute_50,code=sm_50")
    string(APPEND _NVCC_FLAGS " -gencode arch=compute_53,code=sm_53")
    if(CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "8")
        string(APPEND _NVCC_FLAGS " -gencode arch=compute_60,code=sm_60")
        string(APPEND _NVCC_FLAGS " -gencode arch=compute_61,code=sm_61")
        string(APPEND _NVCC_FLAGS " -gencode arch=compute_62,code=sm_62")
    endif()
    if(CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "9")
        string(APPEND _NVCC_FLAGS " -gencode arch=compute_70,code=sm_70")
    endif()
    if(CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "10")
        string(APPEND _NVCC_FLAGS " -gencode arch=compute_72,code=sm_72")
    endif()
endif()

# Turing.
if(CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "10")
    string(APPEND _NVCC_FLAGS " -gencode arch=compute_75,code=sm_75")
endif()

# Ampere.
if(CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "11")
    string(APPEND _NVCC_FLAGS " -gencode arch=compute_80,code=sm_80")
    # The minor version is only meaningful inside the 11.x series. Testing it
    # on its own would drop sm_86 / sm_87 on e.g. CUDA 12.0, whose minor is 0.
    if(CUDA_VERSION_MAJOR VERSION_GREATER "11" OR CUDA_VERSION_MINOR VERSION_GREATER_EQUAL "1")
        string(APPEND _NVCC_FLAGS " -gencode arch=compute_86,code=sm_86")
    endif()
    if(CUDA_VERSION_MAJOR VERSION_GREATER "11" OR CUDA_VERSION_MINOR VERSION_GREATER_EQUAL "4")
        string(APPEND _NVCC_FLAGS " -gencode arch=compute_87,code=sm_87")
    endif()
endif()

set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${_NVCC_FLAGS}")

# --------------------------------------------------------------------------- #

# All CUDA sources, headers and tests live in one directory. Relative glob
# expressions are resolved against the current source directory, so spelling
# that prefix out selects exactly the same files.
set(_pplcv_cuda_dir "${CMAKE_CURRENT_SOURCE_DIR}/src/ppl/cv/cuda")

# --- collect files ---------------------------------------------------------
file(GLOB PPLCV_CUDA_PUBLIC_HEADERS "${_pplcv_cuda_dir}/*.h")
file(GLOB PPLCV_CUDA_SRC
    "${_pplcv_cuda_dir}/*.cpp"
    "${_pplcv_cuda_dir}/utility/*.cpp")
file(GLOB PPLCV_CUDA_CU "${_pplcv_cuda_dir}/*.cu")
file(GLOB PPLCV_CUDA_BENCHMARK_SRC "${_pplcv_cuda_dir}/*_benchmark.cpp")
file(GLOB PPLCV_CUDA_UNITTEST_SRC "${_pplcv_cuda_dir}/*_unittest.cpp")

# --- public headers are installed under include/ppl/cv/cuda ----------------
install(
    FILES ${PPLCV_CUDA_PUBLIC_HEADERS}
    DESTINATION include/ppl/cv/cuda)

# --- feed the aggregate PPLCV_* lists ---------------------------------------
# Compile definition that marks the CUDA backend as enabled.
list(APPEND PPLCV_COMPILE_DEFINITIONS PPLCV_USE_CUDA)

# Host sources first, then device sources (same order as before).
list(APPEND PPLCV_SRC ${PPLCV_CUDA_SRC})
list(APPEND PPLCV_SRC ${PPLCV_CUDA_CU})

# CUDA include paths and libraries are exposed for the build tree only.
list(APPEND PPLCV_INCLUDE_DIRECTORIES $<BUILD_INTERFACE:${CUDA_INCLUDE_DIRS}>)
list(APPEND PPLCV_LINK_LIBRARIES $<BUILD_INTERFACE:${CUDA_LIBRARIES}>)

# Benchmark and unit-test sources go to their own aggregate lists.
list(APPEND PPLCV_BENCHMARK_SRC ${PPLCV_CUDA_BENCHMARK_SRC})
list(APPEND PPLCV_UNITTEST_SRC ${PPLCV_CUDA_UNITTEST_SRC})

unset(_pplcv_cuda_dir)

@woowahan-DoUn
Copy link

@pieris98 Thanks a lot for your code!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

4 participants