diff --git a/meson.build b/meson.build index 868e539194..b6b02d5f3e 100644 --- a/meson.build +++ b/meson.build @@ -127,10 +127,8 @@ if get_option('build_backends') tf_dl_lib = cc.find_library('dl', required: false) tf_tensorflow_lib = cc.find_library('libtensorflow_cc', dirs: tensorflow_libdir, required: false) - tf_protobuf_lib = cc.find_library('libprotobuf', - dirs: tensorflow_libdir, required: false) - if get_option('tensorflow') and tf_dl_lib.found() and tf_tensorflow_lib.found() and tf_protobuf_lib.found() + if get_option('tensorflow') and tf_dl_lib.found() and tf_tensorflow_lib.found() includes += include_directories( tensorflow_include, tensorflow_include[0] + '/bazel-genfiles', @@ -141,7 +139,7 @@ if get_option('build_backends') tensorflow_include[0] + '/tensorflow/contrib/makefile/gen/protobuf-host/include', is_system: true ) - deps += [tf_dl_lib, tf_tensorflow_lib, tf_protobuf_lib] + deps += [tf_dl_lib, tf_tensorflow_lib] files += 'src/neural/network_tf.cc' has_backends = true endif @@ -198,7 +196,14 @@ if get_option('build_backends') deps += [ openblas_lib ] has_blas = true + endif + if has_blas + blas_files = [ + 'src/neural/blas/batchnorm.cc', + 'src/neural/blas/fully_connected_layer.cc', + 'src/neural/blas/winograd_convolution3.cc' + ] endif ispc = find_program('ispc', required: false) @@ -239,11 +244,8 @@ if get_option('build_backends') if get_option('blas') and has_blas - blas_files = [ - 'src/neural/blas/batchnorm.cc', + blas_files += [ 'src/neural/blas/convolution1.cc', - 'src/neural/blas/fully_connected_layer.cc', - 'src/neural/blas/winograd_convolution3.cc', 'src/neural/blas/network_blas.cc' ] @@ -290,7 +292,7 @@ if get_option('build_backends') ] if not get_option('blas') - opencl_files += 'src/neural/blas/transforms.cc' + opencl_files += blas_files endif includes += include_directories(get_option('opencl_include')) @@ -318,7 +320,6 @@ if get_option('build_backends') ] if get_option('cudnn') and cu_blas.found() and cu_dnn.found() and cu_dart.found() and nvcc.found() - includes += include_directories(get_option('cudnn_include')) deps += [cu_blas, cu_dnn, cu_dart] cuda_arguments = ['-c', '@INPUT@', '-o', '@OUTPUT@', '-I', meson.current_source_dir() + '/src'] diff --git a/meson_options.txt b/meson_options.txt index 58bb418af3..708f27674f 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -55,7 +55,7 @@ option('mkl_include', option('cudnn_include', type: 'array', - value: [], + value: ['/usr/local/cuda/include/'], description: 'Paths to cudnn include directory') option('build_backends', diff --git a/src/neural/opencl/cl2.hpp b/src/neural/opencl/cl2.hpp index 8d2df877cf..711b429e9b 100644 --- a/src/neural/opencl/cl2.hpp +++ b/src/neural/opencl/cl2.hpp @@ -357,8 +357,8 @@ cl::unmapSVM(inputB); cl::unmapSVM(output2); - cl_int error; - vectorAddKernel( + cl_int error; + vectorAddKernel( cl::EnqueueArgs( cl::NDRange(numElements/2), cl::NDRange(numElements/2)), @@ -369,7 +369,7 @@ 3, aPipe, defaultDeviceQueue, - error + error ); cl::copy(outputBuffer, begin(output), end(output)); @@ -1139,6 +1139,8 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ @@ -1235,8 +1237,6 @@ inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, string) \ \ F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ @@ -1830,8 +1830,6 @@ class Wrapper cl_type get() const { return object_; } - cl_type get() { return object_; } - protected: template friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); @@ -2744,7 +2742,9 @@ class Context error = platforms[i].getDevices(type, &devices); #if defined(CL_HPP_ENABLE_EXCEPTIONS) - } catch (Error) {} + } catch (cl::Error& e) { + error = e.err(); + } // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type // We do error checking next anyway, and can throw there if needed #endif @@ -3069,7 +3069,7 @@ class Event : public detail::Wrapper */ cl_int setCallback( cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), + void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), void * user_data = NULL) { return detail::errHandler( @@ -3258,7 +3258,7 @@ class Memory : public detail::Wrapper * value - not the Memory class instance. */ cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), + void (CL_CALLBACK * pfn_notify)(cl_mem, void *), void * user_data = NULL) { return detail::errHandler( @@ -3849,7 +3849,7 @@ class Buffer : public Memory } return result; - } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 }; @@ -4269,7 +4269,7 @@ class Image1D : public Image { CL_MEM_OBJECT_IMAGE1D, width, - 0, 0, 0, 0, 0, 0, 0, {0} + 0, 0, 0, 0, 0, 0, 0, 0 }; object_ = ::clCreateImage( context(), @@ -4358,7 +4358,7 @@ class Image1DBuffer : public Image CL_MEM_OBJECT_IMAGE1D_BUFFER, width, 0, 0, 0, 0, 0, 0, 0, - {buffer()} + buffer() }; object_ = ::clCreateImage( context(), @@ -4446,7 +4446,7 @@ class Image1DArray : public Image 0, 0, // height, depth (unused) arraySize, rowPitch, - 0, 0, 0, {0} + 0, 0, 0, 0 }; object_ = ::clCreateImage( context(), @@ -4561,7 +4561,7 @@ class Image2D : public Image height, 0, 0, // depth, array size (unused) row_pitch, - 0, 0, 0, {0} + 0, 0, 0, 0 }; object_ = ::clCreateImage( context(), @@ -4879,7 +4879,7 @@ class Image2DArray : public Image arraySize, rowPitch, slicePitch, - 0, 0, {0} + 0, 0, 0 }; object_ = ::clCreateImage( context(), @@ -4994,7 +4994,7 @@ class Image3D : public Image 0, // array size (unused) row_pitch, slice_pitch, - 0, 0, {0} + 0, 0, 0 }; object_ = ::clCreateImage( context(), @@ -5927,27 +5927,28 @@ class Kernel : public detail::Wrapper ); } - template - void setSVMPointersHelper(std::array &pointerList, const pointer &t0, Ts... ts) + template + void setSVMPointersHelper(std::array &pointerList, const pointer &t0, const pointer &t1, Ts & ... ts) { pointerList[index] = static_cast(t0.get()); - setSVMPointersHelper(ts...); + setSVMPointersHelper(pointerList, t1, ts...); } - template + template typename std::enable_if::value, void>::type - setSVMPointersHelper(std::array &pointerList, T0 t0, Ts... ts) + setSVMPointersHelper(std::array &pointerList, T0 t0, T1 t1, Ts... ts) { pointerList[index] = static_cast(t0); - setSVMPointersHelper(ts...); + setSVMPointersHelper(pointerList, t1, ts...); } - + template void setSVMPointersHelper(std::array &pointerList, const pointer &t0) { pointerList[index] = static_cast(t0.get()); } + template typename std::enable_if::value, void>::type setSVMPointersHelper(std::array &pointerList, T0 t0) @@ -5956,7 +5957,7 @@ class Kernel : public detail::Wrapper } template - cl_int setSVMPointers(const T0 &t0, Ts... ts) + cl_int setSVMPointers(const T0 &t0, Ts & ... ts) { std::array pointerList; @@ -7208,7 +7209,7 @@ class CommandQueue : public detail::Wrapper return err; } - +#if CL_HPP_TARGET_OPENCL_VERSION >= 110 cl_int enqueueReadBufferRect( const Buffer& buffer, cl_bool blocking, @@ -7323,7 +7324,7 @@ class CommandQueue : public detail::Wrapper return err; } - +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 #if CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * Enqueue a command to fill a buffer object with a pattern @@ -9533,7 +9534,7 @@ class KernelFunctor } template - cl_int setSVMPointers(const T0 &t0, T1s... ts) + cl_int setSVMPointers(const T0 &t0, T1s &... ts) { return kernel_.setSVMPointers(t0, ts...); } @@ -9675,4 +9676,4 @@ namespace compatibility { } // namespace cl -#endif // CL_HPP_ \ No newline at end of file +#endif // CL_HPP_