diff --git a/src/ATen/native/xpu/ForeachOpList.cpp b/src/ATen/native/xpu/ForeachOpList.cpp
index 6813a91ae..73f23f39e 100644
--- a/src/ATen/native/xpu/ForeachOpList.cpp
+++ b/src/ATen/native/xpu/ForeachOpList.cpp
@@ -1,4 +1,11 @@
 #include
+#include <ATen/ops/_foreach_add_native.h>
+#include <ATen/ops/_foreach_addcdiv_native.h>
+#include <ATen/ops/_foreach_addcmul_native.h>
+#include <ATen/ops/_foreach_div_native.h>
+#include <ATen/ops/_foreach_lerp_native.h>
+#include <ATen/ops/_foreach_mul_native.h>
+
 #include
 #include
 #include
@@ -8,29 +15,6 @@
 namespace at {
 namespace native {
 
-::std::vector<at::Tensor> foreach_tensor_mul_list_kernel_slow(
-    at::TensorList self,
-    at::TensorList other);
-void foreach_tensor_mul_list_kernel_slow_(
-    at::TensorList self,
-    at::TensorList other);
-
-::std::vector<at::Tensor> foreach_tensor_div_list_kernel_slow(
-    at::TensorList self,
-    at::TensorList other);
-void foreach_tensor_div_list_kernel_slow_(
-    at::TensorList self,
-    at::TensorList other);
-
-::std::vector<at::Tensor> foreach_tensor_add_list_kernel_slow(
-    at::TensorList self,
-    at::TensorList other,
-    const at::Scalar& alpha);
-void foreach_tensor_add_list_kernel_slow_(
-    at::TensorList self,
-    at::TensorList other,
-    const at::Scalar& alpha);
-
 #define FOREACH_BINARY_OP_LIST(NAME, DIVISION_OP)                \
   void foreach_tensor_##NAME##_list_kernel_xpu_(                 \
       TensorList tensors1, TensorList tensors2) {                \
@@ -81,28 +65,6 @@ FOREACH_BINARY_OP_LIST_ALPHA(add);
 FOREACH_BINARY_OP_LIST(mul, false);
 FOREACH_BINARY_OP_LIST(div, true);
 
-::std::vector<at::Tensor> foreach_tensor_addcmul_scalarlist_slow(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    at::ArrayRef<at::Scalar> scalars);
-void foreach_tensor_addcmul_scalarlist_slow_(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    at::ArrayRef<at::Scalar> scalars);
-
-::std::vector<at::Tensor> foreach_tensor_addcdiv_scalarlist_slow(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    at::ArrayRef<at::Scalar> scalars);
-void foreach_tensor_addcdiv_scalarlist_slow_(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    at::ArrayRef<at::Scalar> scalars);
-
 #define FOREACH_POINTWISE_OP_TENSOR(NAME)                        \
   std::vector<Tensor> foreach_tensor_##NAME##_list_kernel_xpu(   \
       TensorList input,                                          \
@@ -142,11 +104,6 @@ void foreach_tensor_addcdiv_scalarlist_slow_(
 FOREACH_POINTWISE_OP_TENSOR(addcmul)
 FOREACH_POINTWISE_OP_TENSOR(addcdiv)
 
-::std::vector<at::Tensor> foreach_tensor_ternary_lerp_slow(
-    at::TensorList self,
-    at::TensorList tensors1,
-    at::TensorList weights);
-
 std::vector<Tensor> foreach_tensor_lerp_ternary_xpu(
     TensorList tensors1,
     TensorList tensors2,
@@ -166,11 +123,6 @@ std::vector<Tensor> foreach_tensor_lerp_ternary_xpu(
   return vec_res;
 }
 
-void foreach_tensor_ternary_lerp_slow_(
-    at::TensorList self,
-    at::TensorList tensors1,
-    at::TensorList weights);
-
 void foreach_tensor_lerp_ternary_xpu_(
     TensorList tensors1,
     TensorList tensors2,
diff --git a/src/ATen/native/xpu/ForeachOpScalar.cpp b/src/ATen/native/xpu/ForeachOpScalar.cpp
index 46b908ced..5a581a289 100644
--- a/src/ATen/native/xpu/ForeachOpScalar.cpp
+++ b/src/ATen/native/xpu/ForeachOpScalar.cpp
@@ -1,34 +1,18 @@
 #include
+#include <ATen/ops/_foreach_add_native.h>
+#include <ATen/ops/_foreach_addcdiv_native.h>
+#include <ATen/ops/_foreach_addcmul_native.h>
+#include <ATen/ops/_foreach_div_native.h>
+#include <ATen/ops/_foreach_lerp_native.h>
+#include <ATen/ops/_foreach_mul_native.h>
 
 #include
 #include
 #include
 
 namespace at {
-
 namespace native {
 
-::std::vector<at::Tensor> foreach_tensor_add_scalar_kernel_slow(
-    at::TensorList self,
-    const at::Scalar& scalar);
-void foreach_tensor_add_scalar_kernel_slow_(
-    at::TensorList self,
-    const at::Scalar& scalar);
-
-::std::vector<at::Tensor> foreach_tensor_mul_scalar_kernel_slow(
-    at::TensorList self,
-    const at::Scalar& scalar);
-void foreach_tensor_mul_scalar_kernel_slow_(
-    at::TensorList self,
-    const at::Scalar& scalar);
-
-::std::vector<at::Tensor> foreach_tensor_div_scalar_kernel_slow(
-    at::TensorList self,
-    const at::Scalar& scalar);
-void foreach_tensor_div_scalar_kernel_slow_(
-    at::TensorList self,
-    const at::Scalar& scalar);
-
 #define FOREACH_BINARY_OP_SCALAR(NAME, DIV_OP)                   \
   void foreach_tensor_##NAME##_scalar_kernel_xpu_(               \
       TensorList tensors, const Scalar& scalar) {                \
@@ -54,28 +38,6 @@ FOREACH_BINARY_OP_SCALAR(add, /*div_op*/ false);
 FOREACH_BINARY_OP_SCALAR(mul, /*div_op*/ false);
 FOREACH_BINARY_OP_SCALAR(div, /*div_op*/ true);
 
-::std::vector<at::Tensor> foreach_tensor_addcmul_scalar_slow(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    const at::Scalar& value);
-void foreach_tensor_addcmul_scalar_slow_(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    const at::Scalar& value);
-
-::std::vector<at::Tensor> foreach_tensor_addcdiv_scalar_slow(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    const at::Scalar& value);
-void foreach_tensor_addcdiv_scalar_slow_(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    const at::Scalar& value);
-
 #define FOREACH_POINTWISE_OP_SCALAR(NAME)                        \
   std::vector<Tensor> foreach_tensor_##NAME##_scalar_xpu(        \
       TensorList input,                                          \
@@ -112,15 +74,6 @@ void foreach_tensor_addcdiv_scalar_slow_(
 FOREACH_POINTWISE_OP_SCALAR(addcmul)
 FOREACH_POINTWISE_OP_SCALAR(addcdiv)
 
-::std::vector<at::Tensor> foreach_tensor_lerp_list_kernel_slow(
-    at::TensorList self,
-    at::TensorList tensors1,
-    const at::Scalar& weight);
-void foreach_tensor_lerp_list_kernel_slow_(
-    at::TensorList self,
-    at::TensorList tensors1,
-    const at::Scalar& weight);
-
 std::vector<Tensor> foreach_tensor_lerp_list_xpu(
     TensorList tensors1,
     TensorList tensors2,
diff --git a/src/ATen/native/xpu/ForeachOpScalarList.cpp b/src/ATen/native/xpu/ForeachOpScalarList.cpp
index 6ac047476..1433e08bd 100644
--- a/src/ATen/native/xpu/ForeachOpScalarList.cpp
+++ b/src/ATen/native/xpu/ForeachOpScalarList.cpp
@@ -1,4 +1,9 @@
 #include
+#include <ATen/ops/_foreach_add_native.h>
+#include <ATen/ops/_foreach_addcdiv_native.h>
+#include <ATen/ops/_foreach_addcmul_native.h>
+#include <ATen/ops/_foreach_div_native.h>
+#include <ATen/ops/_foreach_mul_native.h>
 
 #include
 #include
@@ -8,44 +13,6 @@
 namespace at {
 namespace native {
 
-::std::vector<at::Tensor> foreach_tensor_add_scalar_kernel_slow(
-    at::TensorList self,
-    const at::Scalar& scalar);
-void foreach_tensor_add_scalar_kernel_slow_(
-    at::TensorList self,
-    const at::Scalar& scalar);
-::std::vector<at::Tensor> foreach_tensor_mul_scalar_kernel_slow(
-    at::TensorList self,
-    const at::Scalar& scalar);
-void foreach_tensor_mul_scalar_kernel_slow_(
-    at::TensorList self,
-    const at::Scalar& scalar);
-
-::std::vector<at::Tensor> foreach_tensor_add_scalarlist_kernel_slow(
-    at::TensorList self,
-    at::ArrayRef<at::Scalar> scalars);
-void foreach_tensor_add_scalarlist_kernel_slow_(
-    at::TensorList self,
-    at::ArrayRef<at::Scalar> scalars);
-::std::vector<at::Tensor> foreach_tensor_mul_scalarlist_kernel_slow(
-    at::TensorList self,
-    at::ArrayRef<at::Scalar> scalars);
-void foreach_tensor_mul_scalarlist_kernel_slow_(
-    at::TensorList self,
-    at::ArrayRef<at::Scalar> scalars);
-
-::std::vector<at::Tensor> foreach_tensor_div_scalar_kernel_slow(
-    at::TensorList self,
-    const at::Scalar& scalar);
-void foreach_tensor_div_scalar_kernel_slow_(
-    at::TensorList self,
-    const at::Scalar& scalar);
-::std::vector<at::Tensor> foreach_tensor_div_scalarlist_kernel_slow(
-    at::TensorList self,
-    at::ArrayRef<at::Scalar> scalars);
-void foreach_tensor_div_scalarlist_kernel_slow_(
-    at::TensorList self,
-    at::ArrayRef<at::Scalar> scalars);
 
 #define FOREACH_BINARY_OP_SCALARLIST(NAME, DIV_OP)               \
   void foreach_tensor_##NAME##_scalar_kernel_xpu_(               \
@@ -74,47 +41,6 @@ FOREACH_BINARY_OP_SCALARLIST(add, /*div_op*/ false);
 FOREACH_BINARY_OP_SCALARLIST(mul, /*div_op*/ false);
 FOREACH_BINARY_OP_SCALARLIST(div, /*div_op*/ true);
 
-void foreach_tensor_addcmul_scalar_slow_(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    const at::Scalar& value = 1);
-::std::vector<at::Tensor> foreach_tensor_addcmul_scalar_slow(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    const at::Scalar& value = 1);
-::std::vector<at::Tensor> foreach_tensor_addcmul_scalarlist_slow(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    at::ArrayRef<at::Scalar> scalars);
-void foreach_tensor_addcmul_scalarlist_slow_(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    at::ArrayRef<at::Scalar> scalars);
-void foreach_tensor_addcdiv_scalar_slow_(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    const at::Scalar& value = 1);
-::std::vector<at::Tensor> foreach_tensor_addcdiv_scalar_slow(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    const at::Scalar& value = 1);
-::std::vector<at::Tensor> foreach_tensor_addcdiv_scalarlist_slow(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    at::ArrayRef<at::Scalar> scalars);
-void foreach_tensor_addcdiv_scalarlist_slow_(
-    at::TensorList self,
-    at::TensorList tensor1,
-    at::TensorList tensor2,
-    at::ArrayRef<at::Scalar> scalars);
-
 #define FOREACH_POINTWISE_OP_SCALARLIST(NAME)                    \
   std::vector<Tensor> foreach_tensor_##NAME##_scalarlist_xpu(    \
       TensorList input,                                          \
diff --git a/src/ATen/native/xpu/ForeachUnaryOp.cpp b/src/ATen/native/xpu/ForeachUnaryOp.cpp
index 89cd0ab4e..4492d8313 100644
--- a/src/ATen/native/xpu/ForeachUnaryOp.cpp
+++ b/src/ATen/native/xpu/ForeachUnaryOp.cpp
@@ -1,4 +1,6 @@
 #include
+#include <ATen/ops/_foreach_sqrt_native.h>
+
 #include
 
 namespace at {
@@ -6,9 +8,6 @@ namespace native {
 
 // given a functor and a "dispatch function", creates the outplace and inplace
 // operations
-::std::vector<at::Tensor> foreach_tensor_sqrt_slow(at::TensorList self);
-void foreach_tensor_sqrt_slow_(at::TensorList self);
-
 #define FOREACH_UNARY_OP(op_name)                                          \
   std::vector<Tensor> foreach_tensor_##op_name##_xpu(TensorList tensors) { \
     check_foreach_api_restrictions(tensors);                               \
diff --git a/src/ATen/native/xpu/Nonzero.cpp b/src/ATen/native/xpu/Nonzero.cpp
index deb646f6c..9988631d3 100644
--- a/src/ATen/native/xpu/Nonzero.cpp
+++ b/src/ATen/native/xpu/Nonzero.cpp
@@ -5,7 +5,7 @@
 #include
 
 namespace at {
-namespace native{
+namespace native {
 Tensor& nonzero_out_xpu(const Tensor& self, Tensor& out) {
   TORCH_CHECK(
       self.numel() < std::numeric_limits<int>::max(),
@@ -38,5 +38,5 @@ Tensor nonzero_xpu(const Tensor& self) {
   nonzero_out_xpu(self, out);
   return out;
 }
-}
+} // namespace native
 } // namespace at
\ No newline at end of file
diff --git a/src/ATen/native/xpu/RangeFactories.cpp b/src/ATen/native/xpu/RangeFactories.cpp
index ad0a6ffc6..c9376f177 100644
--- a/src/ATen/native/xpu/RangeFactories.cpp
+++ b/src/ATen/native/xpu/RangeFactories.cpp
@@ -31,7 +31,8 @@ Tensor& arange_out_xpu(
 
     TORCH_CHECK(xstep > 0 || xstep < 0, "step must be nonzero");
     TORCH_CHECK(
-        std::isfinite(static_cast<double>(xstart)) && std::isfinite(static_cast<double>(xend)),
+        std::isfinite(static_cast<double>(xstart)) &&
+            std::isfinite(static_cast<double>(xend)),
         "unsupported range: ",
         xstart,
         " -> ",
@@ -99,7 +100,8 @@ Tensor& range_xpu_out(
 
     TORCH_CHECK(xstep > 0 || xstep < 0, "step must be nonzero");
     TORCH_CHECK(
-        std::isfinite(static_cast<double>(xstart)) && std::isfinite(static_cast<double>(xend)),
+        std::isfinite(static_cast<double>(xstart)) &&
+            std::isfinite(static_cast<double>(xend)),
         "unsupported range: ",
         xstart,
         " -> ",
diff --git a/src/ATen/native/xpu/sycl/ActivationGluKernels.cpp b/src/ATen/native/xpu/sycl/ActivationGluKernels.cpp
index f60613ec7..fd1b966d7 100644
--- a/src/ATen/native/xpu/sycl/ActivationGluKernels.cpp
+++ b/src/ATen/native/xpu/sycl/ActivationGluKernels.cpp
@@ -2,7 +2,6 @@
 #include
 #include
 
-#include
 #include
 #include
 
diff --git a/src/ATen/native/xpu/sycl/ForeachTernaryKernels.cpp b/src/ATen/native/xpu/sycl/ForeachTernaryKernels.cpp
index 21168f8a3..ed8e01653 100644
--- a/src/ATen/native/xpu/sycl/ForeachTernaryKernels.cpp
+++ b/src/ATen/native/xpu/sycl/ForeachTernaryKernels.cpp
@@ -5,8 +5,8 @@
 #include
 #include
 
-#include
 #include
+#include
 
 namespace at::native::xpu {
 
diff --git a/src/ATen/native/xpu/sycl/PointwiseOpsKernels.h b/src/ATen/native/xpu/sycl/PointwiseOpsKernels.h
index 230b693f5..04f2021ea 100644
--- a/src/ATen/native/xpu/sycl/PointwiseOpsKernels.h
+++ b/src/ATen/native/xpu/sycl/PointwiseOpsKernels.h
@@ -4,9 +4,13 @@
 
 namespace at::native::xpu {
 
-TORCH_XPU_API void addcmul_kernel(TensorIteratorBase& iter, const Scalar& value);
+TORCH_XPU_API void addcmul_kernel(
+    TensorIteratorBase& iter,
+    const Scalar& value);
 
-TORCH_XPU_API void addcdiv_kernel(TensorIteratorBase& iter, const Scalar& value);
+TORCH_XPU_API void addcdiv_kernel(
+    TensorIteratorBase& iter,
+    const Scalar& value);
 
 TORCH_XPU_API void mse_backward_kernel(
     TensorIterator& iter,
diff --git a/src/ATen/native/xpu/sycl/PowKernels.cpp b/src/ATen/native/xpu/sycl/PowKernels.cpp
index e080511d2..7b19fa4db 100644
--- a/src/ATen/native/xpu/sycl/PowKernels.cpp
+++ b/src/ATen/native/xpu/sycl/PowKernels.cpp
@@ -38,7 +38,8 @@ static inline c10::complex<T> pow_(c10::complex<T> base, c10::complex<T> exp) {
 } // namespace impl
 
 #ifdef _MSC_VER
-// Divergence for MSVC due to accuracy issue. https://github.com/intel/torch-xpu-ops/issues/842.
+// Divergence for MSVC due to accuracy issue.
+// https://github.com/intel/torch-xpu-ops/issues/842.
 template <typename scalar_t>
 struct PowTensorTensorCastFunctor {
   using opmath_t = at::opmath_type<scalar_t>;
diff --git a/src/ATen/native/xpu/sycl/ReduceNormKernel.cpp b/src/ATen/native/xpu/sycl/ReduceNormKernel.cpp
index ef405be49..4527d51bd 100644
--- a/src/ATen/native/xpu/sycl/ReduceNormKernel.cpp
+++ b/src/ATen/native/xpu/sycl/ReduceNormKernel.cpp
@@ -1,8 +1,8 @@
 #include
 #include
 
-#include
 #include
+#include
 
 #include
 
diff --git a/src/ATen/native/xpu/sycl/Sorting.cpp b/src/ATen/native/xpu/sycl/Sorting.cpp
index cf41810dc..05fba0bb9 100644
--- a/src/ATen/native/xpu/sycl/Sorting.cpp
+++ b/src/ATen/native/xpu/sycl/Sorting.cpp
@@ -16,8 +16,8 @@
 #include
 #include
 
-#include
 #include
+#include
 
 namespace at::native::xpu {
 
diff --git a/src/bridge.cpp b/src/bridge.cpp
index c19ce2554..714a91103 100644
--- a/src/bridge.cpp
+++ b/src/bridge.cpp
@@ -20,10 +20,11 @@ class LoadTorchXPUOps {
   LoadTorchXPUOps() {
     if (NULL == LoadLibrary(PATH_TO_TORCH_XPU_OPS_ATEN_LIB)) {
       std::ostringstream error;
-      error << "PyTorch XPU operators library is not loaded (ERROR: "
-            << GetLastError()
-            << "). Please check if PyTorch is installed correctly."
-            << " Or please file an issue on https://github.com/intel/torch-xpu-ops/issues.";
+      error
+          << "PyTorch XPU operators library is not loaded (ERROR: "
+          << GetLastError()
+          << "). Please check if PyTorch is installed correctly."
+          << " Or please file an issue on https://github.com/intel/torch-xpu-ops/issues.";
       throw std::runtime_error(error.str());
     }
   }
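
Note on the recurring change in the `ForeachOp*.cpp` hunks above: the hand-written forward declarations of the `*_slow` fallback kernels are deleted, and the declarations are instead pulled in from the per-operator headers that PyTorch's codegen emits. The snippet below is a minimal sketch of that pattern, not code taken from this patch: the `ATen/ops/_foreach_mul_native.h` name follows the usual `ATen/ops/<op>_native.h` codegen convention, and the simplified function body stands in for what the `FOREACH_BINARY_OP_LIST` macro expands to in the real file.

```cpp
// Sketch only. Assumes the generated header ATen/ops/_foreach_mul_native.h
// declares foreach_tensor_mul_list_kernel_slow (the codegen convention);
// the body is a simplified stand-in for the macro-generated XPU kernel.
#include <ATen/core/Tensor.h>
#include <ATen/native/ForeachUtils.h>
#include <ATen/ops/_foreach_mul_native.h>

#include <vector>

namespace at::native {

std::vector<Tensor> foreach_tensor_mul_list_kernel_xpu(
    TensorList tensors1,
    TensorList tensors2) {
  check_foreach_api_restrictions(tensors1, tensors2);
  if (can_use_fast_route(tensors1, tensors2)) {
    // The fast multi-tensor XPU path would be dispatched here (elided).
  }
  // Slow fallback: its declaration now comes from the included native
  // header rather than from a hand-written declaration in this .cpp file.
  return foreach_tensor_mul_list_kernel_slow(tensors1, tensors2);
}

} // namespace at::native
```

In the real sources this pattern stays wrapped in the `FOREACH_BINARY_OP_*` and `FOREACH_POINTWISE_OP_*` macros shown in the hunks; the only change in the Foreach files is where the slow-path declarations come from.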