From 78575b61a770ad3ee153a7ac440f21bceec24fc5 Mon Sep 17 00:00:00 2001 From: hjhee Date: Fri, 12 Jul 2024 13:03:52 +0800 Subject: [PATCH] Add aten::copysign, aten::count_nonzero (#481) - copysign.out - copysign.Tensor - copysign_.Tensor - copysign.Scalar - copysign_.Scalar - copysign.Scalar_out - count_nonzero.dim_IntList - count_nonzero --------- Co-authored-by: Feng Yuan --- src/ATen/native/xpu/BinaryOps.cpp | 23 ++++++++++++++++++ src/ATen/native/xpu/Indexing.cpp | 1 + .../native/xpu/TensorAdvancedIndexing.cpp | 4 ++++ src/ATen/native/xpu/XPUFallback.template | 2 -- src/ATen/native/xpu/sycl/CopysignKernel.cpp | 24 +++++++++++++++++++ src/ATen/native/xpu/sycl/CopysignKernel.h | 9 +++++++ test/xpu/xpu_test_utils.py | 2 ++ yaml/xpu_functions.yaml | 4 ++++ 8 files changed, 67 insertions(+), 2 deletions(-) create mode 100644 src/ATen/native/xpu/sycl/CopysignKernel.cpp create mode 100644 src/ATen/native/xpu/sycl/CopysignKernel.h diff --git a/src/ATen/native/xpu/BinaryOps.cpp b/src/ATen/native/xpu/BinaryOps.cpp index 02a22b11a..2ec722722 100644 --- a/src/ATen/native/xpu/BinaryOps.cpp +++ b/src/ATen/native/xpu/BinaryOps.cpp @@ -9,6 +9,7 @@ #include #include #include +#include <ATen/native/xpu/sycl/CopysignKernel.h> #include #include @@ -502,4 +503,26 @@ Tensor& XPUNativeFunctions::atan2_out( return out; } +Tensor& XPUNativeFunctions::copysign_out( + const Tensor& self, + const Tensor& other, + Tensor& out) { + TensorIterator iter; + iter.build_borrowing_binary_float_op(out, self, other); + native::xpu::copysign_kernel(iter); + return out; +} + +Tensor& XPUNativeFunctions::copysign_(Tensor& self, const Tensor& other) { + return XPUNativeFunctions::copysign_out(self, other, self); +} + +Tensor XPUNativeFunctions::copysign(const Tensor& self, const Tensor& other) { + Tensor out; + TensorIterator iter; + iter.build_borrowing_binary_float_op(out, self, other); + native::xpu::copysign_kernel(iter); + return iter.output(); +} + } // namespace at diff --git a/src/ATen/native/xpu/Indexing.cpp 
b/src/ATen/native/xpu/Indexing.cpp index e80bee8ff..7b56ffc16 100644 --- a/src/ATen/native/xpu/Indexing.cpp +++ b/src/ATen/native/xpu/Indexing.cpp @@ -43,4 +43,5 @@ Tensor XPUNativeFunctions::index_select( auto out = at::empty({0}, self.options()); return index_select_out(self, dim, index, out); } + } // namespace at diff --git a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp index 62bbd353d..a3b6d8c0e 100644 --- a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp +++ b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp @@ -1396,4 +1396,8 @@ Tensor& XPUNativeFunctions::gather_out( return out; } +Tensor XPUNativeFunctions::count_nonzero(const Tensor& self, IntArrayRef dims) { + return (self != 0).sum(dims); +} + } // namespace at diff --git a/src/ATen/native/xpu/XPUFallback.template b/src/ATen/native/xpu/XPUFallback.template index 2d109b32a..f1b861881 100644 --- a/src/ATen/native/xpu/XPUFallback.template +++ b/src/ATen/native/xpu/XPUFallback.template @@ -174,8 +174,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) { "cholesky", "cholesky_inverse", "_cholesky_solve_helper", - "copysign.out", - "count_nonzero.dim_IntList", "_ctc_loss", "_ctc_loss_backward", "_cummax_helper", diff --git a/src/ATen/native/xpu/sycl/CopysignKernel.cpp b/src/ATen/native/xpu/sycl/CopysignKernel.cpp new file mode 100644 index 000000000..3b8351abf --- /dev/null +++ b/src/ATen/native/xpu/sycl/CopysignKernel.cpp @@ -0,0 +1,24 @@ +#include <ATen/Dispatch.h> +#include <ATen/native/TensorIterator.h> + +#include <ATen/native/xpu/sycl/Loops.h> + +namespace at::native::xpu { + +template <typename scalar_t> +struct CopysignFunctor { + scalar_t operator()(scalar_t a, scalar_t b) const { + return std::copysign(a, b); + } +}; + +void copysign_kernel(TensorIteratorBase& iter) { + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, + at::ScalarType::BFloat16, + iter.common_dtype(), + "copysign_xpu", + [&]() { gpu_kernel_with_scalars(iter, CopysignFunctor<scalar_t>()); }); +} + +} // namespace at::native::xpu diff --git a/src/ATen/native/xpu/sycl/CopysignKernel.h 
b/src/ATen/native/xpu/sycl/CopysignKernel.h new file mode 100644 index 000000000..cf856728c --- /dev/null +++ b/src/ATen/native/xpu/sycl/CopysignKernel.h @@ -0,0 +1,9 @@ +#pragma once + +#include <ATen/native/TensorIterator.h> + +namespace at::native::xpu { + +void copysign_kernel(TensorIteratorBase& iter); + +} // namespace at::native::xpu diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index e68df6e1e..16031eda2 100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -163,6 +163,8 @@ "renorm", "lerp", "conj_physical", + "copysign", + "count_nonzero", ] diff --git a/yaml/xpu_functions.yaml b/yaml/xpu_functions.yaml index a216619cb..f17a99051 100644 --- a/yaml/xpu_functions.yaml +++ b/yaml/xpu_functions.yaml @@ -532,6 +532,10 @@ supported: - randperm.generator_out - _amp_foreach_non_finite_check_and_unscale_ - _amp_update_scale_ + - copysign.out + - copysign.Tensor + - copysign_.Tensor + - count_nonzero.dim_IntList - conj_physical.out - conj_physical_ - ceil