diff --git a/src/ATen/native/xpu/BinaryOps.cpp b/src/ATen/native/xpu/BinaryOps.cpp index 02a22b11a..2ec722722 100644 --- a/src/ATen/native/xpu/BinaryOps.cpp +++ b/src/ATen/native/xpu/BinaryOps.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -502,4 +503,26 @@ Tensor& XPUNativeFunctions::atan2_out( return out; } +Tensor& XPUNativeFunctions::copysign_out( + const Tensor& self, + const Tensor& other, + Tensor& out) { /* Out-variant: configures iter via build_borrowing_binary_float_op(out, self, other), runs native::xpu::copysign_kernel on it, and returns out. */ + TensorIterator iter; + iter.build_borrowing_binary_float_op(out, self, other); + native::xpu::copysign_kernel(iter); + return out; +} + +Tensor& XPUNativeFunctions::copysign_(Tensor& self, const Tensor& other) { /* In-place variant: forwards to copysign_out with self as both the first operand and the destination. */ + return XPUNativeFunctions::copysign_out(self, other, self); +} + +Tensor XPUNativeFunctions::copysign(const Tensor& self, const Tensor& other) { /* Functional variant: same iterator setup and kernel call as copysign_out, but starting from a default-constructed out; the result is read back from iter.output(). NOTE(review): presumably the iterator allocates the output when out is undefined - confirm. */ + Tensor out; + TensorIterator iter; + iter.build_borrowing_binary_float_op(out, self, other); + native::xpu::copysign_kernel(iter); + return iter.output(); +} + } // namespace at diff --git a/src/ATen/native/xpu/Indexing.cpp b/src/ATen/native/xpu/Indexing.cpp index e80bee8ff..7b56ffc16 100644 --- a/src/ATen/native/xpu/Indexing.cpp +++ b/src/ATen/native/xpu/Indexing.cpp @@ -43,4 +43,5 @@ Tensor XPUNativeFunctions::index_select( auto out = at::empty({0}, self.options()); return index_select_out(self, dim, index, out); } + } // namespace at diff --git a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp index 62bbd353d..a3b6d8c0e 100644 --- a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp +++ b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp @@ -1396,4 +1396,8 @@ Tensor& XPUNativeFunctions::gather_out( return out; } +Tensor XPUNativeFunctions::count_nonzero(const Tensor& self, IntArrayRef dims) { /* Evaluates (self != 0) and sums that result over dims. */ + return (self != 0).sum(dims); +} + } // namespace at diff --git a/src/ATen/native/xpu/XPUFallback.template b/src/ATen/native/xpu/XPUFallback.template index e1dba02b7..a9f3bad9a 100644 --- a/src/ATen/native/xpu/XPUFallback.template +++ 
b/src/ATen/native/xpu/XPUFallback.template @@ -174,8 +174,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) { "cholesky", "cholesky_inverse", "_cholesky_solve_helper", - "copysign.out", - "count_nonzero.dim_IntList", "_ctc_loss", "_ctc_loss_backward", "_cummax_helper", diff --git a/src/ATen/native/xpu/sycl/CopysignKernel.cpp b/src/ATen/native/xpu/sycl/CopysignKernel.cpp new file mode 100644 index 000000000..3b8351abf --- /dev/null +++ b/src/ATen/native/xpu/sycl/CopysignKernel.cpp @@ -0,0 +1,24 @@ +#include +#include + +#include + +namespace at::native::xpu { + +template +struct CopysignFunctor { /* Binary functor: returns std::copysign(a, b) for its two scalar operands. */ + scalar_t operator()(scalar_t a, scalar_t b) const { + return std::copysign(a, b); + } +}; + +void copysign_kernel(TensorIteratorBase& iter) { /* Dispatches on iter.common_dtype() over the floating types plus Half and BFloat16 (dispatch name "copysign_xpu") and runs CopysignFunctor over iter through gpu_kernel_with_scalars. */ + AT_DISPATCH_FLOATING_TYPES_AND2( + at::ScalarType::Half, + at::ScalarType::BFloat16, + iter.common_dtype(), + "copysign_xpu", + [&]() { gpu_kernel_with_scalars(iter, CopysignFunctor()); }); +} + +} // namespace at::native::xpu diff --git a/src/ATen/native/xpu/sycl/CopysignKernel.h b/src/ATen/native/xpu/sycl/CopysignKernel.h new file mode 100644 index 000000000..cf856728c --- /dev/null +++ b/src/ATen/native/xpu/sycl/CopysignKernel.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace at::native::xpu { + +void copysign_kernel(TensorIteratorBase& iter); + +} // namespace at::native::xpu diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py index 64d6a694f..9b006bb5a 100644 --- a/test/xpu/xpu_test_utils.py +++ b/test/xpu/xpu_test_utils.py @@ -164,6 +164,8 @@ "renorm", "lerp", "conj_physical", + "copysign", + "count_nonzero" ] diff --git a/yaml/xpu_functions.yaml b/yaml/xpu_functions.yaml index e560b81b1..ef0404304 100644 --- a/yaml/xpu_functions.yaml +++ b/yaml/xpu_functions.yaml @@ -534,6 +534,10 @@ supported: - randperm.generator_out - _amp_foreach_non_finite_check_and_unscale_ - _amp_update_scale_ + - copysign.out + - copysign.Tensor + - copysign_.Tensor + - count_nonzero.dim_IntList - conj_physical.out - conj_physical_ - ceil