diff --git a/src/ATen/native/xpu/BinaryOps.cpp b/src/ATen/native/xpu/BinaryOps.cpp
index c50f8305d..2ec722722 100644
--- a/src/ATen/native/xpu/BinaryOps.cpp
+++ b/src/ATen/native/xpu/BinaryOps.cpp
@@ -9,6 +9,7 @@
 #include <ATen/native/xpu/sycl/BinaryKernels.h>
 #include <ATen/native/xpu/sycl/BinaryMiscBackwardOpsKernels.h>
 #include <ATen/native/xpu/sycl/BinaryRemainderKernel.h>
+#include <ATen/native/xpu/sycl/CopysignKernel.h>
 #include <ATen/native/xpu/sycl/GcdLcmKernels.h>
 #include <ATen/native/xpu/sycl/MaxMinElementwiseKernels.h>
 
@@ -477,4 +478,51 @@ Tensor XPUNativeFunctions::sigmoid_backward(
   return iter.output();
 }
 
+Tensor XPUNativeFunctions::atan2(const Tensor& self, const Tensor& other) {
+  Tensor out; // default-constructed; the result is read back via iter.output()
+  TensorIterator iter;
+  iter.build_borrowing_binary_float_op(out, self, other);
+  native::xpu::atan2_kernel(iter); // run the XPU atan2 kernel over the iterator
+  return iter.output();
+}
+
+Tensor& XPUNativeFunctions::atan2_(Tensor& self, const Tensor& other) {
+  TensorIterator iter;
+  iter.build_borrowing_binary_float_op(self, self, other); // self: output + 1st input
+  native::xpu::atan2_kernel(iter);
+  return self; // in-place variant hands back the tensor it was given
+}
+
+Tensor& XPUNativeFunctions::atan2_out(
+    const Tensor& self,
+    const Tensor& other,
+    Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_binary_float_op(out, self, other); // out is the iterator output
+  native::xpu::atan2_kernel(iter);
+  return out; // out variant returns the caller-supplied tensor
+}
+
+Tensor& XPUNativeFunctions::copysign_out(
+    const Tensor& self,
+    const Tensor& other,
+    Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_binary_float_op(out, self, other); // out is the iterator output
+  native::xpu::copysign_kernel(iter); // run the XPU copysign kernel over the iterator
+  return out; // out variant returns the caller-supplied tensor
+}
+
+Tensor& XPUNativeFunctions::copysign_(Tensor& self, const Tensor& other) {
+  return XPUNativeFunctions::copysign_out(self, other, self); // self doubles as out
+}
+
+Tensor XPUNativeFunctions::copysign(const Tensor& self, const Tensor& other) {
+  Tensor out; // default-constructed; the result is read back via iter.output()
+  TensorIterator iter;
+  iter.build_borrowing_binary_float_op(out, self, other);
+  native::xpu::copysign_kernel(iter); // run the XPU copysign kernel over the iterator
+  return iter.output();
+}
+
 } // namespace at
diff --git a/src/ATen/native/xpu/Indexing.cpp b/src/ATen/native/xpu/Indexing.cpp
index e80bee8ff..7b56ffc16 100644
--- a/src/ATen/native/xpu/Indexing.cpp
+++ b/src/ATen/native/xpu/Indexing.cpp
@@ -43,4 +43,5 @@ Tensor XPUNativeFunctions::index_select(
   auto out = at::empty({0}, self.options());
   return index_select_out(self, dim, index, out);
 }
+
 } // namespace at
diff --git a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp
index 62bbd353d..a3b6d8c0e 100644
--- a/src/ATen/native/xpu/TensorAdvancedIndexing.cpp
+++ b/src/ATen/native/xpu/TensorAdvancedIndexing.cpp
@@ -1396,4 +1396,8 @@ Tensor& XPUNativeFunctions::gather_out(
   return out;
 }
 
+Tensor XPUNativeFunctions::count_nonzero(const Tensor& self, IntArrayRef dims) {
+  return (self != 0).sum(dims); // compare against 0, then sum that result over dims
+}
+
 } // namespace at
diff --git a/src/ATen/native/xpu/UnaryOps.cpp b/src/ATen/native/xpu/UnaryOps.cpp
index ffc528fab..1d3137f79 100644
--- a/src/ATen/native/xpu/UnaryOps.cpp
+++ b/src/ATen/native/xpu/UnaryOps.cpp
@@ -9,7 +9,12 @@
 #include <ATen/native/xpu/sycl/UnaryFractionKernels.h>
 #include <ATen/native/xpu/sycl/UnaryGeometricAcosKernel.h>
 #include <ATen/native/xpu/sycl/UnaryGeometricAcoshKernel.h>
+#include <ATen/native/xpu/sycl/UnaryGeometricAsinKernel.h>
+#include <ATen/native/xpu/sycl/UnaryGeometricAsinhKernel.h>
+#include <ATen/native/xpu/sycl/UnaryGeometricAtanKernel.h>
+#include <ATen/native/xpu/sycl/UnaryGeometricAtanhKernel.h>
 #include <ATen/native/xpu/sycl/UnaryGeometricCosKernel.h>
+#include <ATen/native/xpu/sycl/UnaryGeometricCoshKernel.h>
 #include <ATen/native/xpu/sycl/UnaryGeometricSinKernel.h>
 #include <ATen/native/xpu/sycl/UnaryGeometricTanhKernel.h>
 #include <ATen/native/xpu/sycl/UnaryKernels.h>
@@ -516,6 +521,116 @@ Tensor& XPUNativeFunctions::erfc_out(const Tensor& self, Tensor& out) {
   return out;
 }
 
+Tensor XPUNativeFunctions::asinh(const Tensor& self) {
+  Tensor out; // default-constructed; the result is read back via iter.output()
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self);
+  native::xpu::asinh_kernel(iter); // run the XPU asinh kernel over the iterator
+  return iter.output();
+}
+
+Tensor& XPUNativeFunctions::asinh_(Tensor& self) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(self, self); // self is both output and input
+  native::xpu::asinh_kernel(iter);
+  return self; // in-place variant hands back the tensor it was given
+}
+
+Tensor& XPUNativeFunctions::asinh_out(const Tensor& self, Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self); // out is the iterator output
+  native::xpu::asinh_kernel(iter);
+  return out; // out variant returns the caller-supplied tensor
+}
+
+Tensor XPUNativeFunctions::asin(const Tensor& self) {
+  Tensor out; // default-constructed; the result is read back via iter.output()
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self);
+  native::xpu::asin_kernel(iter); // run the XPU asin kernel over the iterator
+  return iter.output();
+}
+
+Tensor& XPUNativeFunctions::asin_(Tensor& self) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(self, self); // self is both output and input
+  native::xpu::asin_kernel(iter);
+  return self; // in-place variant hands back the tensor it was given
+}
+
+Tensor& XPUNativeFunctions::asin_out(const Tensor& self, Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self); // out is the iterator output
+  native::xpu::asin_kernel(iter);
+  return out; // out variant returns the caller-supplied tensor
+}
+
+Tensor XPUNativeFunctions::atan(const Tensor& self) {
+  Tensor out; // default-constructed; the result is read back via iter.output()
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self);
+  native::xpu::atan_kernel(iter); // run the XPU atan kernel over the iterator
+  return iter.output();
+}
+
+Tensor& XPUNativeFunctions::atan_(Tensor& self) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(self, self); // self is both output and input
+  native::xpu::atan_kernel(iter);
+  return self; // in-place variant hands back the tensor it was given
+}
+
+Tensor& XPUNativeFunctions::atan_out(const Tensor& self, Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self); // out is the iterator output
+  native::xpu::atan_kernel(iter);
+  return out; // out variant returns the caller-supplied tensor
+}
+
+Tensor XPUNativeFunctions::atanh(const Tensor& self) {
+  Tensor out; // default-constructed; the result is read back via iter.output()
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self);
+  native::xpu::atanh_kernel(iter); // run the XPU atanh kernel over the iterator
+  return iter.output();
+}
+
+Tensor& XPUNativeFunctions::atanh_(Tensor& self) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(self, self); // self is both output and input
+  native::xpu::atanh_kernel(iter);
+  return self; // in-place variant hands back the tensor it was given
+}
+
+Tensor& XPUNativeFunctions::atanh_out(const Tensor& self, Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self); // out is the iterator output
+  native::xpu::atanh_kernel(iter);
+  return out; // out variant returns the caller-supplied tensor
+}
+
+Tensor XPUNativeFunctions::cosh(const Tensor& self) {
+  Tensor out; // default-constructed; the result is read back via iter.output()
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self);
+  native::xpu::cosh_kernel(iter); // run the XPU cosh kernel over the iterator
+  return iter.output();
+}
+
+Tensor& XPUNativeFunctions::cosh_(Tensor& self) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(self, self); // self is both output and input
+  native::xpu::cosh_kernel(iter);
+  return self; // in-place variant hands back the tensor it was given
+}
+
+Tensor& XPUNativeFunctions::cosh_out(const Tensor& self, Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_unary_float_op(out, self); // out is the iterator output
+  native::xpu::cosh_kernel(iter);
+  return out; // out variant returns the caller-supplied tensor
+}
+
 Tensor& XPUNativeFunctions::conj_physical_out(const Tensor& self, Tensor& out) {
   auto iter = TensorIterator::unary_op(out, self);
   native::xpu::conj_physical_kernel(iter);
diff --git a/src/ATen/native/xpu/XPUFallback.template b/src/ATen/native/xpu/XPUFallback.template
index 471081ccd..f1b861881 100644
--- a/src/ATen/native/xpu/XPUFallback.template
+++ b/src/ATen/native/xpu/XPUFallback.template
@@ -162,11 +162,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
     "aminmax.out",
     "angle",
     "argmin.out",
-    "asinh.out",
-    "asin.out",
-    "atan2.out",
-    "atanh.out",
-    "atan.out",
     "avg_pool3d_backward.grad_input",
     "avg_pool3d.out",
     "binary_cross_entropy",
@@ -179,9 +174,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
     "cholesky",
     "cholesky_inverse",
     "_cholesky_solve_helper",
-    "copysign.out",
-    "cosh.out",
-    "count_nonzero.dim_IntList",
     "_ctc_loss",
     "_ctc_loss_backward",
     "_cummax_helper",
diff --git a/src/ATen/native/xpu/sycl/BinaryGeometricKernels.cpp b/src/ATen/native/xpu/sycl/BinaryGeometricKernels.cpp
index c93afe4bf..e170760e8 100644
--- a/src/ATen/native/xpu/sycl/BinaryGeometricKernels.cpp
+++ b/src/ATen/native/xpu/sycl/BinaryGeometricKernels.cpp
@@ -1,13 +1,28 @@
 #include <ATen/ATen.h>
 #include <ATen/Dispatch.h>
 #include <ATen/native/TensorIterator.h>
-
 #include <ATen/native/xpu/sycl/Loops.h>
 
 namespace at {
 namespace native {
 namespace xpu {
 
+template <typename scalar_t>
+struct Atan2Functor { // per-element functor handed to gpu_kernel by atan2_kernel
+  scalar_t operator()(scalar_t a, scalar_t b) const {
+    return std::atan2(a, b); // a is the first argument (y), b the second (x)
+  }
+};
+
+void atan2_kernel(TensorIteratorBase& iter) { // dispatches on iter.common_dtype()
+  AT_DISPATCH_FLOATING_TYPES_AND2(
+      at::ScalarType::BFloat16,
+      at::ScalarType::Half,
+      iter.common_dtype(),
+      "atan2_xpu", // name string handed to the dispatch macro
+      [&]() { gpu_kernel(iter, Atan2Functor<scalar_t>()); });
+}
+
 template <typename scalar_t>
 struct HypotFunctor {
   scalar_t operator()(scalar_t a, scalar_t b) const {
diff --git a/src/ATen/native/xpu/sycl/BinaryGeometricKernels.h b/src/ATen/native/xpu/sycl/BinaryGeometricKernels.h
index e37dd6dbf..588d52c4f 100644
--- a/src/ATen/native/xpu/sycl/BinaryGeometricKernels.h
+++ b/src/ATen/native/xpu/sycl/BinaryGeometricKernels.h
@@ -4,6 +4,8 @@
 
 namespace at::native::xpu {
 
+void atan2_kernel(TensorIteratorBase& iter); // defined in BinaryGeometricKernels.cpp
+
 void hypot_kernel(TensorIteratorBase& iter);
 
 } // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/CopysignKernel.cpp b/src/ATen/native/xpu/sycl/CopysignKernel.cpp
new file mode 100644
index 000000000..3b8351abf
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/CopysignKernel.cpp
@@ -0,0 +1,24 @@
+#include <ATen/Dispatch.h>
+#include <ATen/native/TensorIterator.h>
+
+#include <ATen/native/xpu/sycl/Loops.h>
+
+namespace at::native::xpu {
+
+template <typename scalar_t>
+struct CopysignFunctor { // per-element functor used by copysign_kernel
+  scalar_t operator()(scalar_t a, scalar_t b) const {
+    return std::copysign(a, b); // magnitude of a combined with the sign of b
+  }
+};
+
+void copysign_kernel(TensorIteratorBase& iter) { // dispatches on iter.common_dtype()
+  AT_DISPATCH_FLOATING_TYPES_AND2(
+      at::ScalarType::Half,
+      at::ScalarType::BFloat16,
+      iter.common_dtype(),
+      "copysign_xpu", // name string handed to the dispatch macro
+      [&]() { gpu_kernel_with_scalars(iter, CopysignFunctor<scalar_t>()); });
+} // NOTE(review): *_with_scalars presumably handles CPU-scalar operands; confirm
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/CopysignKernel.h b/src/ATen/native/xpu/sycl/CopysignKernel.h
new file mode 100644
index 000000000..cf856728c
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/CopysignKernel.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <ATen/native/TensorIterator.h>
+
+namespace at::native::xpu {
+
+void copysign_kernel(TensorIteratorBase& iter); // defined in CopysignKernel.cpp
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricAsinKernel.cpp b/src/ATen/native/xpu/sycl/UnaryGeometricAsinKernel.cpp
new file mode 100644
index 000000000..c8b1aaca6
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricAsinKernel.cpp
@@ -0,0 +1,40 @@
+#include <ATen/Dispatch.h>
+#include <ATen/OpMathType.h>
+
+#include <ATen/native/xpu/sycl/Loops.h>
+
+namespace at::native::xpu {
+
+template <typename scalar_t>
+struct AsinComplexFunctor { // used on the complex branch of asin_kernel
+  using opmath_t = at::opmath_type<scalar_t>;
+  scalar_t operator()(const scalar_t a) const {
+    return std::asin(static_cast<opmath_t>(a)); // converted to scalar_t on return
+  }
+};
+
+template <typename scalar_t>
+struct AsinFunctor { // used on the non-complex branch of asin_kernel
+  scalar_t operator()(const scalar_t a) const {
+    return std::asin(a); // no opmath cast here, unlike the complex functor
+  }
+};
+
+void asin_kernel(TensorIteratorBase& iter) { // dispatches on iter.common_dtype()
+  auto common_dtype = iter.common_dtype();
+  if (at::isComplexType(common_dtype)) { // complex path, ComplexHalf included
+    AT_DISPATCH_COMPLEX_TYPES_AND(
+        kComplexHalf, common_dtype, "asin_xpu", [&]() {
+          gpu_kernel(iter, AsinComplexFunctor<scalar_t>());
+        });
+  } else { // non-complex path: floating types plus Half and BFloat16
+    AT_DISPATCH_FLOATING_TYPES_AND2(
+        ScalarType::Half,
+        ScalarType::BFloat16,
+        common_dtype,
+        "asin_xpu",
+        [&]() { gpu_kernel(iter, AsinFunctor<scalar_t>()); });
+  }
+}
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricAsinKernel.h b/src/ATen/native/xpu/sycl/UnaryGeometricAsinKernel.h
new file mode 100644
index 000000000..194ce6479
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricAsinKernel.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <ATen/native/TensorIterator.h>
+
+namespace at::native::xpu {
+
+void asin_kernel(TensorIteratorBase& iter); // defined in UnaryGeometricAsinKernel.cpp
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricAsinhKernel.cpp b/src/ATen/native/xpu/sycl/UnaryGeometricAsinhKernel.cpp
new file mode 100644
index 000000000..009a68b47
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricAsinhKernel.cpp
@@ -0,0 +1,40 @@
+#include <ATen/Dispatch.h>
+#include <ATen/OpMathType.h>
+
+#include <ATen/native/xpu/sycl/Loops.h>
+
+namespace at::native::xpu {
+
+template <typename scalar_t>
+struct AsinhComplexFunctor { // used on the complex branch of asinh_kernel
+  using opmath_t = at::opmath_type<scalar_t>;
+  scalar_t operator()(const scalar_t a) const {
+    return std::asinh(static_cast<opmath_t>(a)); // converted to scalar_t on return
+  }
+};
+
+template <typename scalar_t>
+struct AsinhFunctor { // used on the non-complex branch of asinh_kernel
+  scalar_t operator()(const scalar_t a) const {
+    return std::asinh(a); // no opmath cast here, unlike the complex functor
+  }
+};
+
+void asinh_kernel(TensorIteratorBase& iter) { // dispatches on iter.common_dtype()
+  auto common_dtype = iter.common_dtype();
+  if (at::isComplexType(common_dtype)) { // complex path, ComplexHalf included
+    AT_DISPATCH_COMPLEX_TYPES_AND(
+        kComplexHalf, common_dtype, "asinh_xpu", [&]() {
+          gpu_kernel(iter, AsinhComplexFunctor<scalar_t>());
+        });
+  } else { // non-complex path: floating types plus Half and BFloat16
+    AT_DISPATCH_FLOATING_TYPES_AND2(
+        ScalarType::Half,
+        ScalarType::BFloat16,
+        common_dtype,
+        "asinh_xpu",
+        [&]() { gpu_kernel(iter, AsinhFunctor<scalar_t>()); });
+  }
+}
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricAsinhKernel.h b/src/ATen/native/xpu/sycl/UnaryGeometricAsinhKernel.h
new file mode 100644
index 000000000..4d37288de
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricAsinhKernel.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <ATen/native/TensorIterator.h>
+
+namespace at::native::xpu {
+
+void asinh_kernel(TensorIteratorBase& iter); // defined in UnaryGeometricAsinhKernel.cpp
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricAtanKernel.cpp b/src/ATen/native/xpu/sycl/UnaryGeometricAtanKernel.cpp
new file mode 100644
index 000000000..f241e9da3
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricAtanKernel.cpp
@@ -0,0 +1,40 @@
+#include <ATen/Dispatch.h>
+#include <ATen/OpMathType.h>
+
+#include <ATen/native/xpu/sycl/Loops.h>
+
+namespace at::native::xpu {
+
+template <typename scalar_t>
+struct AtanComplexFunctor { // used on the complex branch of atan_kernel
+  using opmath_t = at::opmath_type<scalar_t>;
+  scalar_t operator()(const scalar_t a) const {
+    return std::atan(static_cast<opmath_t>(a)); // converted to scalar_t on return
+  }
+};
+
+template <typename scalar_t>
+struct AtanFunctor { // used on the non-complex branch of atan_kernel
+  scalar_t operator()(const scalar_t a) const {
+    return std::atan(a); // no opmath cast here, unlike the complex functor
+  }
+};
+
+void atan_kernel(TensorIteratorBase& iter) { // dispatches on iter.common_dtype()
+  auto common_dtype = iter.common_dtype();
+  if (at::isComplexType(common_dtype)) { // complex path, ComplexHalf included
+    AT_DISPATCH_COMPLEX_TYPES_AND(
+        kComplexHalf, common_dtype, "atan_xpu", [&]() {
+          gpu_kernel(iter, AtanComplexFunctor<scalar_t>());
+        });
+  } else { // complex was handled above, so dispatch real floating types only
+    AT_DISPATCH_FLOATING_TYPES_AND2(
+        ScalarType::Half,
+        ScalarType::BFloat16,
+        common_dtype,
+        "atan_xpu",
+        [&]() { gpu_kernel(iter, AtanFunctor<scalar_t>()); });
+  }
+}
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricAtanKernel.h b/src/ATen/native/xpu/sycl/UnaryGeometricAtanKernel.h
new file mode 100644
index 000000000..022720223
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricAtanKernel.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <ATen/native/TensorIterator.h>
+
+namespace at::native::xpu {
+
+void atan_kernel(TensorIteratorBase& iter); // defined in UnaryGeometricAtanKernel.cpp
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricAtanhKernel.cpp b/src/ATen/native/xpu/sycl/UnaryGeometricAtanhKernel.cpp
new file mode 100644
index 000000000..630a64d39
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricAtanhKernel.cpp
@@ -0,0 +1,40 @@
+#include <ATen/Dispatch.h>
+#include <ATen/OpMathType.h>
+
+#include <ATen/native/xpu/sycl/Loops.h>
+
+namespace at::native::xpu {
+
+template <typename scalar_t>
+struct AtanhComplexFunctor { // used on the complex branch of atanh_kernel
+  using opmath_t = at::opmath_type<scalar_t>;
+  scalar_t operator()(const scalar_t a) const {
+    return std::atanh(static_cast<opmath_t>(a)); // converted to scalar_t on return
+  }
+};
+
+template <typename scalar_t>
+struct AtanhFunctor { // used on the non-complex branch of atanh_kernel
+  scalar_t operator()(const scalar_t a) const {
+    return std::atanh(a); // no opmath cast here, unlike the complex functor
+  }
+};
+
+void atanh_kernel(TensorIteratorBase& iter) { // dispatches on iter.common_dtype()
+  auto common_dtype = iter.common_dtype();
+  if (at::isComplexType(common_dtype)) { // complex path, ComplexHalf included
+    AT_DISPATCH_COMPLEX_TYPES_AND(
+        kComplexHalf, common_dtype, "atanh_xpu", [&]() {
+          gpu_kernel(iter, AtanhComplexFunctor<scalar_t>());
+        });
+  } else { // complex was handled above, so dispatch real floating types only
+    AT_DISPATCH_FLOATING_TYPES_AND2(
+        ScalarType::Half,
+        ScalarType::BFloat16,
+        common_dtype,
+        "atanh_xpu",
+        [&]() { gpu_kernel(iter, AtanhFunctor<scalar_t>()); });
+  }
+}
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricAtanhKernel.h b/src/ATen/native/xpu/sycl/UnaryGeometricAtanhKernel.h
new file mode 100644
index 000000000..5536641e7
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricAtanhKernel.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <ATen/native/TensorIterator.h>
+
+namespace at::native::xpu {
+
+void atanh_kernel(TensorIteratorBase& iter); // defined in UnaryGeometricAtanhKernel.cpp
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricCoshKernel.cpp b/src/ATen/native/xpu/sycl/UnaryGeometricCoshKernel.cpp
new file mode 100644
index 000000000..11440b3d3
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricCoshKernel.cpp
@@ -0,0 +1,40 @@
+#include <ATen/Dispatch.h>
+#include <ATen/OpMathType.h>
+
+#include <ATen/native/xpu/sycl/Loops.h>
+
+namespace at::native::xpu {
+
+template <typename scalar_t>
+struct CoshComplexFunctor { // used on the complex branch of cosh_kernel
+  using opmath_t = at::opmath_type<scalar_t>;
+  scalar_t operator()(scalar_t a) const {
+    return std::cosh(static_cast<opmath_t>(a)); // converted to scalar_t on return
+  }
+};
+
+template <typename scalar_t>
+struct CoshFunctor { // used on the non-complex branch of cosh_kernel
+  scalar_t operator()(scalar_t a) const {
+    return std::cosh(a); // no opmath cast here, unlike the complex functor
+  }
+};
+
+void cosh_kernel(TensorIteratorBase& iter) { // dispatches on iter.common_dtype()
+  auto common_dtype = iter.common_dtype();
+  if (at::isComplexType(common_dtype)) { // complex path, ComplexHalf included
+    AT_DISPATCH_COMPLEX_TYPES_AND(
+        kComplexHalf, common_dtype, "cosh_xpu", [&]() {
+          gpu_kernel(iter, CoshComplexFunctor<scalar_t>());
+        });
+  } else { // non-complex path: floating types plus Half and BFloat16
+    AT_DISPATCH_FLOATING_TYPES_AND2(
+        ScalarType::Half,
+        ScalarType::BFloat16,
+        common_dtype,
+        "cosh_xpu",
+        [&]() { gpu_kernel(iter, CoshFunctor<scalar_t>()); });
+  }
+}
+
+} // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/UnaryGeometricCoshKernel.h b/src/ATen/native/xpu/sycl/UnaryGeometricCoshKernel.h
new file mode 100644
index 000000000..7f031e3ff
--- /dev/null
+++ b/src/ATen/native/xpu/sycl/UnaryGeometricCoshKernel.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <ATen/native/TensorIterator.h>
+
+namespace at::native::xpu {
+
+void cosh_kernel(TensorIteratorBase& iter); // defined in UnaryGeometricCoshKernel.cpp
+
+} // namespace at::native::xpu
diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py
index 943d46465..108b3073d 100644
--- a/test/xpu/extended/run_test_with_skip.py
+++ b/test/xpu/extended/run_test_with_skip.py
@@ -26,6 +26,21 @@
     "test_compare_cpu_acos_xpu_complex64",
     "test_compare_cpu_acosh_xpu_complex64",
 
+    # got inconsistent values between CPU / XPU
+    # AssertionError: Tensor-likes are not close!
+    # compute results contain nan / inf
+    "test_compare_cpu_acosh_xpu_complex64",
+    "test_compare_cpu_asin_xpu_complex128",
+    "test_compare_cpu_asin_xpu_complex64",
+    "test_compare_cpu_asinh_xpu_complex128",
+    "test_compare_cpu_asinh_xpu_complex64",
+    "test_compare_cpu_atan_xpu_complex128",
+    "test_compare_cpu_atan_xpu_complex64",
+
+    # skip random failure due to accuracy
+    # AssertionError: Tensor-likes are not close!
+    "test_compare_cpu_atan2_xpu_bfloat16",
+
     # CPU result is not golden reference
     "test_compare_cpu_div_floor_rounding_xpu_bfloat16",
     "test_compare_cpu_div_trunc_rounding_xpu_float16",
@@ -59,6 +74,9 @@
     # https://github.com/intel/torch-xpu-ops/issues/281
     "test_cow_input",
 
+    # The operator 'aten::sinh.out' on the XPU backend is falling back to run on the CPU.
+    "test_cow_input_cosh_xpu_float32",
+
     # XPU implementation is correct.
     # std::exp{-inf, nan}, the result is (±0,±0) (signs are unspecified)
     # std::exp{-inf, inf}, the result is (±0,±0) (signs are unspecified)
diff --git a/test/xpu/run_test_with_skip.py b/test/xpu/run_test_with_skip.py
index 28a073bd4..56fc93a48 100644
--- a/test/xpu/run_test_with_skip.py
+++ b/test/xpu/run_test_with_skip.py
@@ -802,7 +802,10 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     # Retrieve the case, once avg_pool1d is supported. Test infra will change claimed dtypes in test case once the op is listed
     # in XPU supported operators. Then the case will work.
     "test_noncontiguous_samples_nn_functional_avg_pool1d_xpu_int64",
-    "test_noncontiguous_samples_nn_functional_local_response_norm_xpu_int64"
+    "test_noncontiguous_samples_nn_functional_local_response_norm_xpu_int64",
+
+    # torch.complex32 - "sinh_cpu" not implemented for 'ComplexHalf'
+    "test_dtypes_cosh_xpu",
 )
 res += launch_test("test_ops_xpu.py", skip_list)
 
@@ -1514,35 +1517,17 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "_jiterator_",
     # CPU Fallback fails: Tensor-likes are not close!
     "test_reference_numerics_extremal__refs_acos_xpu_complex128",
-    "test_reference_numerics_extremal__refs_asin_xpu_complex128",
-    "test_reference_numerics_extremal__refs_asin_xpu_complex64",
-    "test_reference_numerics_extremal__refs_atan_xpu_complex128",
-    "test_reference_numerics_extremal__refs_atan_xpu_complex64",
     "test_reference_numerics_extremal__refs_exp2_xpu_complex128",
     "test_reference_numerics_extremal__refs_exp2_xpu_complex64",
     "test_reference_numerics_extremal__refs_nn_functional_tanhshrink_xpu_complex64",
     "test_reference_numerics_extremal_acos_xpu_complex128",
-    "test_reference_numerics_extremal_asin_xpu_complex128",
-    "test_reference_numerics_extremal_asin_xpu_complex64",
-    "test_reference_numerics_extremal_atan_xpu_complex128",
-    "test_reference_numerics_extremal_atan_xpu_complex64",
     "test_reference_numerics_extremal_exp2_xpu_complex128",
     "test_reference_numerics_extremal_exp2_xpu_complex64",
     "test_reference_numerics_extremal_nn_functional_tanhshrink_xpu_complex64",
-    "test_reference_numerics_large__refs_atan_xpu_complex128",
-    "test_reference_numerics_large__refs_atan_xpu_complex64",
-    "test_reference_numerics_large_atan_xpu_complex128",
-    "test_reference_numerics_large_atan_xpu_complex64",
     "test_reference_numerics_normal__refs_nn_functional_tanhshrink_xpu_complex64",
     "test_reference_numerics_normal_nn_functional_tanhshrink_xpu_complex64",
-    "test_reference_numerics_small__refs_atan_xpu_complex128",
-    "test_reference_numerics_small__refs_atan_xpu_complex64",
-    "test_reference_numerics_small_atan_xpu_complex128",
-    "test_reference_numerics_small_atan_xpu_complex64",
-    "test_reference_numerics_large__refs_atan_xpu_complex32",
     "test_reference_numerics_large__refs_tanh_xpu_complex32",
     "test_reference_numerics_large_tanh_xpu_complex32",
-    "test_reference_numerics_small__refs_atan_xpu_complex32",
     # For extreme value processing, Numpy and XPU results are inconsistent
     "test_reference_numerics_extremal__refs_log_xpu_complex64",
     "test_reference_numerics_extremal_log_xpu_complex64",
@@ -1554,8 +1539,26 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_reference_numerics_extremal__refs_acosh_xpu_complex64",
     "test_reference_numerics_extremal_acos_xpu_complex64",
     "test_reference_numerics_extremal_acosh_xpu_complex64",
+    "test_reference_numerics_extremal__refs_asinh_xpu_complex64",
+    "test_reference_numerics_extremal_asinh_xpu_complex64",
+    "test_reference_numerics_extremal__refs_asin_xpu_complex64",
+    "test_reference_numerics_extremal_asin_xpu_complex64",
     "test_reference_numerics_large__refs_acosh_xpu_complex64",
     "test_reference_numerics_large_acosh_xpu_complex64",
+    "test_reference_numerics_large__refs_asinh_xpu_complex128",
+    "test_reference_numerics_large__refs_asinh_xpu_complex64",
+    "test_reference_numerics_large__refs_asinh_xpu_complex32",
+    "test_reference_numerics_large_asinh_xpu_complex128",
+    "test_reference_numerics_large_asinh_xpu_complex64",
+    "test_reference_numerics_large_asinh_xpu_complex32",
+
+    # AssertionError: Tensor-likes are not close!
+    # exceeded maximum allowed difference
+    # Greatest absolute difference: 6.266784475883469e-05 at index (463, 204) (up to 1e-05 allowed)
+    # Greatest relative difference: 1.9145216356264427e-05 at index (463, 204) (up to 1.3e-06 allowed)
+    "test_reference_numerics_normal__refs_asinh_xpu_complex64",
+    "test_reference_numerics_normal_asinh_xpu_complex64",
+
     # CPU Fallback fails
     # New ATen operators fails on CPU Fallback.
     # E.g. aten::special_spherical_bessel_j0, aten::special_airy_ai.
diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py
index f19a93dda..01e620614 100644
--- a/test/xpu/xpu_test_utils.py
+++ b/test/xpu/xpu_test_utils.py
@@ -50,7 +50,6 @@
     "clamp_min",
     "clone",
     "copy",
-    "cos",
     "cumsum",
     "eq",
     "fill",
@@ -95,7 +94,17 @@
     "remainder",
     "reshape",
     "rsqrt",
+    "cos",
+    "cosh",
+    "acos",
+    "acosh",
     "sin",
+    "asin",
+    "asinh",
+    "tanh",
+    "atan",
+    "atan2",
+    "atanh",
     "sqrt",
     "sum",
     "amin",
@@ -104,7 +113,6 @@
     "std_mean",
     "var",
     "var_mean",
-    "tanh",
     "hypot",
     "unfold",
     "uniform",
@@ -144,8 +152,6 @@
     "searchsorted",
     "grid_sampler_2d",
     # "nn.functional.grid_sample", # Lack of XPU implementation of aten::grid_sampler_3d.
-    "acos",
-    "acosh",
     "addr",
     "cdist",
     "nn.functional.group_norm",
@@ -158,6 +164,8 @@
     "lerp",
     "conj_physical",
     "histogram",
+    "copysign",
+    "count_nonzero",
 ]
 
 
diff --git a/yaml/xpu_functions.yaml b/yaml/xpu_functions.yaml
index 262b6e020..485c16edd 100644
--- a/yaml/xpu_functions.yaml
+++ b/yaml/xpu_functions.yaml
@@ -511,9 +511,31 @@ supported:
   - addcmul.out
   - addcmul
   - addcmul_
+  - asinh
+  - asinh.out
+  - asinh_
+  - asin
+  - asin.out
+  - asin_
+  - atan
+  - atan.out
+  - atan_
+  - atan2
+  - atan2.out
+  - atan2_
+  - atanh
+  - atanh.out
+  - atanh_
+  - cosh
+  - cosh.out
+  - cosh_
   - randperm.generator_out
   - _amp_foreach_non_finite_check_and_unscale_
   - _amp_update_scale_
+  - copysign.out
+  - copysign.Tensor
+  - copysign_.Tensor
+  - count_nonzero.dim_IntList
   - conj_physical.out
   - conj_physical_
   - ceil