diff --git a/src/ATen/native/xpu/UnaryOps.cpp b/src/ATen/native/xpu/UnaryOps.cpp
index bae3c3c39..2be0fd33c 100644
--- a/src/ATen/native/xpu/UnaryOps.cpp
+++ b/src/ATen/native/xpu/UnaryOps.cpp
@@ -80,6 +80,7 @@ REGISTER_XPU_DISPATCH(round_stub, &xpu::round_kernel);
 REGISTER_XPU_DISPATCH(round_decimals_stub, &xpu::round_decimals_kernel);
 REGISTER_XPU_DISPATCH(floor_stub, &xpu::floor_kernel);
 REGISTER_XPU_DISPATCH(trunc_stub, &xpu::trunc_kernel);
+REGISTER_XPU_DISPATCH(i0_stub, &xpu::i0_kernel);
 REGISTER_XPU_DISPATCH(special_i0e_stub, &xpu::i0e_kernel);
 REGISTER_XPU_DISPATCH(special_i1_stub, &xpu::i1_kernel);
 REGISTER_XPU_DISPATCH(special_i1e_stub, &xpu::i1e_kernel);
diff --git a/src/ATen/native/xpu/XPUFallback.template b/src/ATen/native/xpu/XPUFallback.template
index 68af798e2..65fa5d667 100644
--- a/src/ATen/native/xpu/XPUFallback.template
+++ b/src/ATen/native/xpu/XPUFallback.template
@@ -171,7 +171,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
       "frexp.Tensor_out",
       "_fused_moving_avg_obs_fq_helper",
       "geqrf",
-      "i0.out",
       "igammac.out",
       "igamma.out",
       "index_reduce.out",
diff --git a/src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.cpp b/src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.cpp
index 6889e13c0..71300b743 100644
--- a/src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.cpp
+++ b/src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.cpp
@@ -176,6 +176,23 @@ void logit_kernel(TensorIteratorBase& iter, const Scalar& eps_scalar) {
   });
 }
 
+template <typename scalar_t>
+struct I0Functor {
+  scalar_t operator()(scalar_t a) const {
+    using opmath_t = at::opmath_type<scalar_t>;
+    return calc_i0<opmath_t>(a);
+  }
+};
+
+void i0_kernel(TensorIteratorBase& iter) {
+  AT_DISPATCH_FLOATING_TYPES_AND2(
+      ScalarType::Half,
+      ScalarType::BFloat16,
+      iter.common_dtype(),
+      "i0_xpu",
+      [&]() { gpu_kernel(iter, I0Functor<scalar_t>()); });
+}
+
 template <typename scalar_t>
 struct I0eFunctor {
   scalar_t operator()(scalar_t a) const {
diff --git a/src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.h b/src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.h
index 
16518cf2d..c85d47411 100644
--- a/src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.h
+++ b/src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.h
@@ -18,6 +18,8 @@ TORCH_XPU_API void logit_kernel(
     TensorIteratorBase& iter,
     const Scalar& eps_scalar);
 
+TORCH_XPU_API void i0_kernel(TensorIteratorBase& iter);
+
 TORCH_XPU_API void i0e_kernel(TensorIteratorBase& iter);
 
 TORCH_XPU_API void i1_kernel(TensorIteratorBase& iter);
diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py
index 7a2ddb75f..95807265d 100644
--- a/test/xpu/xpu_test_utils.py
+++ b/test/xpu/xpu_test_utils.py
@@ -82,6 +82,7 @@
     "hardswish",
     "nn.functional.hardshrink",
     "nn.functional.mish",
+    "i0",
     "index_add",
     "index_fill",
     "index_put",
diff --git a/yaml/native/native_functions.yaml b/yaml/native/native_functions.yaml
index 1283b9f24..ef036d5f8 100644
--- a/yaml/native/native_functions.yaml
+++ b/yaml/native/native_functions.yaml
@@ -6865,6 +6865,23 @@
 - func: index_copy.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor
   variants: function, method
 
+- func: i0(Tensor self) -> Tensor
+  structured_delegate: i0.out
+  variants: function, method
+  tags: pointwise
+
+- func: i0_(Tensor(a!) self) -> Tensor(a!)
+  structured_delegate: i0.out
+  variants: function, method
+  tags: pointwise
+
+- func: i0.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)
+  structured: True
+  structured_inherits: TensorIteratorBase
+  dispatch:
+    XPU: i0_out
+  tags: pointwise
+
 - func: special_i0e(Tensor self) -> Tensor
   python_module: special
   variants: function