diff --git a/src/ATen/native/xpu/TensorFactories.cpp b/src/ATen/native/xpu/TensorFactories.cpp
index 110590958..44da487f7 100644
--- a/src/ATen/native/xpu/TensorFactories.cpp
+++ b/src/ATen/native/xpu/TensorFactories.cpp
@@ -151,6 +151,21 @@ Tensor& XPUNativeFunctions::complex_out(
   return result;
 }
 
+Tensor& XPUNativeFunctions::polar_out(
+    const Tensor& abs,
+    const Tensor& angle,
+    Tensor& result) {
+  complex_check_dtype(result, abs, angle);
+  auto iter = TensorIteratorConfig()
+                  .add_output(result)
+                  .add_const_input(abs)
+                  .add_const_input(angle)
+                  .check_all_same_dtype(false)
+                  .build();
+  native::xpu::polar_kernel(iter);
+  return result;
+}
+
 Tensor& XPUNativeFunctions::randperm_out(
     int64_t n,
     c10::optional<Generator> generator,
diff --git a/src/ATen/native/xpu/XPUFallback.template b/src/ATen/native/xpu/XPUFallback.template
index 93321f23d..4a4c96828 100644
--- a/src/ATen/native/xpu/XPUFallback.template
+++ b/src/ATen/native/xpu/XPUFallback.template
@@ -240,7 +240,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
       "ormqr",
       "_pdist_backward",
       "_pdist_forward",
-      "polar.out",
       "_prelu_kernel",
       "_prelu_kernel_backward",
       "prod",
diff --git a/src/ATen/native/xpu/sycl/ComplexKernels.cpp b/src/ATen/native/xpu/sycl/ComplexKernels.cpp
index 56b25d0ef..87504bd5e 100644
--- a/src/ATen/native/xpu/sycl/ComplexKernels.cpp
+++ b/src/ATen/native/xpu/sycl/ComplexKernels.cpp
@@ -21,4 +21,18 @@ void complex_kernel(TensorIterator& iter) {
   });
 }
 
+template <typename scalar_t>
+struct PolarFunctor {
+  c10::complex<scalar_t> operator()(scalar_t a, scalar_t b) const {
+    return c10::complex<scalar_t>(a * std::cos(b), a * std::sin(b));
+  }
+};
+
+void polar_kernel(TensorIterator& iter) {
+  AT_DISPATCH_FLOATING_TYPES(iter.input_dtype(0), "polar_xpu", [&]() {
+    PolarFunctor<scalar_t> f;
+    gpu_kernel(iter, f);
+  });
+}
+
 } // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/ComplexKernels.h b/src/ATen/native/xpu/sycl/ComplexKernels.h
index 990bcd14e..d51556b4f 100644
--- a/src/ATen/native/xpu/sycl/ComplexKernels.h
+++ b/src/ATen/native/xpu/sycl/ComplexKernels.h
@@ -6,4 +6,6 @@ namespace at::native::xpu {
 
 void complex_kernel(TensorIterator& iter);
 
+void polar_kernel(TensorIterator& iter);
+
 } // namespace at::native::xpu
diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py
index 6f8fe8d3a..a75d2e675 100644
--- a/test/xpu/extended/run_test_with_skip.py
+++ b/test/xpu/extended/run_test_with_skip.py
@@ -154,6 +154,10 @@
     # Greatest relative difference: 0.00396728515625 at index (610,) (up to 0.001 allowed)
     "test_compare_cpu_hypot_xpu_bfloat16",
 
+    # RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
+    # Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with the same error.
+    "test_compare_cpu_polar_xpu_bfloat16",
+
     # Regressions due to PyTorch uplift (numeric difference in float and bfloat)
     # https://github.com/intel/torch-xpu-ops/issues/549
     # Example fail log
diff --git a/test/xpu/run_test_with_skip.py b/test/xpu/run_test_with_skip.py
index 719af3ca4..7d051607e 100644
--- a/test/xpu/run_test_with_skip.py
+++ b/test/xpu/run_test_with_skip.py
@@ -782,6 +782,10 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     # torch.complex32 - "sinh_cpu" not implemented for 'ComplexHalf'
     "test_dtypes_cosh_xpu",
 
+    # RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
+    # Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with the same error.
+    "test_dtypes_polar_xpu",
+
     # implemented aten::histogram to align MPS operators coverage, CUDA doesn't support
     # but test_dtypes infrastructure leverages CUDA supported datatypes
     "test_dtypes_histogram_xpu",
@@ -3016,8 +3020,12 @@ def launch_test(test_case, skip_list=None, exe_list=None):
 res += launch_test("nn/test_load_state_dict_xpu.py")
 
 # test_module_hooks
-
-res += launch_test("nn/test_module_hooks_xpu.py")
+skip_list = (
+    # TypeError: TestStateDictHooks.test_register_state_dict_post_hook() missing 1 required positional argument: 'private'
+    # https://github.com/intel/torch-xpu-ops/issues/658
+    "test_register_state_dict_post_hook",
+)
+res += launch_test("nn/test_module_hooks_xpu.py", skip_list)
 
 # test_parametrization
 
diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py
index c281747f2..823988488 100644
--- a/test/xpu/xpu_test_utils.py
+++ b/test/xpu/xpu_test_utils.py
@@ -208,6 +208,7 @@
     "unique",
     "multinomial",
     "lerp",
+    "polar",
    "frac",
     "aminmax",
     "argmin",
diff --git a/yaml/xpu_functions.yaml b/yaml/xpu_functions.yaml
index fd087c7bc..9d453d215 100644
--- a/yaml/xpu_functions.yaml
+++ b/yaml/xpu_functions.yaml
@@ -268,6 +268,7 @@ supported:
   - eye.m_out
   - _efficientzerotensor
   - complex.out
+  - polar.out
   - clone
   - fill_.Scalar
   - fill_.Tensor