From eea348f5ed635aaa96489a52ab03a4e5cb77ea90 Mon Sep 17 00:00:00 2001
From: yucai <huaiyu.zheng@intel.com>
Date: Wed, 17 Jul 2024 06:43:39 +0000
Subject: [PATCH 1/6] add polar

---
 src/ATen/native/xpu/TensorFactories.cpp     | 23 +++++++++++++++++++++
 src/ATen/native/xpu/XPUFallback.template    |  1 -
 src/ATen/native/xpu/sycl/ComplexKernels.cpp | 14 +++++++++++++
 src/ATen/native/xpu/sycl/ComplexKernels.h   |  2 ++
 test/xpu/xpu_test_utils.py                  |  1 +
 yaml/xpu_functions.yaml                     |  2 ++
 6 files changed, 42 insertions(+), 1 deletion(-)
diff --git a/src/ATen/native/xpu/TensorFactories.cpp b/src/ATen/native/xpu/TensorFactories.cpp
index 110590958..a1c096398 100644
--- a/src/ATen/native/xpu/TensorFactories.cpp
+++ b/src/ATen/native/xpu/TensorFactories.cpp
@@ -151,6 +151,29 @@ Tensor& XPUNativeFunctions::complex_out(
   return result;
 }
 
+Tensor& XPUNativeFunctions::polar_out( // XPU out-variant of polar: fills `result` from `abs` and `angle`, returns `result`
+    const Tensor& abs,
+    const Tensor& angle,
+    Tensor& result) {
+  complex_check_dtype(result, abs, angle); // dtype validation; helper is defined outside this hunk
+  auto iter = TensorIteratorConfig()
+                  .add_output(result)
+                  .add_const_input(abs) // input 0: its dtype drives the dispatch in polar_kernel
+                  .add_const_input(angle) // input 1
+                  .check_all_same_dtype(false) // operands are not required to share one dtype
+                  .build();
+  native::xpu::polar_kernel(iter); // kernel declared in sycl/ComplexKernels.h
+  return result;
+}
+
+Tensor XPUNativeFunctions::polar(const Tensor& abs, const Tensor& angle) {
+  complex_check_floating(abs, angle);
+  c10::TensorOptions options = abs.options();
+  options = options.dtype(toComplexType(abs.scalar_type()));
+  Tensor result = at::empty(0, options);
+  return at::polar_out(result, abs, angle);
+}
+
 Tensor& XPUNativeFunctions::randperm_out(
     int64_t n,
     c10::optional<Generator> generator,
diff --git a/src/ATen/native/xpu/XPUFallback.template b/src/ATen/native/xpu/XPUFallback.template
index 561142654..fb2d15445 100644
--- a/src/ATen/native/xpu/XPUFallback.template
+++ b/src/ATen/native/xpu/XPUFallback.template
@@ -270,7 +270,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
     "ormqr",
     "_pdist_backward",
     "_pdist_forward",
-    "polar.out",
     "polygamma.out",
     "_prelu_kernel",
     "_prelu_kernel_backward",
diff --git a/src/ATen/native/xpu/sycl/ComplexKernels.cpp b/src/ATen/native/xpu/sycl/ComplexKernels.cpp
index 56b25d0ef..87504bd5e 100644
--- a/src/ATen/native/xpu/sycl/ComplexKernels.cpp
+++ b/src/ATen/native/xpu/sycl/ComplexKernels.cpp
@@ -21,4 +21,18 @@ void complex_kernel(TensorIterator& iter) {
       });
 }
 
+template <typename scalar_t> // scalar_t is the element type selected by the dispatch in polar_kernel
+struct PolarFunctor { // element-wise functor handed to gpu_kernel by polar_kernel
+  c10::complex<scalar_t> operator()(scalar_t a, scalar_t b) const { // presumably a = abs, b = angle (iterator input order) - confirm against gpu_kernel
+    return c10::complex<scalar_t>(a * std::cos(b), a * std::sin(b)); // constructor args: a*cos(b), then a*sin(b)
+  }
+};
+
+void polar_kernel(TensorIterator& iter) { // runs PolarFunctor over `iter` via gpu_kernel
+  AT_DISPATCH_FLOATING_TYPES(iter.input_dtype(0), "polar_xpu", [&]() { // scalar_t is chosen from the dtype of input 0
+    PolarFunctor<scalar_t> f;
+    gpu_kernel(iter, f);
+  });
+}
+
 } // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/ComplexKernels.h b/src/ATen/native/xpu/sycl/ComplexKernels.h
index 990bcd14e..d51556b4f 100644
--- a/src/ATen/native/xpu/sycl/ComplexKernels.h
+++ b/src/ATen/native/xpu/sycl/ComplexKernels.h
@@ -6,4 +6,6 @@ namespace at::native::xpu {
 
 void complex_kernel(TensorIterator& iter);
 
+void polar_kernel(TensorIterator& iter); // polar kernel entry point; defined in ComplexKernels.cpp
+
 } // namespace at::native::xpu
diff --git a/test/xpu/xpu_test_utils.py b/test/xpu/xpu_test_utils.py
index c44dad1d7..5013debce 100644
--- a/test/xpu/xpu_test_utils.py
+++ b/test/xpu/xpu_test_utils.py
@@ -183,6 +183,7 @@
     "renorm",
     "multinomial",
     "lerp",
+    "polar",
     "conj_physical",
     "copysign",
     "count_nonzero"
diff --git a/yaml/xpu_functions.yaml b/yaml/xpu_functions.yaml
index 0d6d3c79f..9a7ddb919 100644
--- a/yaml/xpu_functions.yaml
+++ b/yaml/xpu_functions.yaml
@@ -223,6 +223,8 @@ supported:
   - eye.m_out
   - _efficientzerotensor
   - complex.out
+  - polar.out
+  - polar
   - clone
   - fill_.Scalar
   - fill_.Tensor

From 845bb085f324114e3dc02817a12c99732bb16046 Mon Sep 17 00:00:00 2001
From: yucai <huaiyu.zheng@intel.com>
Date: Thu, 25 Jul 2024 06:04:10 +0000
Subject: [PATCH 2/6] test skip

---
 test/xpu/extended/run_test_with_skip.py | 3 +++
 test/xpu/run_test_with_skip.py          | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py
index 6c8968510..a37288c8d 100644
--- a/test/xpu/extended/run_test_with_skip.py
+++ b/test/xpu/extended/run_test_with_skip.py
@@ -151,6 +151,9 @@
     # Greatest relative difference: 0.00396728515625 at index (610,) (up to 0.001 allowed)
     "test_compare_cpu_hypot_xpu_bfloat16",
 
+    # Polar's backward is calculated using complex(), which does not support bfloat16.
+    "test_compare_cpu_polar_xpu_bfloat16",
+
     # Regressions due to PyTorch uplift (Numeric difference in float and bfloat)
     # https://github.com/intel/torch-xpu-ops/issues/549
     # Example fail log
diff --git a/test/xpu/run_test_with_skip.py b/test/xpu/run_test_with_skip.py
index e1dc4788a..e2bdb13bb 100644
--- a/test/xpu/run_test_with_skip.py
+++ b/test/xpu/run_test_with_skip.py
@@ -796,6 +796,9 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_noncontiguous_samples_nn_functional_local_response_norm_xpu_int64",
     # torch.complex32 - "sinh_cpu" not implemented for 'ComplexHalf'
     "test_dtypes_cosh_xpu",
+
+    #Polar's backward is calculated using complex(), which does not support bfloat16.
+    "test_dtypes_polar_xpu",
 )
 res += launch_test("test_ops_xpu.py", skip_list)
 

From 870d15dd8be5d6c3cd803acbefe3800db6f27e69 Mon Sep 17 00:00:00 2001
From: yucai <yu.cai@intel.com>
Date: Mon, 29 Jul 2024 04:51:08 +0000
Subject: [PATCH 3/6] skip bf16 test

---
 test/xpu/extended/run_test_with_skip.py | 3 ++-
 test/xpu/run_test_with_skip.py          | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py
index a37288c8d..3b22dd695 100644
--- a/test/xpu/extended/run_test_with_skip.py
+++ b/test/xpu/extended/run_test_with_skip.py
@@ -151,7 +151,8 @@
     # Greatest relative difference: 0.00396728515625 at index (610,) (up to 0.001 allowed)
     "test_compare_cpu_hypot_xpu_bfloat16",
 
-    # Polar's backward is calculated using complex(), which does not support bfloat16.
+    # RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
+    # Polar's backward is calculated using complex(), which does not support bfloat16. Cuda fails with same error.
     "test_compare_cpu_polar_xpu_bfloat16",
 
     # Regressions due to PyTorch uplift (Numeric difference in float and bfloat)
diff --git a/test/xpu/run_test_with_skip.py b/test/xpu/run_test_with_skip.py
index e2bdb13bb..20ddf1679 100644
--- a/test/xpu/run_test_with_skip.py
+++ b/test/xpu/run_test_with_skip.py
@@ -797,7 +797,8 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     # torch.complex32 - "sinh_cpu" not implemented for 'ComplexHalf'
     "test_dtypes_cosh_xpu",
 
-    #Polar's backward is calculated using complex(), which does not support bfloat16.
+    # RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
+    # Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with the same error.
     "test_dtypes_polar_xpu",
 )
 res += launch_test("test_ops_xpu.py", skip_list)

From c3a99bf3069f3abd6091a8c00a70a11cf11e3c4a Mon Sep 17 00:00:00 2001
From: Feng Yuan <feng1.yuan@intel.com>
Date: Mon, 29 Jul 2024 13:38:16 +0800
Subject: [PATCH 4/6] Update test/xpu/extended/run_test_with_skip.py

---
 test/xpu/extended/run_test_with_skip.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/xpu/extended/run_test_with_skip.py b/test/xpu/extended/run_test_with_skip.py
index 1a8752096..a75d2e675 100644
--- a/test/xpu/extended/run_test_with_skip.py
+++ b/test/xpu/extended/run_test_with_skip.py
@@ -155,7 +155,7 @@
     "test_compare_cpu_hypot_xpu_bfloat16",
 
     # RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
-    # Polar's backward is calculated using complex(), which does not support bfloat16. Cuda fails with same error.
+    # Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with the same error.
     "test_compare_cpu_polar_xpu_bfloat16",
 
     # Regressions due to PyTorch uplift (Numeric difference in float and bfloat)

From 32a2641b21dbc895851a204340a3d82e5fee1988 Mon Sep 17 00:00:00 2001
From: Feng Yuan <feng1.yuan@intel.com>
Date: Mon, 29 Jul 2024 13:41:20 +0800
Subject: [PATCH 5/6] Remove unnecessary registration

---
 src/ATen/native/xpu/TensorFactories.cpp | 8 --------
 yaml/xpu_functions.yaml                 | 1 -
 2 files changed, 9 deletions(-)

diff --git a/src/ATen/native/xpu/TensorFactories.cpp b/src/ATen/native/xpu/TensorFactories.cpp
index a1c096398..44da487f7 100644
--- a/src/ATen/native/xpu/TensorFactories.cpp
+++ b/src/ATen/native/xpu/TensorFactories.cpp
@@ -166,14 +166,6 @@ Tensor& XPUNativeFunctions::polar_out(
   return result;
 }
 
-Tensor XPUNativeFunctions::polar(const Tensor& abs, const Tensor& angle) {
-  complex_check_floating(abs, angle);
-  c10::TensorOptions options = abs.options();
-  options = options.dtype(toComplexType(abs.scalar_type()));
-  Tensor result = at::empty(0, options);
-  return at::polar_out(result, abs, angle);
-}
-
 Tensor& XPUNativeFunctions::randperm_out(
     int64_t n,
     c10::optional<Generator> generator,
diff --git a/yaml/xpu_functions.yaml b/yaml/xpu_functions.yaml
index b195273c5..f5ecda76a 100644
--- a/yaml/xpu_functions.yaml
+++ b/yaml/xpu_functions.yaml
@@ -269,7 +269,6 @@ supported:
   - _efficientzerotensor
   - complex.out
   - polar.out
-  - polar
   - clone
   - fill_.Scalar
   - fill_.Tensor

From 1a2c34b8f0b88e29c49521e571a3254781a2a40b Mon Sep 17 00:00:00 2001
From: Feng Yuan <feng1.yuan@intel.com>
Date: Mon, 29 Jul 2024 23:58:43 +0800
Subject: [PATCH 6/6] Skip case

---
 test/xpu/run_test_with_skip.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/test/xpu/run_test_with_skip.py b/test/xpu/run_test_with_skip.py
index b8e1b288a..7d051607e 100644
--- a/test/xpu/run_test_with_skip.py
+++ b/test/xpu/run_test_with_skip.py
@@ -3020,8 +3020,12 @@ def launch_test(test_case, skip_list=None, exe_list=None):
 res += launch_test("nn/test_load_state_dict_xpu.py")
 
 # test_module_hooks
-
-res += launch_test("nn/test_module_hooks_xpu.py")
+skip_list = (
+    # TypeError: TestStateDictHooks.test_register_state_dict_post_hook() missing 1 required positional argument: 'private'
+    # https://github.com/intel/torch-xpu-ops/issues/658
+    "test_register_state_dict_post_hook",
+)
+res += launch_test("nn/test_module_hooks_xpu.py", skip_list)
 
 # test_parametrization