From f6d0f7711beaef35516fe54ee8940ba052f84fc0 Mon Sep 17 00:00:00 2001
From: Yutao Xu
Date: Sun, 4 Aug 2024 10:12:59 +0800
Subject: [PATCH] Resolve the memory format issue of GroupNorm (#677)

The GroupNorm XPU kernel supports channels-last input, which differs
from CUDA's behavior, so the NCHW contiguity check cannot be performed
on XPU. For details, see:
https://github.com/pytorch/pytorch/commit/e9cabef6631395c3dbb8d3d82b94e108e6b87db3

---------

Co-authored-by: Feng Yuan
---
 src/ATen/native/xpu/GroupNorm.cpp | 6 +-----
 test/xpu/run_test_with_skip.py    | 2 --
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/src/ATen/native/xpu/GroupNorm.cpp b/src/ATen/native/xpu/GroupNorm.cpp
index e24a6d5c8..0e0a2e558 100644
--- a/src/ATen/native/xpu/GroupNorm.cpp
+++ b/src/ATen/native/xpu/GroupNorm.cpp
@@ -60,10 +60,6 @@ std::tuple<Tensor, Tensor, Tensor> XPUNativeFunctions::native_group_norm(
   // repeated check so expanded weights can call native_group_norm directly but
   // save mean and variance from forward
   check_group_norm_inputs(X, gamma, beta, C, group);
-  auto memory_format = X.device().is_cpu() ? X.suggest_memory_format()
-                                           : at::MemoryFormat::Contiguous;
-
-  TORCH_CHECK(X.is_contiguous(memory_format));
 
   bool mixed_type = at::native::is_mixed_type(X, gamma, beta);
   if (mixed_type) {
@@ -76,7 +72,7 @@ std::tuple<Tensor, Tensor, Tensor> XPUNativeFunctions::native_group_norm(
       c10::nullopt /* layout */,
       c10::nullopt /* device */,
       c10::nullopt /* pin_memory */,
-      memory_format);
+      MemoryFormat::Contiguous);
   const auto dtype = at::native::param_scalar_type(X, mixed_type);
   Tensor mean = at::empty({N, group}, X.options().dtype(dtype));
   Tensor rstd = at::empty({N, group}, X.options().dtype(dtype));
diff --git a/test/xpu/run_test_with_skip.py b/test/xpu/run_test_with_skip.py
index a96cf26f8..e7c34283e 100644
--- a/test/xpu/run_test_with_skip.py
+++ b/test/xpu/run_test_with_skip.py
@@ -1286,8 +1286,6 @@ def launch_test(test_case, skip_list=None, exe_list=None):
     "test_rnn_retain_variables_xpu_float64",
     "test_transformerencoderlayer_xpu_float64",
     "test_variable_sequence_xpu_float64",
-    # native_group_norm : RuntimeError: Expected X.is_contiguous(memory_format) to be true, but got false. (Could this error message be improved? If so, please report an enhancement request to PyTorch.)
-    "test_GroupNorm_memory_format_xpu",
     # AssertionError: Scalars are not close!
     "test_InstanceNorm1d_general_xpu",
     "test_InstanceNorm2d_general_xpu",
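
The user-visible effect of the change: nn.GroupNorm on XPU now accepts
channels-last input instead of tripping the removed TORCH_CHECK. Below is a
minimal smoke test of that behavior; it assumes a PyTorch build where the
"xpu" device is available, and the shapes and group count are illustrative
only, not taken from the patch.

    import torch
    import torch.nn as nn

    # Channels-last input: logical shape (N, C, H, W), strides permuted so
    # that the channel dimension is innermost.
    x = torch.randn(2, 8, 4, 4, device="xpu").to(
        memory_format=torch.channels_last)
    gn = nn.GroupNorm(num_groups=4, num_channels=8).to("xpu")

    # Before this patch, the removed check raised: "Expected
    # X.is_contiguous(memory_format) to be true, but got false." With the
    # check gone, the XPU kernel handles the channels-last input directly.
    y = gn(x)
    print(y.shape)  # torch.Size([2, 8, 4, 4])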