From 2f75c47256a93b83647d349ff1b3ab65776064d4 Mon Sep 17 00:00:00 2001
From: Feng Yuan
Date: Mon, 5 Aug 2024 09:29:41 +0800
Subject: [PATCH] Move kernel pieces of cumsum/cumprod into unified file. (#681)

1. Fixing compilation error in BUILD_SEPARATE_OPS mode.
2. Although we aligned with file naming of PyTorch in-tree, putting host
   only code in a file under ../xpu/sycl doesn't align with our design.

Signed-off-by: Feng Yuan
---
 src/ATen/native/xpu/sycl/CumprodKernel.cpp | 18 ++++++++
 src/ATen/native/xpu/sycl/CumsumKernel.cpp  | 18 ++++++++
 src/ATen/native/xpu/sycl/ScanKernels.cpp   | 49 ----------------------
 3 files changed, 36 insertions(+), 49 deletions(-)
 delete mode 100644 src/ATen/native/xpu/sycl/ScanKernels.cpp

diff --git a/src/ATen/native/xpu/sycl/CumprodKernel.cpp b/src/ATen/native/xpu/sycl/CumprodKernel.cpp
index 6c129183d..f35cc7979 100644
--- a/src/ATen/native/xpu/sycl/CumprodKernel.cpp
+++ b/src/ATen/native/xpu/sycl/CumprodKernel.cpp
@@ -21,4 +21,22 @@ void launch_cumprod_kernel(
       });
 }
 
+static c10::MaybeOwned<Tensor> contiguous_out_arg(const Tensor& tensor) {
+  if (tensor.is_contiguous()) {
+    return c10::MaybeOwned<Tensor>::borrowed(tensor);
+  }
+  return c10::MaybeOwned<Tensor>::owned(
+      at::empty(tensor.sizes(), tensor.options()));
+}
+
+void cumprod_kernel(const Tensor& result, const Tensor& self, int64_t dim) {
+  auto result_ = contiguous_out_arg(result);
+
+  launch_cumprod_kernel(*result_, self, dim);
+
+  if (!result.is_same(*result_)) {
+    result.copy_(*result_);
+  }
+}
+
 } // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/CumsumKernel.cpp b/src/ATen/native/xpu/sycl/CumsumKernel.cpp
index 8a89b2231..c2754869b 100644
--- a/src/ATen/native/xpu/sycl/CumsumKernel.cpp
+++ b/src/ATen/native/xpu/sycl/CumsumKernel.cpp
@@ -21,4 +21,22 @@ void launch_cumsum_kernel(
       });
 }
 
+static c10::MaybeOwned<Tensor> contiguous_out_arg(const Tensor& tensor) {
+  if (tensor.is_contiguous()) {
+    return c10::MaybeOwned<Tensor>::borrowed(tensor);
+  }
+  return c10::MaybeOwned<Tensor>::owned(
+      at::empty(tensor.sizes(), tensor.options()));
+}
+
+void cumsum_kernel(const Tensor& result, const Tensor& self, int64_t dim) {
+  auto result_ = contiguous_out_arg(result);
+
+  launch_cumsum_kernel(*result_, self, dim);
+
+  if (!result.is_same(*result_)) {
+    result.copy_(*result_);
+  }
+}
+
 } // namespace at::native::xpu
diff --git a/src/ATen/native/xpu/sycl/ScanKernels.cpp b/src/ATen/native/xpu/sycl/ScanKernels.cpp
deleted file mode 100644
index ad97dc4b4..000000000
--- a/src/ATen/native/xpu/sycl/ScanKernels.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
-#include <ATen/core/NamedTensor.h>
-#include <ATen/core/Tensor.h>
-#include <ATen/TensorIterator.h>
-
-#include <comm/SYCLContext.h>
-
-#ifndef AT_PER_OPERATOR_HEADERS
-#include <ATen/Functions.h>
-#include <ATen/NativeFunctions.h>
-#else
-#include <ATen/ops/cumprod_native.h>
-#include <ATen/ops/cumsum_native.h>
-#endif
-
-#include <ATen/native/xpu/sycl/CumprodKernel.h>
-#include <ATen/native/xpu/sycl/CumsumKernel.h>
-
-namespace at::native::xpu {
-
-static c10::MaybeOwned<Tensor> contiguous_out_arg(const Tensor& tensor) {
-  if (tensor.is_contiguous()) {
-    return c10::MaybeOwned<Tensor>::borrowed(tensor);
-  }
-  return c10::MaybeOwned<Tensor>::owned(
-      at::empty(tensor.sizes(), tensor.options()));
-}
-
-void cumsum_kernel(const Tensor& result, const Tensor& self, int64_t dim) {
-  auto result_ = contiguous_out_arg(result);
-
-  launch_cumsum_kernel(*result_, self, dim);
-
-  if (!result.is_same(*result_)) {
-    result.copy_(*result_);
-  }
-}
-
-void cumprod_kernel(const Tensor& result, const Tensor& self, int64_t dim) {
-  auto result_ = contiguous_out_arg(result);
-
-  launch_cumprod_kernel(*result_, self, dim);
-
-  if (!result.is_same(*result_)) {
-    result.copy_(*result_);
-  }
-}
-
-} // namespace at::native::xpu