
Add aten::histc and its variants #739

Merged 13 commits on Oct 21, 2024
27 changes: 27 additions & 0 deletions src/ATen/native/xpu/SummaryOps.cpp
@@ -1,3 +1,4 @@
#include <ATen/native/Resize.h>
#include <ATen/native/xpu/sycl/SummaryOpsKernels.h>
#include <ATen/xpu/XPUNativeFunctions.h>
#include <comm/SYCLContext.h>
@@ -21,4 +22,30 @@ Tensor XPUNativeFunctions::bincount(
return native::xpu::bincount_kernel(self, weights, minlength);
}

Tensor XPUNativeFunctions::histc(
const Tensor& self,
int64_t nbins,
const Scalar& min,
const Scalar& max) {
if (self.scalar_type() == ScalarType::Half) {
AT_ERROR("HalfTensor is not supported");
}
// See Note [Writing Nondeterministic Operations]
Contributor:
Let's move the determinism check to the kernel level, since it is related to the kernel algorithm.
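A minimal sketch of the suggested change (hypothetical, not part of this PR): the alert would move out of the XPUNativeFunctions::histc wrapper below and into _histc_kernel in SummaryOpsKernels.cpp, next to the atomicAdd-based algorithm it describes.

// Hypothetical: determinism alert raised at the kernel level instead of
// in the XPUNativeFunctions::histc wrapper.
Tensor _histc_kernel(
    const Tensor& self,
    int64_t nbins,
    const Scalar& min,
    const Scalar& max) {
  // Nondeterministic because the histogram kernel accumulates with atomicAdd.
  globalContext().alertNotDeterministic("_histc_xpu");
  return AT_DISPATCH_ALL_TYPES(self.scalar_type(), "_histc_xpu", [&] {
    using bounds_t = at::acc_type_device<scalar_t, kXPU>;
    return _histc_template<scalar_t>(
        self, nbins, min.to<bounds_t>(), max.to<bounds_t>());
  });
}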

// Nondeterministic because of atomicAdd usage
globalContext().alertNotDeterministic("_histc_xpu");
return native::xpu::_histc_kernel(self, nbins, min, max);
}

Tensor& XPUNativeFunctions::histc_out(
const Tensor& self,
int64_t bins,
const Scalar& min,
const Scalar& max,
Tensor& result) {
auto ret = histc(self, bins, min, max);
at::native::resize_output(result, ret.sizes());
result.copy_(ret);
return result;
}
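This out= wrapper follows the usual ATen pattern: compute the functional result, resize_output the destination to match, then copy into it. A hedged usage sketch (tensor names and shapes here are illustrative, not from the PR):

// Illustrative call site: `result` may start with any shape; histc_out
// resizes it to {bins} and fills it. Passing min == max (both 0) makes the
// kernel derive the bounds from the data itself.
Tensor input = at::randn({100}, at::device(at::kXPU));
Tensor result = at::empty({0}, input.options());
XPUNativeFunctions::histc_out(input, /*bins=*/10, /*min=*/0, /*max=*/0, result);
// result now has shape {10}.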

} // namespace at
1 change: 0 additions & 1 deletion src/ATen/native/xpu/XPUFallback.template
@@ -191,7 +191,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"hardshrink_backward.grad_input",
"hardshrink.out",
"heaviside.out",
"histc",
"i0.out",
"igammac.out",
"igamma.out",
55 changes: 55 additions & 0 deletions src/ATen/native/xpu/sycl/SummaryOpsKernels.cpp
@@ -185,6 +185,61 @@ void tensor_histogram(

return;
}

template <typename input_t>
Tensor _histc_template(
const Tensor& self,
int64_t nbins,
at::acc_type_device<input_t, kXPU> min,
at::acc_type_device<input_t, kXPU> max) {
if (nbins <= 0) {
AT_ERROR("bins must be > 0");
}
Tensor output = at::zeros(
{nbins},
self.scalar_type(),
std::nullopt /* layout */,
DeviceType::XPU,
std::nullopt /* pin_memory */);
input_t minvalue = min;
input_t maxvalue = max;
if (min == max && self.numel() > 0) {
minvalue = *self.min().cpu().data_ptr<input_t>();
maxvalue = *self.max().cpu().data_ptr<input_t>();
}
if (minvalue == maxvalue) {
minvalue = minvalue - 1;
maxvalue = maxvalue + 1;
}

TORCH_CHECK(
!(std::isinf(minvalue) || std::isinf(maxvalue) || std::isnan(minvalue) ||
std::isnan(maxvalue)),
"range of [",
minvalue,
", ",
maxvalue,
"] is not finite");

TORCH_CHECK(minvalue < maxvalue, "max must be larger than min");

tensor_histogram<input_t, input_t, false>(
output, self, Tensor(), nbins, minvalue, maxvalue);
return output;
}
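A worked example of the binning semantics implemented above (this matches the documented behavior of torch.histc; the values are illustrative):

// 4 equal-width bins over [0, 3]: [0, 0.75), [0.75, 1.5), [1.5, 2.25), [2.25, 3].
// The two 1.0 values land in the second bin and the 2.0 in the third,
// so the resulting counts are {0, 2, 1, 0}.
Tensor t = at::tensor({1.0f, 2.0f, 1.0f});
Tensor h = at::histc(t, /*bins=*/4, /*min=*/0, /*max=*/3);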

Tensor _histc_kernel(
const Tensor& self,
int64_t nbins,
const Scalar& min,
const Scalar& max) {
return AT_DISPATCH_ALL_TYPES(self.scalar_type(), "_histc_xpu", [&] {
using bounds_t = at::acc_type_device<scalar_t, kXPU>;
return _histc_template<scalar_t>(
self, nbins, min.to<bounds_t>(), max.to<bounds_t>());
});
}
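For readers unfamiliar with the dispatch macro: AT_DISPATCH_ALL_TYPES expands, roughly, to a switch over the runtime dtype that instantiates the lambda once per supported scalar type. A simplified sketch of that expansion (not the literal macro output):

switch (self.scalar_type()) {
  case ScalarType::Float: {
    using scalar_t = float;
    using bounds_t = at::acc_type_device<scalar_t, kXPU>;
    return _histc_template<scalar_t>(
        self, nbins, min.to<bounds_t>(), max.to<bounds_t>());
  }
  // ... analogous cases for Double, Byte, Char, Short, Int, Long ...
  default:
    TORCH_CHECK(false, "\"_histc_xpu\" not implemented for this dtype");
}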

template <typename input_t, typename weights_t>
Tensor bincount_template(
const Tensor& self,
6 changes: 6 additions & 0 deletions src/ATen/native/xpu/sycl/SummaryOpsKernels.h
@@ -8,4 +8,10 @@ Tensor bincount_kernel(
const Tensor& weights,
int64_t minlength);

Tensor _histc_kernel(
const Tensor& self,
int64_t nbins,
const Scalar& min,
const Scalar& max);

} // namespace at::native::xpu
1 change: 1 addition & 0 deletions test/xpu/xpu_test_utils.py
@@ -222,6 +222,7 @@
"argmin",
"conj_physical",
"histogram",
"histc",
"repeat_interleave",
"fmax",
"fmin",
2 changes: 2 additions & 0 deletions yaml/xpu_functions.yaml
@@ -703,6 +703,8 @@ supported:
- histogram.bins_tensor_out
- histogram.bin_ct
- histogram.bin_ct_out
- histc
- histc.out
- repeat_interleave.Tensor
- norm.ScalarOpt_dim_dtype
- norm.dtype_out