intel
diff --git a/‎cmake/Modules/FindSYCL.cmake
Lines changed: 0 additions & 20 deletions b/‎cmake/Modules/FindSYCL.cmake
Lines changed: 0 additions & 20 deletions
diff --git a/‎src/ATen/native/xpu/BinaryOps.cpp
Lines changed: 126 additions & 1 deletion b/‎src/ATen/native/xpu/BinaryOps.cpp
Lines changed: 126 additions & 1 deletion
diff --git a/‎src/ATen/native/xpu/Distributions.cpp
Lines changed: 18 additions & 0 deletions b/‎src/ATen/native/xpu/Distributions.cpp
Lines changed: 18 additions & 0 deletions
diff --git a/‎src/ATen/native/xpu/Loss.cpp
Lines changed: 55 additions & 0 deletions b/‎src/ATen/native/xpu/Loss.cpp
Lines changed: 55 additions & 0 deletions
diff --git a/‎src/ATen/native/xpu/Normalization.cpp
Lines changed: 110 additions & 0 deletions b/‎src/ATen/native/xpu/Normalization.cpp
Lines changed: 110 additions & 0 deletions
@@ -71,26 +71,6 @@ else()
     )
 endif()
 
-set(SYCL_LIBRARIES)
-find_library(SYCL_RUNTIME_LIBRARY sycl HINTS ${SYCL_LIBRARY_DIR})
-# On Windows, currently there's no sycl.lib. Only sycl7.lib with version suffix,
-# where the current version of the SYCL runtime is 7.
-# Until oneAPI adds support to sycl.lib without the version suffix,
-# sycl_runtime_version needs to be hardcoded and uplifted when SYCL runtime version uplifts.
-# TODO: remove this when sycl.lib is supported on Windows
-if(WIN32)
-  set(sycl_runtime_version 7)
-  find_library(
-    SYCL_RUNTIME_LIBRARY
-    NAMES "sycl${sycl_runtime_version}"
-    HINTS ${SYCL_LIBRARY_DIR}
-  )
-  if(SYCL_RUNTIME_LIBRARY STREQUAL "SYCL_RUNTIME_LIBRARY-NOTFOUND")
-    message(FATAL_ERROR "Cannot find a SYCL library on Windows")
-  endif()
-endif()
-list(APPEND SYCL_LIBRARIES ${SYCL_RUNTIME_LIBRARY})
-
 # Parse HOST_COMPILATION mode.
 option(SYCL_HOST_COMPILATION_CXX "Generated file extension" ON)
 
 
@@ -12,10 +12,10 @@
 #include <ATen/native/xpu/sycl/BinaryRemainderKernel.h>
 #include <ATen/native/xpu/sycl/CopysignKernel.h>
 #include <ATen/native/xpu/sycl/GcdLcmKernels.h>
+#include <ATen/native/xpu/sycl/LogAddExpKernels.h>
 #include <ATen/native/xpu/sycl/MaxMinElementwiseKernels.h>
 
 namespace at {
-
 Tensor XPUNativeFunctions::add(
     const Tensor& self,
     const Tensor& other,
@@ -459,6 +459,28 @@ Tensor& XPUNativeFunctions::minimum_out(
   return output;
 }
 
+// Out-variant of logit_backward: the result is written into `grad_input`.
+// A borrowing binary TensorIterator is built with `grad_input` as the output
+// and `grad_output` / `input` as operands, then the XPU logit-backward kernel
+// is run on it. When `eps` holds no value, -1.0 is forwarded to the kernel
+// instead (NOTE(review): presumably a "no clamping" sentinel -- confirm
+// against the kernel implementation).
+Tensor& XPUNativeFunctions::logit_backward_out(
+    const Tensor& grad_output,
+    const Tensor& input,
+    std::optional<double> eps,
+    Tensor& grad_input) {
+  const double eps_or_sentinel = eps.has_value() ? *eps : -1.0;
+  auto iter =
+      TensorIterator::borrowing_binary_op(grad_input, grad_output, input);
+  native::xpu::logit_backward_kernel(iter, Scalar(eps_or_sentinel));
+  return grad_input;
+}
+
+// Functional logit_backward: returns the iterator's output tensor.
+// An undefined tensor is passed as the iterator's output slot. When `eps`
+// holds no value, -1.0 is forwarded to the kernel instead (NOTE(review):
+// presumably a "no clamping" sentinel -- confirm against the kernel).
+Tensor XPUNativeFunctions::logit_backward(
+    const Tensor& grad_output,
+    const Tensor& input,
+    std::optional<double> eps) {
+  Tensor result;
+  auto iter = TensorIterator::borrowing_binary_op(result, grad_output, input);
+  const Scalar eps_scalar(eps.value_or(-1.0));
+  native::xpu::logit_backward_kernel(iter, eps_scalar);
+  return iter.output();
+}
+
 Tensor& XPUNativeFunctions::sigmoid_backward_out(
     const Tensor& grad_output,
     const Tensor& output,
@@ -479,6 +501,109 @@ Tensor XPUNativeFunctions::sigmoid_backward(
   return iter.output();
 }
 
+// Functional logaddexp: builds a borrowing binary TensorIterator over `self`
+// and `other` with an undefined output slot, runs the XPU logaddexp kernel,
+// and returns the iterator's output tensor.
+Tensor XPUNativeFunctions::logaddexp(const Tensor& self, const Tensor& other) {
+  Tensor result;
+  TensorIterator iter;
+  iter.build_borrowing_binary_op(result, self, other);
+  native::xpu::logaddexp_kernel(iter);
+  return iter.output();
+}
+
+// Out-variant of logaddexp: runs the XPU logaddexp kernel on a borrowing
+// binary TensorIterator whose output is `out`, then returns `out`.
+Tensor& XPUNativeFunctions::logaddexp_out(
+    const Tensor& self,
+    const Tensor& other,
+    Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_binary_op(out, self, other);
+  native::xpu::logaddexp_kernel(iter);
+  return out;
+}
+
+// Functional logaddexp2: builds a borrowing binary TensorIterator over `self`
+// and `other` with an undefined output slot, runs the XPU logaddexp2 kernel,
+// and returns the iterator's output tensor.
+Tensor XPUNativeFunctions::logaddexp2(const Tensor& self, const Tensor& other) {
+  Tensor result;
+  TensorIterator iter;
+  iter.build_borrowing_binary_op(result, self, other);
+  native::xpu::logaddexp2_kernel(iter);
+  return iter.output();
+}
+
+// Out-variant of logaddexp2: runs the XPU logaddexp2 kernel on a borrowing
+// binary TensorIterator whose output is `out`, then returns `out`.
+Tensor& XPUNativeFunctions::logaddexp2_out(
+    const Tensor& self,
+    const Tensor& other,
+    Tensor& out) {
+  TensorIterator iter;
+  iter.build_borrowing_binary_op(out, self, other);
+  native::xpu::logaddexp2_kernel(iter);
+  return out;
+}
+
+// Out-variant of floor_divide: runs the XPU floor-division kernel on a binary
+// TensorIterator whose output is `output`. If `output` is still undefined
+// after the kernel ran, it is assigned the iterator's output tensor before
+// being returned.
+Tensor& XPUNativeFunctions::floor_divide_out(
+    const Tensor& self,
+    const Tensor& other,
+    Tensor& output) {
+  TensorIterator iter;
+  iter.build_binary_op(output, self, other);
+  native::xpu::div_floor_kernel(iter);
+  if (output.defined()) {
+    return output;
+  }
+  output = iter.output();
+  return output;
+}
+
+// Functional floor_divide: builds a binary TensorIterator over `self` and
+// `other` with an undefined output slot, runs the XPU floor-division kernel,
+// and returns the iterator's output tensor.
+Tensor XPUNativeFunctions::floor_divide(
+    const Tensor& self,
+    const Tensor& other) {
+  Tensor result;
+  TensorIterator iter;
+  iter.build_binary_op(result, self, other);
+  native::xpu::div_floor_kernel(iter);
+  return iter.output();
+}
+
+// In-place floor_divide: delegates to floor_divide_out with `self` as both
+// the left operand and the output tensor.
+Tensor& XPUNativeFunctions::floor_divide_(Tensor& self, const Tensor& other) {
+  Tensor& in_place_result =
+      XPUNativeFunctions::floor_divide_out(self, other, self);
+  return in_place_result;
+}
+
+// Shared setup for fmax/fmin: raises via TORCH_CHECK if either operand is
+// complex (the message is prefixed with `name`), then builds and returns a
+// binary-op TensorIterator with `output` as the output and `self` / `other`
+// as inputs.
+TensorIterator meta_fmin_fmax(
+    const char* const name,
+    const Tensor& self,
+    const Tensor& other,
+    Tensor& output) {
+  TORCH_CHECK(
+      !(self.is_complex() || other.is_complex()),
+      name,
+      " not implemented for complex tensors.");
+  TensorIterator binary_iter;
+  binary_iter.build_binary_op(output, self, other);
+  return binary_iter;
+}
+
+// Out-variant of fmax: obtains the iterator from meta_fmin_fmax (which
+// rejects complex operands), runs the XPU fmax kernel, and returns `output`.
+Tensor& XPUNativeFunctions::fmax_out(
+    const Tensor& self,
+    const Tensor& other,
+    Tensor& output) {
+  TensorIterator fmax_iter = meta_fmin_fmax("fmax", self, other, output);
+  native::xpu::fmax_kernel(fmax_iter);
+  return output;
+}
+
+Tensor XPUNativeFunctions::fmax(const Tensor& self, const Tensor& other) {
+  Tensor output;
+  auto iter = meta_fmin_fmax("fmax", self, other, output);
+  native::xpu::fmax_kernel(iter);
+  return iter.output();
+}
+
+// Out-variant of fmin: obtains the iterator from meta_fmin_fmax (which
+// rejects complex operands), runs the XPU fmin kernel, and returns `output`.
+Tensor& XPUNativeFunctions::fmin_out(
+    const Tensor& self,
+    const Tensor& other,
+    Tensor& output) {
+  TensorIterator fmin_iter = meta_fmin_fmax("fmin", self, other, output);
+  native::xpu::fmin_kernel(fmin_iter);
+  return output;
+}
+
+Tensor XPUNativeFunctions::fmin(const Tensor& self, const Tensor& other) {
+  Tensor output;
+  auto iter = meta_fmin_fmax("fmin", self, other, output);
+  native::xpu::fmin_kernel(iter);
+  return iter.output();
+}
+
 Tensor XPUNativeFunctions::atan2(const Tensor& self, const Tensor& other) {
   Tensor out;
   TensorIterator iter;
 
@@ -191,6 +191,24 @@ Tensor& XPUNativeFunctions::random_(
   return random_(self, 0, to, std::move(generator));
 }
 
+// Functor adapter passed to native::templates::exponential_impl_ as its
+// kernel template argument; it forwards the call unchanged to the XPU
+// exponential kernel. The RNG template parameter is not used by this
+// adapter itself.
+//
+// The generator is spelled std::optional to match the rest of this file
+// (including exponential_, the only visible user of this stub).
+template <typename RNG>
+struct ExponentialStub {
+  void operator()(
+      TensorIteratorBase& iter,
+      double lambda,
+      std::optional<Generator> gen) {
+    native::xpu::exponential_kernel(iter, lambda, gen);
+  }
+};
+
+// In-place exponential_: delegates to the shared
+// native::templates::exponential_impl_ template, instantiated with
+// ExponentialStub as the kernel functor, and returns the reference it yields.
+// `generator` is moved into the call.
+Tensor& XPUNativeFunctions::exponential_(
+    Tensor& self,
+    double lambda,
+    std::optional<Generator> generator) {
+  Tensor& filled =
+      native::templates::exponential_impl_<ExponentialStub, Generator>(
+          self, lambda, std::move(generator));
+  return filled;
+}
+
 /* The largest consecutive integer representable in float32 (2^24) */
 constexpr int64_t FLOAT32_MAX_CONSECUTIVE_INT = 1 << (24);
 
 
@@ -2,6 +2,7 @@
 #include <ATen/core/Reduction.h>
 #include <ATen/core/Tensor.h>
 #include <ATen/native/xpu/sycl/BinaryMiscOpsKernels.h>
+#include <ATen/native/xpu/sycl/LossKernels.h>
 #include <ATen/native/xpu/sycl/PointwiseOpsKernels.h>
 #include <ATen/xpu/XPUNativeFunctions.h>
 #include <comm/RegisterUtils.h>
@@ -79,6 +80,60 @@ Tensor& XPUNativeFunctions::mse_loss_backward_out(
   return grad_input;
 }
 
+// Functional binary_cross_entropy: borrows the optional weight tensor,
+// allocates a loss buffer shaped like `self`, and returns the result of the
+// XPU binary-cross-entropy kernel.
+Tensor XPUNativeFunctions::binary_cross_entropy(
+    const Tensor& self,
+    const Tensor& target,
+    const std::optional<Tensor>& weight_opt,
+    int64_t reduction) {
+  // The holder must stay alive for as long as `weight` is referenced.
+  c10::MaybeOwned<Tensor> weight_holder =
+      at::borrow_from_optional_tensor(weight_opt);
+  const Tensor& weight = *weight_holder;
+  Tensor loss_buffer = at::empty_like(self);
+  return native::xpu::binary_cross_entropy_kernel(
+      self, target, weight, reduction, loss_buffer);
+}
+
+// Out-variant of binary_cross_entropy: borrows the optional weight tensor
+// and returns the result of the XPU binary-cross-entropy kernel, which is
+// given `loss` as its destination.
+Tensor& XPUNativeFunctions::binary_cross_entropy_out(
+    const Tensor& self,
+    const Tensor& target,
+    const std::optional<Tensor>& weight_opt,
+    int64_t reduction,
+    Tensor& loss) {
+  // The holder must stay alive for as long as `weight` is referenced.
+  c10::MaybeOwned<Tensor> weight_holder =
+      at::borrow_from_optional_tensor(weight_opt);
+  const Tensor& weight = *weight_holder;
+  return native::xpu::binary_cross_entropy_kernel(
+      self, target, weight, reduction, loss);
+}
+
+// Functional binary_cross_entropy_backward: borrows the optional weight
+// tensor, allocates a gradient buffer shaped like `self`, and returns the
+// result of the XPU backward kernel.
+Tensor XPUNativeFunctions::binary_cross_entropy_backward(
+    const Tensor& grad_output,
+    const Tensor& self,
+    const Tensor& target,
+    const std::optional<Tensor>& weight_opt,
+    int64_t reduction) {
+  // The holder must stay alive for as long as `weight` is referenced.
+  c10::MaybeOwned<Tensor> weight_holder =
+      at::borrow_from_optional_tensor(weight_opt);
+  const Tensor& weight = *weight_holder;
+  Tensor grad_buffer = at::empty_like(self);
+  return native::xpu::binary_cross_entropy_backward_kernel(
+      grad_output, self, target, weight, reduction, grad_buffer);
+}
+
+// Out-variant of binary_cross_entropy_backward: borrows the optional weight
+// tensor and returns the result of the XPU backward kernel, which is given
+// `grad_input` as its destination.
+Tensor& XPUNativeFunctions::binary_cross_entropy_backward_out(
+    const Tensor& grad_output,
+    const Tensor& self,
+    const Tensor& target,
+    const std::optional<Tensor>& weight_opt,
+    int64_t reduction,
+    Tensor& grad_input) {
+  // The holder must stay alive for as long as `weight` is referenced.
+  c10::MaybeOwned<Tensor> weight_holder =
+      at::borrow_from_optional_tensor(weight_opt);
+  const Tensor& weight = *weight_holder;
+  return native::xpu::binary_cross_entropy_backward_kernel(
+      grad_output, self, target, weight, reduction, grad_input);
+}
+
 Tensor XPUNativeFunctions::huber_loss(
     const Tensor& input,
     const Tensor& target,
 
@@ -0,0 +1,110 @@
+#include <ATen/ATen.h>
+#include <ATen/AccumulateType.h>
+#include <ATen/core/Tensor.h>
+#include <ATen/native/xpu/sycl/RenormKernel.h>
+#include <ATen/xpu/XPUNativeFunctions.h>
+#include <comm/RegisterUtils.h>
+
+namespace at {
+
+// Argument validation and output preparation for renorm.
+//
+// Checks, in order: `p` is real and strictly positive, `maxnorm` is real and
+// non-negative, and `self` has more than one dimension. Afterwards `output`
+// is resized to self's sizes/options if it is already defined, or freshly
+// created with them otherwise. `dim` is accepted but not inspected here.
+void renorm_meta(
+    const Tensor& self,
+    const Scalar& p,
+    int64_t dim,
+    const Scalar& maxnorm,
+    Tensor& output) {
+  TORCH_CHECK(!p.isComplex(), "renorm: p must be real-valued");
+  TORCH_CHECK(p.toDouble() > 0.0, "renorm: non-positive-norm not supported");
+  TORCH_CHECK(!maxnorm.isComplex(), "renorm: maxnorm must be real-valued");
+  TORCH_CHECK(
+      maxnorm.toDouble() >= 0.0,
+      "renorm: expected maxnorm to be >= 0 but got ",
+      maxnorm.toDouble());
+  const auto ndim = self.dim();
+  TORCH_CHECK(
+      ndim > 1,
+      "renorm: input needs at least 2 dimensions, got ",
+      ndim,
+      " dimensions");
+
+  if (!output.defined()) {
+    output = xpu::create_out(self.sizes(), {}, self.options());
+    return;
+  }
+  xpu::resize_out(output, self.sizes(), {}, self.options());
+}
+
+// Core of renorm: multiplies `self` by a per-slice scale factor and writes
+// the product into `out`.
+//
+// Steps:
+//   1. wrap `dim` against self's rank and collect every other dimension;
+//   2. compute the p-norm over those dimensions with keepdim=true, in the
+//      accumulate type when that differs from self's dtype;
+//   3. let renorm_scale_factor_kernel turn the norms into scale factors
+//      (given `maxnorm`);
+//   4. multiply `self` by the factors into `out`.
+//
+// Returns the reference produced by at::mul_outf for `out`.
+Tensor& renorm_impl(
+    const Tensor& self,
+    const Scalar& p,
+    int64_t dim,
+    const Scalar& maxnorm,
+    Tensor& out) {
+  auto self_sizes = self.sizes();
+  dim = c10::maybe_wrap_dim(dim, self_sizes.size());
+
+  // reduce_dims = [0, rank) with `dim` removed.
+  DimVector reduce_dims(self_sizes.size());
+  std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
+  reduce_dims.erase(reduce_dims.begin() + dim);
+
+  // Request the accumulate type explicitly only when it differs from the
+  // input dtype; otherwise let linalg_vector_norm use its default.
+  auto dtype = self.scalar_type();
+  auto acc_type = at::toAccumulateType(dtype, c10::DeviceType::XPU);
+  Tensor norm;
+  if (acc_type != dtype) {
+    norm = at::linalg_vector_norm(
+        self,
+        p.toDouble(),
+        reduce_dims,
+        /*keepdim=*/true,
+        /*dtype=*/acc_type);
+  } else {
+    norm = at::linalg_vector_norm(
+        self,
+        p.toDouble(),
+        reduce_dims,
+        /*keepdim=*/true);
+  }
+
+  // When the accumulate type equals the real value type of the input dtype,
+  // `factor` is the same tensor as `norm`, so the iterator's output and input
+  // coincide (presumably why the memory-overlap check is switched off).
+  // Otherwise a separate buffer with self's options receives the factors.
+  auto factor = (acc_type == c10::toRealValueType(dtype))
+      ? norm
+      : at::empty(norm.sizes(), self.options());
+  auto iter = TensorIteratorConfig()
+                  .add_output(factor)
+                  .add_input(norm)
+                  .set_check_mem_overlap(false)
+                  .cast_common_dtype_to_outputs(true)
+                  .build();
+
+  at::native::xpu::renorm_scale_factor_kernel(iter, maxnorm.toDouble());
+  // `out` is already a mutable reference, so no const_cast is needed.
+  return at::mul_outf(self, factor, out);
+}
+
+// In-place renorm: validates the arguments and prepares `self` as the output
+// via renorm_meta, then runs renorm_impl with `self` as both the input and
+// the destination.
+Tensor& XPUNativeFunctions::renorm_(
+    Tensor& self,
+    const Scalar& p,
+    int64_t dim,
+    const Scalar& maxnorm) {
+  Tensor& destination = self;
+  renorm_meta(self, p, dim, maxnorm, destination);
+  renorm_impl(self, p, dim, maxnorm, destination);
+  return self;
+}
+// Out-variant of renorm: renorm_meta validates the arguments and resizes or
+// creates `out`; renorm_impl then writes the renormalized values into it.
+Tensor& XPUNativeFunctions::renorm_out(
+    const Tensor& self,
+    const Scalar& p,
+    int64_t dim,
+    const Scalar& maxnorm,
+    Tensor& out) {
+  // Validation and output preparation first, computation second.
+  renorm_meta(self, p, dim, maxnorm, out);
+  renorm_impl(self, p, dim, maxnorm, out);
+
+  return out;
+}
+Tensor XPUNativeFunctions::renorm(
+    const Tensor& self,
+    const Scalar& p,
+    int64_t dim,
+    const Scalar& maxnorm) {
+  Tensor out;
+  renorm_meta(self, p, dim, maxnorm, out);
+  renorm_impl(self, p, dim, maxnorm, out);
+  return out;
+}
+} // namespace at