
Commit

Merge branch 'main' into tongsu/dilated_max_pool
Stonepia authored Jul 22, 2024
2 parents 3216512 + 5f41843 commit abcae7d
Showing 50 changed files with 2,876 additions and 175 deletions.
20 changes: 0 additions & 20 deletions cmake/Modules/FindSYCL.cmake
@@ -71,26 +71,6 @@ else()
)
endif()

set(SYCL_LIBRARIES)
find_library(SYCL_RUNTIME_LIBRARY sycl HINTS ${SYCL_LIBRARY_DIR})
# On Windows there is currently no sycl.lib; only sycl7.lib with a version suffix
# exists, where 7 is the current SYCL runtime version.
# Until oneAPI ships a sycl.lib without the version suffix,
# sycl_runtime_version must be hardcoded and bumped whenever the SYCL runtime version is bumped.
# TODO: remove this when sycl.lib is supported on Windows
if(WIN32)
set(sycl_runtime_version 7)
find_library(
SYCL_RUNTIME_LIBRARY
NAMES "sycl${sycl_runtime_version}"
HINTS ${SYCL_LIBRARY_DIR}
)
if(SYCL_RUNTIME_LIBRARY STREQUAL "SYCL_RUNTIME_LIBRARY-NOTFOUND")
message(FATAL_ERROR "Cannot find a SYCL library on Windows")
endif()
endif()
list(APPEND SYCL_LIBRARIES ${SYCL_RUNTIME_LIBRARY})

# Parse HOST_COMPILATION mode.
option(SYCL_HOST_COMPILATION_CXX "Generated file extension" ON)

127 changes: 126 additions & 1 deletion src/ATen/native/xpu/BinaryOps.cpp
@@ -12,10 +12,10 @@
#include <ATen/native/xpu/sycl/BinaryRemainderKernel.h>
#include <ATen/native/xpu/sycl/CopysignKernel.h>
#include <ATen/native/xpu/sycl/GcdLcmKernels.h>
#include <ATen/native/xpu/sycl/LogAddExpKernels.h>
#include <ATen/native/xpu/sycl/MaxMinElementwiseKernels.h>

namespace at {

Tensor XPUNativeFunctions::add(
const Tensor& self,
const Tensor& other,
@@ -459,6 +459,28 @@ Tensor& XPUNativeFunctions::minimum_out(
return output;
}

Tensor& XPUNativeFunctions::logit_backward_out(
const Tensor& grad_output,
const Tensor& input,
std::optional<double> eps,
Tensor& grad_input) {
TensorIterator iter;
iter.build_borrowing_binary_op(grad_input, grad_output, input);
native::xpu::logit_backward_kernel(iter, Scalar(eps ? eps.value() : -1.0));
return grad_input;
}

Tensor XPUNativeFunctions::logit_backward(
const Tensor& grad_output,
const Tensor& input,
std::optional<double> eps) {
Tensor grad_input;
TensorIterator iter;
iter.build_borrowing_binary_op(grad_input, grad_output, input);
native::xpu::logit_backward_kernel(iter, Scalar(eps ? eps.value() : -1.0));
return iter.output();
}

Tensor& XPUNativeFunctions::sigmoid_backward_out(
const Tensor& grad_output,
const Tensor& output,
@@ -479,6 +501,109 @@ Tensor XPUNativeFunctions::sigmoid_backward(
return iter.output();
}

Tensor XPUNativeFunctions::logaddexp(const Tensor& self, const Tensor& other) {
Tensor out;
auto iter = TensorIterator::borrowing_binary_op(out, self, other);
native::xpu::logaddexp_kernel(iter);
return iter.output();
}

Tensor& XPUNativeFunctions::logaddexp_out(
const Tensor& self,
const Tensor& other,
Tensor& out) {
auto iter = TensorIterator::borrowing_binary_op(out, self, other);
native::xpu::logaddexp_kernel(iter);
return out;
}

Tensor XPUNativeFunctions::logaddexp2(const Tensor& self, const Tensor& other) {
Tensor out;
auto iter = TensorIterator::borrowing_binary_op(out, self, other);
native::xpu::logaddexp2_kernel(iter);
return iter.output();
}

Tensor& XPUNativeFunctions::logaddexp2_out(
const Tensor& self,
const Tensor& other,
Tensor& out) {
auto iter = TensorIterator::borrowing_binary_op(out, self, other);
native::xpu::logaddexp2_kernel(iter);
return out;
}

Tensor& XPUNativeFunctions::floor_divide_out(
const Tensor& self,
const Tensor& other,
Tensor& output) {
auto iter = TensorIterator::binary_op(output, self, other);
native::xpu::div_floor_kernel(iter);
if (!output.defined()) {
output = iter.output();
}
return output;
}

Tensor XPUNativeFunctions::floor_divide(
const Tensor& self,
const Tensor& other) {
Tensor output;
auto iter = TensorIterator::binary_op(output, self, other);
native::xpu::div_floor_kernel(iter);
return iter.output();
}

Tensor& XPUNativeFunctions::floor_divide_(Tensor& self, const Tensor& other) {
return XPUNativeFunctions::floor_divide_out(self, other, self);
}

TensorIterator meta_fmin_fmax(
const char* const name,
const Tensor& self,
const Tensor& other,
Tensor& output) {
TORCH_CHECK(
!self.is_complex() && !other.is_complex(),
name,
" not implemented for complex tensors.");
TensorIterator iter;
iter.build_binary_op(output, self, other);
return iter;
}

Tensor& XPUNativeFunctions::fmax_out(
const Tensor& self,
const Tensor& other,
Tensor& output) {
auto iter = meta_fmin_fmax("fmax", self, other, output);
native::xpu::fmax_kernel(iter);
return output;
}

Tensor XPUNativeFunctions::fmax(const Tensor& self, const Tensor& other) {
Tensor output;
auto iter = meta_fmin_fmax("fmax", self, other, output);
native::xpu::fmax_kernel(iter);
return iter.output();
}

Tensor& XPUNativeFunctions::fmin_out(
const Tensor& self,
const Tensor& other,
Tensor& output) {
auto iter = meta_fmin_fmax("fmin", self, other, output);
native::xpu::fmin_kernel(iter);
return output;
}

Tensor XPUNativeFunctions::fmin(const Tensor& self, const Tensor& other) {
Tensor output;
auto iter = meta_fmin_fmax("fmin", self, other, output);
native::xpu::fmin_kernel(iter);
return iter.output();
}

Tensor XPUNativeFunctions::atan2(const Tensor& self, const Tensor& other) {
Tensor out;
TensorIterator iter;
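The binary ops added above are reached through the regular ATen dispatcher once these XPUNativeFunctions entries are registered. A minimal usage sketch, not part of this commit, assuming an XPU-enabled build where at::kXPU tensors are available (the function name binary_ops_example is illustrative):

#include <ATen/ATen.h>

void binary_ops_example() {
  // Two small float tensors on the XPU backend (assumes an XPU build).
  auto opts = at::TensorOptions().device(at::kXPU).dtype(at::kFloat);
  at::Tensor a = at::randn({2, 3}, opts);
  at::Tensor b = at::randn({2, 3}, opts);

  at::Tensor c = at::logaddexp(a, b);     // routes to logaddexp_kernel
  at::Tensor d = at::logaddexp2(a, b);    // routes to logaddexp2_kernel
  at::Tensor e = at::floor_divide(a, b);  // routes to div_floor_kernel
  at::Tensor f = at::fmax(a, b);          // routes to fmax_kernel
  at::Tensor g = at::fmin(a, b);          // routes to fmin_kernel
}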
18 changes: 18 additions & 0 deletions src/ATen/native/xpu/Distributions.cpp
@@ -191,6 +191,24 @@ Tensor& XPUNativeFunctions::random_(
return random_(self, 0, to, std::move(generator));
}

template <typename RNG>
struct ExponentialStub {
void operator()(
TensorIteratorBase& iter,
double lambda,
c10::optional<Generator> gen) {
native::xpu::exponential_kernel(iter, lambda, gen);
}
};

Tensor& XPUNativeFunctions::exponential_(
Tensor& self,
double lambda,
std::optional<Generator> generator) {
return native::templates::exponential_impl_<ExponentialStub, Generator>(
self, lambda, std::move(generator));
}

/* The largest consecutive integer representable in float32 (2^24) */
constexpr int64_t FLOAT32_MAX_CONSECUTIVE_INT = 1 << (24);

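The exponential_ entry added above is the usual in-place Tensor method. A minimal usage sketch, not part of this commit, assuming an XPU-enabled build (the function name is illustrative):

#include <ATen/ATen.h>

void exponential_example() {
  auto opts = at::TensorOptions().device(at::kXPU).dtype(at::kFloat);
  at::Tensor samples = at::empty({1024}, opts);
  // In-place draw from Exponential(lambda = 2.0); dispatches through ExponentialStub
  // to native::xpu::exponential_kernel.
  samples.exponential_(2.0);
}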
55 changes: 55 additions & 0 deletions src/ATen/native/xpu/Loss.cpp
@@ -2,6 +2,7 @@
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <ATen/native/xpu/sycl/BinaryMiscOpsKernels.h>
#include <ATen/native/xpu/sycl/LossKernels.h>
#include <ATen/native/xpu/sycl/PointwiseOpsKernels.h>
#include <ATen/xpu/XPUNativeFunctions.h>
#include <comm/RegisterUtils.h>
@@ -79,6 +80,60 @@ Tensor& XPUNativeFunctions::mse_loss_backward_out(
return grad_input;
}

Tensor XPUNativeFunctions::binary_cross_entropy(
const Tensor& self,
const Tensor& target,
const std::optional<Tensor>& weight_opt,
int64_t reduction) {
c10::MaybeOwned<Tensor> weight_maybe_owned =
at::borrow_from_optional_tensor(weight_opt);
const Tensor& weight = *weight_maybe_owned;
Tensor loss = at::empty_like(self);
return native::xpu::binary_cross_entropy_kernel(
self, target, weight, reduction, loss);
}

Tensor& XPUNativeFunctions::binary_cross_entropy_out(
const Tensor& self,
const Tensor& target,
const std::optional<Tensor>& weight_opt,
int64_t reduction,
Tensor& loss) {
c10::MaybeOwned<Tensor> weight_maybe_owned =
at::borrow_from_optional_tensor(weight_opt);
const Tensor& weight = *weight_maybe_owned;
return native::xpu::binary_cross_entropy_kernel(
self, target, weight, reduction, loss);
}

Tensor XPUNativeFunctions::binary_cross_entropy_backward(
const Tensor& grad_output,
const Tensor& self,
const Tensor& target,
const std::optional<Tensor>& weight_opt,
int64_t reduction) {
c10::MaybeOwned<Tensor> weight_maybe_owned =
at::borrow_from_optional_tensor(weight_opt);
const Tensor& weight = *weight_maybe_owned;
Tensor grad_input = at::empty_like(self);
return native::xpu::binary_cross_entropy_backward_kernel(
grad_output, self, target, weight, reduction, grad_input);
}

Tensor& XPUNativeFunctions::binary_cross_entropy_backward_out(
const Tensor& grad_output,
const Tensor& self,
const Tensor& target,
const std::optional<Tensor>& weight_opt,
int64_t reduction,
Tensor& grad_input) {
c10::MaybeOwned<Tensor> weight_maybe_owned =
at::borrow_from_optional_tensor(weight_opt);
const Tensor& weight = *weight_maybe_owned;
return native::xpu::binary_cross_entropy_backward_kernel(
grad_output, self, target, weight, reduction, grad_input);
}

Tensor XPUNativeFunctions::huber_loss(
const Tensor& input,
const Tensor& target,
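The binary cross-entropy entries added above follow the standard ATen signatures. A minimal usage sketch, not part of this commit, assuming an XPU-enabled build (names are illustrative):

#include <ATen/ATen.h>

void bce_example() {
  auto opts = at::TensorOptions().device(at::kXPU).dtype(at::kFloat);
  at::Tensor input = at::sigmoid(at::randn({8}, opts));  // probabilities in (0, 1)
  at::Tensor target = at::randint(0, 2, {8}, opts);      // binary targets
  // weight is optional and omitted here; Mean reduction returns a scalar tensor.
  at::Tensor loss = at::binary_cross_entropy(
      input, target, /*weight=*/{}, /*reduction=*/at::Reduction::Mean);
}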
110 changes: 110 additions & 0 deletions src/ATen/native/xpu/Normalization.cpp
@@ -0,0 +1,110 @@
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/core/Tensor.h>
#include <ATen/native/xpu/sycl/RenormKernel.h>
#include <ATen/xpu/XPUNativeFunctions.h>
#include <comm/RegisterUtils.h>

namespace at {

void renorm_meta(
const Tensor& self,
const Scalar& p,
int64_t dim,
const Scalar& maxnorm,
Tensor& output) {
TORCH_CHECK(!p.isComplex(), "renorm: p must be real-valued");
TORCH_CHECK(p.toDouble() > 0.0, "renorm: non-positive-norm not supported");
TORCH_CHECK(!maxnorm.isComplex(), "renorm: maxnorm must be real-valued");
TORCH_CHECK(
maxnorm.toDouble() >= 0.0,
"renorm: expected maxnorm to be >= 0 but got ",
maxnorm.toDouble());
const auto ndim = self.dim();
TORCH_CHECK(
ndim > 1,
"renorm: input needs at least 2 dimensions, got ",
ndim,
" dimensions");
if (output.defined()) {
xpu::resize_out(output, self.sizes(), {}, self.options());
} else {
output = xpu::create_out(self.sizes(), {}, self.options());
}
}

Tensor& renorm_impl(
const Tensor& self,
const Scalar& p,
int64_t dim,
const Scalar& maxnorm,
Tensor& out) {
auto self_sizes = self.sizes();
dim = c10::maybe_wrap_dim(dim, self_sizes.size());

DimVector reduce_dims(self_sizes.size());
std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
reduce_dims.erase(reduce_dims.begin() + dim);

auto dtype = self.scalar_type();
auto acc_type = at::toAccumulateType(dtype, c10::DeviceType::XPU);
Tensor norm;
if (acc_type != dtype) {
norm = at::linalg_vector_norm(
self,
p.toDouble(),
reduce_dims,
/*keepdim=*/true,
/*dtype=*/acc_type);
} else {
norm = at::linalg_vector_norm(
self,
p.toDouble(),
reduce_dims,
/*keepdim=*/true);
}

auto factor = (acc_type == c10::toRealValueType(dtype))
? norm
: at::empty(norm.sizes(), self.options());
auto iter = TensorIteratorConfig()
.add_output(factor)
.add_input(norm)
.set_check_mem_overlap(false)
.cast_common_dtype_to_outputs(true)
.build();

at::native::xpu::renorm_scale_factor_kernel(iter, maxnorm.toDouble());
return at::mul_outf(self, factor, const_cast<Tensor&>(out));
}

Tensor& XPUNativeFunctions::renorm_(
Tensor& self,
const Scalar& p,
int64_t dim,
const Scalar& maxnorm) {
renorm_meta(self, p, dim, maxnorm, self);
renorm_impl(self, p, dim, maxnorm, self);
return self;
}
Tensor& XPUNativeFunctions::renorm_out(
const Tensor& self,
const Scalar& p,
int64_t dim,
const Scalar& maxnorm,
Tensor& out) {
renorm_meta(self, p, dim, maxnorm, out);
renorm_impl(self, p, dim, maxnorm, out);
return out;
}
Tensor XPUNativeFunctions::renorm(
const Tensor& self,
const Scalar& p,
int64_t dim,
const Scalar& maxnorm) {
Tensor out;
renorm_meta(self, p, dim, maxnorm, out);
renorm_impl(self, p, dim, maxnorm, out);
return out;
}
} // namespace at
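
For reference, the renorm entry points registered above map onto the standard ATen surface. A minimal usage sketch, not part of this commit, assuming an XPU-enabled build (names are illustrative):

#include <ATen/ATen.h>

void renorm_example() {
  auto opts = at::TensorOptions().device(at::kXPU).dtype(at::kFloat);
  at::Tensor x = at::randn({4, 16}, opts);
  // Clamp the L2 norm of each sub-tensor along dim 0 to at most 1.0.
  at::Tensor y = at::renorm(x, /*p=*/2, /*dim=*/0, /*maxnorm=*/1.0);
  x.renorm_(/*p=*/2, /*dim=*/0, /*maxnorm=*/1.0);  // in-place variant
}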
