Commit
Add aten::logit/backward and variant operators (#546)
Co-authored-by: Feng Yuan <[email protected]>
yucai-intel and fengyuan14 authored Jul 20, 2024
1 parent c96d313 commit b8888da
Showing 10 changed files with 169 additions and 10 deletions.
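
The diff below registers the logit forward, in-place, out-variant, and backward operators for the XPU backend. As a rough orientation (not part of the commit), here is a minimal smoke-test sketch of how the new ops could be exercised from Python, assuming a PyTorch build that includes this XPU backend and an available `xpu` device; shapes and the eps value are illustrative only:

import torch

# Assumes torch.xpu is present in this build; fall back to CPU otherwise.
device = "xpu" if hasattr(torch, "xpu") and torch.xpu.is_available() else "cpu"

x = (torch.rand(8, device=device) * 0.98 + 0.01).requires_grad_()

y = torch.logit(x, eps=1e-6)   # functional form (logit / logit.out)
y.sum().backward()             # exercises the logit backward kernel

with torch.no_grad():
    x.detach().clone().logit_(eps=1e-6)  # in-place variant (logit_)

# Compare against the CPU reference implementation.
torch.testing.assert_close(
    y.detach().cpu(), torch.logit(x.detach().cpu(), eps=1e-6))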
22 changes: 22 additions & 0 deletions src/ATen/native/xpu/BinaryOps.cpp
@@ -459,6 +459,28 @@ Tensor& XPUNativeFunctions::minimum_out(
  return output;
}

Tensor& XPUNativeFunctions::logit_backward_out(
    const Tensor& grad_output,
    const Tensor& input,
    std::optional<double> eps,
    Tensor& grad_input) {
  TensorIterator iter;
  iter.build_borrowing_binary_op(grad_input, grad_output, input);
  native::xpu::logit_backward_kernel(iter, Scalar(eps ? eps.value() : -1.0));
  return grad_input;
}

Tensor XPUNativeFunctions::logit_backward(
    const Tensor& grad_output,
    const Tensor& input,
    std::optional<double> eps) {
  Tensor grad_input;
  TensorIterator iter;
  iter.build_borrowing_binary_op(grad_input, grad_output, input);
  native::xpu::logit_backward_kernel(iter, Scalar(eps ? eps.value() : -1.0));
  return iter.output();
}

Tensor& XPUNativeFunctions::sigmoid_backward_out(
    const Tensor& grad_output,
    const Tensor& output,
24 changes: 24 additions & 0 deletions src/ATen/native/xpu/UnaryOps.cpp
@@ -461,6 +461,30 @@ Tensor& XPUNativeFunctions::sigmoid_out(const Tensor& self, Tensor& out) {
  return out;
}

Tensor& XPUNativeFunctions::logit_out(
    const Tensor& self,
    std::optional<double> eps,
    Tensor& out) {
  TensorIterator iter;
  iter.build_borrowing_unary_float_op(out, self);
  native::xpu::logit_kernel(iter, Scalar(eps ? eps.value() : -1.0));
  return out;
}

Tensor XPUNativeFunctions::logit(
    const Tensor& self,
    std::optional<double> eps) {
  Tensor out;
  TensorIterator iter;
  iter.build_borrowing_unary_float_op(out, self);
  native::xpu::logit_kernel(iter, Scalar(eps ? eps.value() : -1.0));
  return iter.output();
}

Tensor& XPUNativeFunctions::logit_(Tensor& self, std::optional<double> eps) {
  return at::logit_out(self, self, eps);
}

Tensor XPUNativeFunctions::sgn(const Tensor& self) {
  Tensor out;
  TensorIterator iter;
2 changes: 0 additions & 2 deletions src/ATen/native/xpu/XPUFallback.template
@@ -229,8 +229,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"_linalg_svd.U",
"linspace.out",
"_logcumsumexp",
"logit",
"logit_backward.grad_input",
"log_normal_",
"logspace.out",
"lu_unpack.out",
54 changes: 53 additions & 1 deletion src/ATen/native/xpu/sycl/BinaryMiscBackwardOpsKernels.cpp
@@ -1,9 +1,9 @@
#include <ATen/ATen.h>
#include <ATen/AccumulateType.h>
#include <ATen/Dispatch.h>
#include <ATen/NumericUtils.h>
#include <ATen/native/Activation.h>
#include <ATen/native/TensorIterator.h>

#include <ATen/native/xpu/sycl/Loops.h>

namespace at::native::xpu {
@@ -43,6 +43,58 @@ void sigmoid_backward_kernel(TensorIteratorBase& iter) {
}
}

template <typename scalar_t>
struct LogitBackward0Functor {
  using T_ACC = acc_type_device<scalar_t, c10::DeviceType::XPU>;
  scalar_t operator()(scalar_t dy, scalar_t x) const {
    const T_ACC dy_acc = static_cast<T_ACC>(dy);
    const T_ACC x_acc = static_cast<T_ACC>(x);
    // suppress compiler optimization on data type promotion.
    volatile T_ACC res = (x_acc < T_ACC(0) || x_acc > T_ACC(1))
        ? std::numeric_limits<T_ACC>::quiet_NaN()
        : dy_acc / (x_acc * (T_ACC(1) - x_acc));
    return res;
  }
};

template <typename scalar_t>
struct LogitBackward1Functor {
  using T_ACC = acc_type_device<scalar_t, c10::DeviceType::XPU>;
  scalar_t operator()(scalar_t dy, scalar_t x) const {
    const T_ACC dy_acc = static_cast<T_ACC>(dy);
    const T_ACC x_acc = static_cast<T_ACC>(x);
    // suppress compiler optimization on data type promotion.
    volatile T_ACC res = (x_acc < lo_ || x_acc > hi_)
        ? T_ACC(0)
        : dy_acc / (x_acc * (T_ACC(1) - x_acc));
    return res;
  }
  LogitBackward1Functor(const T_ACC lo, const T_ACC hi) : lo_(lo), hi_(hi) {}

 private:
  T_ACC lo_;
  T_ACC hi_;
};

void logit_backward_kernel(TensorIteratorBase& iter, const Scalar& eps_scalar) {
  AT_DISPATCH_FLOATING_TYPES_AND2(
      at::ScalarType::Half,
      at::ScalarType::BFloat16,
      iter.dtype(),
      "logit_xpu",
      [&]() {
        using T_ACC = acc_type<scalar_t, true>;
        const T_ACC eps = eps_scalar.to<T_ACC>();
        if (eps < T_ACC(0)) {
          gpu_kernel(iter, LogitBackward0Functor<scalar_t>());
        } else {
          const T_ACC lo = eps;
          const T_ACC hi = T_ACC(1) - eps;
          gpu_kernel(iter, LogitBackward1Functor<scalar_t>(lo, hi));
        }
      });
}

template <typename scalar_t>
struct TanhBackwardComplexFunctor {
  scalar_t operator()(scalar_t a, scalar_t b) const {
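For reference (not part of the diff): the functors above implement the logit derivative, d/dx log(x / (1 - x)) = 1 / (x * (1 - x)), so grad_input = grad_output / (x * (1 - x)). The wrapper passes eps = -1.0 when no eps is given, selecting LogitBackward0Functor, which returns NaN outside [0, 1]; with an eps, LogitBackward1Functor zeroes the gradient outside [eps, 1 - eps]. A small CPU-only sanity check of the closed form, independent of the XPU backend:

import torch

# Leaf tensor strictly inside (0, 1) so the closed-form gradient is finite.
x = (torch.rand(16, dtype=torch.float64) * 0.98 + 0.01).requires_grad_()
grad_out = torch.rand(16, dtype=torch.float64)

torch.logit(x).backward(grad_out)

expected = grad_out / (x.detach() * (1 - x.detach()))
torch.testing.assert_close(x.grad, expected)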
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/BinaryMiscBackwardOpsKernels.h
@@ -8,4 +8,6 @@ void sigmoid_backward_kernel(TensorIteratorBase& iter);

void tanh_backward_kernel(TensorIteratorBase& iter);

void logit_backward_kernel(TensorIteratorBase& iter, const Scalar& eps_scalar);

} // namespace at::native::xpu
56 changes: 53 additions & 3 deletions src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.cpp
@@ -1,11 +1,14 @@
#include <ATen/ATen.h>

#include <ATen/AccumulateType.h>
#include <ATen/Dispatch.h>
#include <ATen/NumericUtils.h>
#include <ATen/core/Tensor.h>
#include <ATen/native/Math.h>
#include <ATen/native/TensorIterator.h>
#include <c10/core/ScalarType.h>

#include <ATen/native/xpu/sycl/Loops.h>
#include <c10/core/Scalar.h>
#include <c10/core/ScalarType.h>
#include <c10/util/complex.h>

namespace at::native::xpu {

@@ -74,4 +77,51 @@ void erfc_kernel(TensorIteratorBase& iter) {
      [&]() { gpu_kernel(iter, ErfcFunctor<scalar_t>()); });
}

template <typename scalar_t>
struct Logit0Functor {
  using T_ACC = acc_type_device<scalar_t, c10::DeviceType::XPU>;
  scalar_t operator()(scalar_t x) const {
    const T_ACC x_acc = static_cast<T_ACC>(x);
    // suppress compiler optimization on data type promotion.
    volatile T_ACC res = std::log(x_acc / (T_ACC(1) - x_acc));
    return res;
  }
};

template <typename scalar_t>
struct Logit1Functor {
  using T_ACC = acc_type_device<scalar_t, c10::DeviceType::XPU>;
  scalar_t operator()(scalar_t x) const {
    const T_ACC x_acc = static_cast<T_ACC>(x);
    T_ACC z = x_acc < lo_ ? lo_ : (x_acc > hi_ ? hi_ : x_acc);
    // suppress compiler optimization on data type promotion.
    volatile T_ACC res = std::log(z / (T_ACC(1) - z));
    return res;
  }
  Logit1Functor(const T_ACC lo, const T_ACC hi) : lo_(lo), hi_(hi) {}

 private:
  T_ACC lo_;
  T_ACC hi_;
};

void logit_kernel(TensorIteratorBase& iter, const Scalar& eps_scalar) {
  AT_DISPATCH_FLOATING_TYPES_AND2(
      at::ScalarType::Half,
      at::ScalarType::BFloat16,
      iter.common_dtype(),
      "logit_xpu",
      [&]() {
        using T_ACC = acc_type_device<scalar_t, c10::DeviceType::XPU>;
        const T_ACC eps = eps_scalar.to<T_ACC>();
        if (eps < T_ACC(0)) {
          gpu_kernel(iter, Logit0Functor<scalar_t>());
        } else {
          const T_ACC lo = eps;
          const T_ACC hi = T_ACC(1) - eps;
          gpu_kernel(iter, Logit1Functor<scalar_t>(lo, hi));
        }
      });
}

} // namespace at::native::xpu
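
For context (not part of the diff): Logit1Functor mirrors the documented eps semantics of torch.logit — the input is clamped to [eps, 1 - eps] before log(z / (1 - z)) is taken, which keeps the result finite at the endpoints. A short CPU sketch of that reference behavior:

import torch

eps = 1e-6
x = torch.tensor([0.0, 0.3, 1.0], dtype=torch.float64)

# Clamp to [eps, 1 - eps], then take log(z / (1 - z)).
z = x.clamp(eps, 1 - eps)
expected = torch.log(z / (1 - z))

torch.testing.assert_close(torch.logit(x, eps=eps), expected)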
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/UnarySpecialOpsKernels.h
@@ -10,4 +10,6 @@ void erf_kernel(TensorIteratorBase& iter);

void erfc_kernel(TensorIteratorBase& iter);

void logit_kernel(TensorIteratorBase& iter, const Scalar& eps_scalar);

} // namespace at::native::xpu
3 changes: 3 additions & 0 deletions test/xpu/extended/run_test_with_skip.py
@@ -131,6 +131,9 @@
"test_compare_cpu__batch_norm_with_update_xpu_bfloat16",
"test_compare_cpu__batch_norm_with_update_xpu_float16",
"test_compare_cpu_nn_functional_huber_loss_xpu_bfloat16",
# Aligned with the CUDA implementation by computing in the accumulate type,
# which the CPU implementation does not use. When XPU computes in the
# original data type, this case passes.
"test_compare_cpu_logit_xpu_bfloat16",

# Not implemented operators, aten::upsample_linear1d, aten::upsample_bilinear2d,
# aten::upsample_trilinear3d
1 change: 1 addition & 0 deletions test/xpu/xpu_test_utils.py
@@ -84,6 +84,7 @@
"log2",
"logaddexp",
"logaddexp2",
"logit",
"lt",
"logical_and",
"logical_or",
13 changes: 9 additions & 4 deletions yaml/xpu_functions.yaml
@@ -191,6 +191,15 @@ supported:
- log2
- log2_
- log2.out
- logaddexp
- logaddexp.out
- logaddexp2
- logaddexp2.out
- logit
- logit_
- logit.out
- logit_backward.grad_input
- logit_backward
- logical_and
- logical_and_
- logical_and.out
@@ -203,10 +212,6 @@ supported:
- logical_not
- logical_not_
- logical_not.out
- logaddexp
- logaddexp.out
- logaddexp2
- logaddexp2.out
- sqrt
- sqrt_
- sqrt.out
