Add aten::huber_loss/backward and their variants (#562)
Task list:
- [x] huber_loss
- [x] huber_loss.out
- [x] huber_loss_backward.out
xytintel authored Jul 12, 2024
1 parent 34f00ad commit cbb4ab1
Showing 9 changed files with 131 additions and 4 deletions.
63 changes: 61 additions & 2 deletions src/ATen/native/xpu/Loss.cpp
@@ -1,14 +1,24 @@
#include <ATen/ATen.h>
#include <ATen/core/Reduction.h>
#include <ATen/core/Tensor.h>
#include <ATen/xpu/XPUNativeFunctions.h>

#include <ATen/native/xpu/sycl/BinaryMiscOpsKernels.h>
#include <ATen/native/xpu/sycl/PointwiseOpsKernels.h>
#include <ATen/xpu/XPUNativeFunctions.h>
#include <comm/RegisterUtils.h>

namespace at {

static inline at::Tensor apply_loss_reduction(
    const at::Tensor& unreduced,
    int64_t reduction) {
  if (reduction == at::Reduction::Mean) {
    return unreduced.mean();
  } else if (reduction == at::Reduction::Sum) {
    return unreduced.sum();
  }
  return unreduced;
}
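
For context (not part of the diff): apply_loss_reduction collapses the elementwise loss according to the Reduction enum from ATen/core/Reduction.h. A minimal standalone sketch of the three modes, assuming a 1-D float tensor:

#include <ATen/ATen.h>

int main() {
  at::Tensor unreduced = at::arange(1., 4.); // [1.0, 2.0, 3.0]
  at::Tensor mean = unreduced.mean(); // Reduction::Mean -> 0-dim tensor holding 2.0
  at::Tensor sum = unreduced.sum();   // Reduction::Sum  -> 0-dim tensor holding 6.0
  // Reduction::None returns `unreduced` unchanged, shape [3].
  return 0;
}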

Tensor& XPUNativeFunctions::mse_loss_out(
    const Tensor& input,
    const Tensor& target,
@@ -69,4 +79,53 @@ Tensor& XPUNativeFunctions::mse_loss_backward_out(
  return grad_input;
}

Tensor XPUNativeFunctions::huber_loss(
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double delta) {
  TORCH_CHECK(
      delta > 0, "huber_loss does not support non-positive values for delta.");
  Tensor loss = at::empty_like(input);
  auto iter = TensorIterator::borrowing_binary_op(loss, input, target);
  native::xpu::huber_kernel(iter, delta);
  return apply_loss_reduction(loss, reduction);
}
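
For reference, the elementwise quantity that huber_kernel evaluates here is the standard Huber loss (the TORCH_CHECK guards delta > 0, which the formula assumes). With a the input element and b the target element:

\ell_\delta(a, b) =
\begin{cases}
  \tfrac{1}{2} z^2, & z < \delta \\
  \delta \left( z - \tfrac{1}{2} \delta \right), & z \ge \delta
\end{cases}
\qquad \text{with } z = |a - b|.

apply_loss_reduction then reduces the elementwise tensor according to `reduction`.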

Tensor& XPUNativeFunctions::huber_loss_out(
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double delta,
    Tensor& result) {
  TORCH_CHECK(
      delta > 0, "huber_loss does not support non-positive values for delta.");
  auto iter = TensorIterator::borrowing_binary_op(result, input, target);
  native::xpu::huber_kernel(iter, delta);
  if (reduction != Reduction::None) {
    auto reduced = apply_loss_reduction(result, reduction);
    result.resize_({});
    result.copy_(reduced);
  }
  return result;
}
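
A minimal usage sketch (not part of the commit; assumes a PyTorch build with XPU support): with the kernels above registered, the stock ATen entry point dispatches to them for XPU tensors instead of the CPU fallback.

#include <ATen/ATen.h>

int main() {
  at::Tensor input = at::randn({8}, at::device(at::kXPU));
  at::Tensor target = at::randn({8}, at::device(at::kXPU));
  // Schema defaults: reduction = Mean, delta = 1.0.
  at::Tensor loss = at::huber_loss(input, target, at::Reduction::Mean, /*delta=*/1.0);
  return 0;
}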

Tensor& XPUNativeFunctions::huber_loss_backward_out(
    const Tensor& grad_output,
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double delta,
    Tensor& grad_input) {
  auto norm = (reduction == Reduction::Mean) ? (1. / input.numel()) : 1.;
  auto iter = at::TensorIteratorConfig()
                  .add_output(grad_input)
                  .add_const_input(input)
                  .add_const_input(target)
                  .add_const_input(grad_output)
                  .build();
  native::xpu::huber_backward_kernel(iter, norm, delta);
  return grad_input;
}
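
For reference, the case split that HuberBackwardFunctor (in PointwiseOpsKernels.cpp below) implements is the derivative of the Huber loss with respect to the input. With x = input - target, g the incoming grad_output, and norm = 1/numel for Mean reduction (1 otherwise, as computed above):

\frac{\partial \ell}{\partial \mathrm{input}}
  = \mathrm{norm} \cdot g \cdot
\begin{cases}
  -\delta, & x < -\delta \\
  x, & -\delta \le x \le \delta \\
  \delta, & x > \delta
\end{cases}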

} // namespace at
2 changes: 0 additions & 2 deletions src/ATen/native/xpu/XPUFallback.template
@@ -210,8 +210,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"hardshrink.out",
"heaviside.out",
"histc",
"huber_loss",
"huber_loss_backward.out",
"i0.out",
"igammac.out",
"igamma.out",
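Context for the two deletions above: ops named in XPUFallback.template are routed to ATen's boxed CPU fallback, so once a native XPU kernel lands, the op must be dropped from this list or it would keep running on CPU. A rough sketch of the mechanism; the helper name here is an assumption, not this file's literal code, though at::native::cpu_fallback is the real ATen entry point:

#include <ATen/native/CPUFallback.h>
#include <torch/library.h>

// Hypothetical wrapper: copy XPU inputs to CPU, run the CPU kernel, copy back.
static void xpu_fallback_fn(const c10::OperatorHandle& op, torch::jit::Stack* stack) {
  at::native::cpu_fallback(op, stack);
}

TORCH_LIBRARY_IMPL(aten, XPU, m) {
  m.impl("huber_loss",
         torch::CppFunction::makeFromBoxedFunction<&xpu_fallback_fn>());
}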
21 changes: 21 additions & 0 deletions src/ATen/native/xpu/sycl/BinaryMiscOpsKernels.cpp
@@ -23,4 +23,25 @@ void mse_kernel(TensorIteratorBase& iter) {
[&]() { gpu_kernel(iter, MSEFunctor<scalar_t>()); });
}

template <typename scalar_t>
struct HuberFunctor {
  scalar_t operator()(scalar_t a, scalar_t b) const {
    auto z = std::abs(a - b);
    return z < delta_val_ ? scalar_t(0.5) * z * z
                          : delta_val_ * (z - scalar_t(0.5) * delta_val_);
  }
  HuberFunctor(scalar_t delta_val) : delta_val_(delta_val) {}

 private:
  scalar_t delta_val_;
};

void huber_kernel(TensorIterator& iter, double delta) {
  AT_DISPATCH_FLOATING_TYPES_AND2(
      kBFloat16, kHalf, iter.dtype(), "huber_xpu", [&iter, delta] {
        scalar_t delta_val(delta);
        gpu_kernel(iter, HuberFunctor<scalar_t>(delta_val));
      });
}

} // namespace at::native::xpu
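
One detail worth noting in huber_kernel above: AT_DISPATCH_FLOATING_TYPES_AND2(kBFloat16, kHalf, ...) also instantiates the functor for half-precision types, and delta is narrowed from double to scalar_t before the comparison. A standalone sketch (not from the commit) of that narrowing, which is plausibly why the bfloat16 comparison test is skipped in run_test_with_skip.py below:

#include <c10/util/Half.h>
#include <iostream>

int main() {
  double delta = 0.3;        // the user-facing delta is a double
  c10::Half delta_h(delta);  // narrowed inside the kHalf dispatch branch
  std::cout << static_cast<double>(delta_h) << "\n"; // ~0.2998, not 0.3
  return 0;
}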
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/BinaryMiscOpsKernels.h
@@ -6,4 +6,6 @@ namespace at::native::xpu {

void mse_kernel(TensorIteratorBase& iter);

void huber_kernel(TensorIterator& iter, double delta);

} // namespace at::native::xpu
37 changes: 37 additions & 0 deletions src/ATen/native/xpu/sycl/PointwiseOpsKernels.cpp
@@ -125,4 +125,41 @@ void mse_backward_kernel(TensorIterator& iter, const Scalar& value) {
      });
}

template <typename scalar_t>
struct HuberBackwardFunctor {
  scalar_t operator()(scalar_t input, scalar_t target, scalar_t grad_output)
      const {
    const auto x = input - target;
    if (x < -delta_val_) {
      return -norm_val_ * grad_output * delta_val_;
    } else if (x > delta_val_) {
      return norm_val_ * grad_output * delta_val_;
    } else {
      return norm_val_ * x * grad_output;
    }
  }
  HuberBackwardFunctor(scalar_t norm_val, scalar_t delta_val)
      : norm_val_(norm_val), delta_val_(delta_val) {}

 private:
  scalar_t norm_val_;
  scalar_t delta_val_;
};

void huber_backward_kernel(
    TensorIterator& iter,
    const Scalar& norm,
    double delta) {
  AT_DISPATCH_FLOATING_TYPES_AND2(
      kBFloat16,
      kHalf,
      iter.dtype(),
      "huber_backward_xpu",
      [&iter, &norm, delta] {
        auto norm_val = norm.to<scalar_t>();
        scalar_t delta_val(delta);
        gpu_kernel(iter, HuberBackwardFunctor<scalar_t>(norm_val, delta_val));
      });
}

} // namespace at::native::xpu
5 changes: 5 additions & 0 deletions src/ATen/native/xpu/sycl/PointwiseOpsKernels.h
@@ -10,4 +10,9 @@ void addcdiv_kernel(TensorIterator& iter, Scalar value);

void mse_backward_kernel(TensorIterator& iter, const Scalar& value);

void huber_backward_kernel(
    TensorIterator& iter,
    const Scalar& norm,
    double delta);

} // namespace at::native::xpu
1 change: 1 addition & 0 deletions test/xpu/extended/run_test_with_skip.py
@@ -125,6 +125,7 @@
"test_compare_cpu_nn_functional_batch_norm_xpu_bfloat16",
"test_compare_cpu__batch_norm_with_update_xpu_bfloat16",
"test_compare_cpu__batch_norm_with_update_xpu_float16",
"test_compare_cpu_nn_functional_huber_loss_xpu_bfloat16",

# Not implemented operators, aten::upsample_linear1d, aten::upsample_bilinear2d,
# aten::upsample_trilinear3d
1 change: 1 addition & 0 deletions test/xpu/xpu_test_utils.py
@@ -145,6 +145,7 @@
"nn.functional.upsample_nearest",
# "nn.functional.nll_loss", # Lack of XPU implementation of aten::nll_loss2d_forward. Will retrieve the case, only if the op is implemented.
"nn.functional.mse_loss",
"nn.functional.huber_loss",
"sigmoid",
"sgn",
"nn.functional.embedding_bag",
3 changes: 3 additions & 0 deletions yaml/xpu_functions.yaml
@@ -408,6 +408,9 @@ supported:
  - nll_loss_forward
  - nll_loss_backward.grad_input
  - nll_loss_backward
  - huber_loss
  - huber_loss.out
  - huber_loss_backward.out
  - batch_norm_stats
  - batch_norm_elemt
  - batch_norm_elemt.out
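
For context: yaml/xpu_functions.yaml is the op registration manifest, and listing a schema here makes the build's codegen expect a matching XPUNativeFunctions method, which Loss.cpp defines above. A sketch of the assumed shape of the generated declarations inside the XPUNativeFunctions struct (the generated header itself is not part of this commit):

static at::Tensor huber_loss(
    const at::Tensor& self,
    const at::Tensor& target,
    int64_t reduction,
    double delta);
static at::Tensor& huber_loss_out(
    const at::Tensor& self,
    const at::Tensor& target,
    int64_t reduction,
    double delta,
    at::Tensor& out);
static at::Tensor& huber_loss_backward_out(
    const at::Tensor& grad_output,
    const at::Tensor& self,
    const at::Tensor& target,
    int64_t reduction,
    double delta,
    at::Tensor& grad_input);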
