Skip to content

Commit

Permalink
Merge branch 'main' into polar
Browse files Browse the repository at this point in the history
  • Loading branch information
fengyuan14 authored Jul 29, 2024
2 parents 1a2c34b + e210c5c commit dc57d2b
Show file tree
Hide file tree
Showing 8 changed files with 126 additions and 2 deletions.
58 changes: 58 additions & 0 deletions src/ATen/native/xpu/Loss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,64 @@ Tensor& XPUNativeFunctions::mse_loss_backward_out(
return grad_input;
}


// Out-variant of smooth L1 loss.
//
// input, target: operands handed to a borrowing binary TensorIterator.
// reduction:     Reduction::None writes the elementwise loss into `result`;
//                Reduction::Mean / Reduction::Sum write the reduced scalar.
// beta:          threshold forwarded to native::xpu::smooth_l1_kernel.
// result:        output tensor; returned by reference.
Tensor& XPUNativeFunctions::smooth_l1_loss_out(
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta,
    Tensor& result) {
  // Unreduced case: the kernel writes straight into the caller's tensor.
  if (reduction == Reduction::None) {
    auto elementwise_iter =
        TensorIterator::borrowing_binary_op(result, input, target);
    native::xpu::smooth_l1_kernel(elementwise_iter, beta);
    return result;
  }

  // Only Mean and Sum remain valid from here on.
  TORCH_INTERNAL_ASSERT(
      reduction == Reduction::Mean || reduction == Reduction::Sum);
  result.resize_({});

  // The elementwise loss goes to an iterator-provided temporary (the tensor
  // passed in is left undefined); it is then reduced over all dimensions.
  Tensor unreduced;
  auto reduce_iter =
      TensorIterator::borrowing_binary_op(unreduced, input, target);
  native::xpu::smooth_l1_kernel(reduce_iter, beta);

  if (reduction == Reduction::Sum) {
    at::sum_out(result, reduce_iter.output(), IntArrayRef{});
  } else {
    at::mean_out(result, reduce_iter.output(), IntArrayRef{});
  }
  return result;
}

// Functional smooth L1 loss.
//
// input, target: operands of the elementwise loss.
// reduction:     Reduction::None returns the elementwise loss;
//                Reduction::Mean / Reduction::Sum return it reduced over all
//                elements.
// beta:          threshold forwarded to native::xpu::smooth_l1_kernel.
//
// The output is not pre-allocated from `input`: previously an input-sized
// buffer was created and, for Mean/Sum, immediately resized to a scalar, so
// the returned scalar kept that full-sized storage alive. The iterator is now
// given an undefined output and provides the elementwise tensor itself.
Tensor XPUNativeFunctions::smooth_l1_loss(
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta) {
  // Reject unknown reduction modes before doing any work.
  TORCH_INTERNAL_ASSERT(
      reduction == Reduction::None || reduction == Reduction::Mean ||
      reduction == Reduction::Sum);

  Tensor loss; // intentionally undefined; see iter.output() below
  auto iter = TensorIterator::borrowing_binary_op(loss, input, target);
  native::xpu::smooth_l1_kernel(iter, beta);

  if (reduction == Reduction::None) {
    // Copy the handle out before the iterator goes out of scope.
    return iter.output();
  }
  if (reduction == Reduction::Mean) {
    return iter.output().mean();
  }
  return iter.output().sum();
}

// Out-variant of the smooth L1 loss gradient with respect to `input`.
//
// grad_output: incoming gradient.
// input, target: operands of the forward loss.
// reduction:   for Reduction::Mean the gradient is scaled by 1 / input.numel();
//              otherwise the scale is 1.
// beta:        threshold forwarded to the backward kernel.
// grad_input:  output tensor; returned by reference.
Tensor& XPUNativeFunctions::smooth_l1_loss_backward_out(
    const Tensor& grad_output,
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta,
    Tensor& grad_input) {
  double grad_scale = 1.;
  if (reduction == Reduction::Mean) {
    grad_scale = 1. / input.numel();
  }

  // Input order (input, target, grad_output) must match the argument order of
  // the functor used by smooth_l1_backward_kernel.
  at::TensorIteratorConfig config;
  config.add_output(grad_input);
  config.add_const_input(input);
  config.add_const_input(target);
  config.add_const_input(grad_output);
  config.promote_inputs_to_common_dtype(true);
  config.cast_common_dtype_to_outputs(true);
  config.enforce_safe_casting_to_output(true);
  auto iter = config.build();

  native::xpu::smooth_l1_backward_kernel(iter, grad_scale, beta);
  return grad_input;
}

Tensor XPUNativeFunctions::binary_cross_entropy(
const Tensor& self,
const Tensor& target,
Expand Down
2 changes: 0 additions & 2 deletions src/ATen/native/xpu/XPUFallback.template
Original file line number Diff line number Diff line change
Expand Up @@ -256,8 +256,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"signbit.out",
"sign.out",
"sinc.out",
"smooth_l1_loss_backward.grad_input",
"smooth_l1_loss.out",
"special_airy_ai.out",
"special_bessel_j0.out",
"special_bessel_j1.out",
Expand Down
26 changes: 26 additions & 0 deletions src/ATen/native/xpu/sycl/BinaryMiscOpsKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,32 @@ void mse_kernel(TensorIteratorBase& iter) {
[&]() { gpu_kernel(iter, MSEFunctor<scalar_t>()); });
}

// Elementwise smooth L1 loss for a single (input, target) pair.
//
// With z = |input - target| and threshold beta:
//   z <  beta : 0.5 * z * z / beta   (quadratic region)
//   otherwise : z - 0.5 * beta       (linear region)
template <typename scalar_t>
struct SmoothL1Functor {
  SmoothL1Functor(scalar_t beta_val) : beta_(beta_val) {}

  scalar_t operator()(scalar_t input, scalar_t target) const {
    const auto abs_diff = std::abs(input - target);
    if (abs_diff < beta_) {
      return scalar_t(0.5) * abs_diff * abs_diff / beta_;
    }
    return abs_diff - scalar_t(0.5) * beta_;
  }

 private:
  // Threshold separating the quadratic and linear regions.
  scalar_t beta_;
};

// Launches the elementwise smooth L1 loss over `iter`.
// Dispatches on iter.dtype() across the floating types plus Half and BFloat16;
// `beta` is converted to the dispatched scalar type before it reaches the
// functor.
void smooth_l1_kernel(TensorIteratorBase& iter, double beta) {
  AT_DISPATCH_FLOATING_TYPES_AND2(
      at::ScalarType::Half,
      at::ScalarType::BFloat16,
      iter.dtype(),
      "smooth_l1_xpu",
      [&]() {
        const scalar_t threshold(beta);
        gpu_kernel(iter, SmoothL1Functor<scalar_t>(threshold));
      });
}

template <typename scalar_t>
struct HuberFunctor {
scalar_t operator()(scalar_t a, scalar_t b) const {
Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/BinaryMiscOpsKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ namespace at::native::xpu {

void mse_kernel(TensorIteratorBase& iter);

// Elementwise smooth L1 loss over the two inputs of `iter`. `beta` is the
// threshold between the quadratic (|x| < beta) and linear regions.
void smooth_l1_kernel(TensorIteratorBase& iter, double beta);

void huber_kernel(TensorIterator& iter, double delta);

} // namespace at::native::xpu
34 changes: 34 additions & 0 deletions src/ATen/native/xpu/sycl/PointwiseOpsKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,40 @@ void mse_backward_kernel(TensorIterator& iter, const Scalar& value) {
});
}

// Elementwise gradient of smooth L1 loss with respect to `input`.
//
// With x = input - target, scale `norm` and threshold `beta`:
//   x < -beta : -norm * grad_output
//   x >  beta :  norm * grad_output
//   otherwise :  norm * x * grad_output / beta
template <typename scalar_t>
struct SmoothL1BackwardFunctor {
  SmoothL1BackwardFunctor(scalar_t norm_val, scalar_t beta_val)
      : scale_(norm_val), beta_(beta_val) {}

  scalar_t operator()(scalar_t input, scalar_t target, scalar_t grad_output)
      const {
    const auto diff = input - target;
    // The lower bound is tested first, as in the original ordering.
    if (diff < -beta_) {
      return -scale_ * grad_output;
    }
    if (diff > beta_) {
      return scale_ * grad_output;
    }
    return scale_ * diff * grad_output / beta_;
  }

 private:
  // Multiplier applied to every gradient value.
  scalar_t scale_;
  // Threshold separating the linear and quadratic regions.
  scalar_t beta_;
};

// Launches the elementwise smooth L1 backward computation over `iter`.
// Dispatches on iter.dtype() across all standard types plus Half and
// BFloat16; `norm` and `beta` are converted to the dispatched scalar type
// before they reach the functor.
void smooth_l1_backward_kernel(TensorIterator& iter, Scalar norm, double beta) {
  AT_DISPATCH_ALL_TYPES_AND2(
      kHalf,
      kBFloat16,
      iter.dtype(),
      "smooth_l1_backward_xpu",
      [&] {
        const scalar_t scale = norm.to<scalar_t>();
        const scalar_t threshold(beta);
        gpu_kernel(iter, SmoothL1BackwardFunctor<scalar_t>(scale, threshold));
      });
}

template <typename scalar_t>
struct HuberBackwardFunctor {
scalar_t operator()(scalar_t input, scalar_t target, scalar_t grad_output)
Expand Down
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/PointwiseOpsKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ void addcdiv_kernel(TensorIterator& iter, Scalar value);

void mse_backward_kernel(TensorIterator& iter, const Scalar& value);

// Elementwise smooth L1 gradient. `iter` supplies (input, target,
// grad_output) in that order; `norm` scales every result and `beta` is the
// threshold between the linear and quadratic regions.
void smooth_l1_backward_kernel(TensorIterator& iter, Scalar norm, double beta);

void huber_backward_kernel(
TensorIterator& iter,
const Scalar& norm,
Expand Down
1 change: 1 addition & 0 deletions test/xpu/xpu_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,7 @@
"nn.functional.upsample_bilinear",
"nn.functional.upsample_nearest",
"nn.functional.nll_loss",
"nn.functional.smooth_l1_loss",
"nn.functional.mse_loss",
"nn.functional.binary_cross_entropy",
"nn.functional.huber_loss",
Expand Down
3 changes: 3 additions & 0 deletions yaml/xpu_functions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,9 @@ supported:
- bitwise_and.Tensor_out
- bitwise_or.Tensor_out
- bitwise_xor.Tensor_out
- smooth_l1_loss
- smooth_l1_loss.out
- smooth_l1_loss_backward.grad_input
- bitwise_not.out
- where.self_out
- where.self
Expand Down

0 comments on commit dc57d2b

Please sign in to comment.