Skip to content

Commit abcae7d

Browse files
authored
Merge branch 'main' into tongsu/dilated_max_pool
2 parents 3216512 + 5f41843 commit abcae7d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

50 files changed

+2876
-175
lines changed

cmake/Modules/FindSYCL.cmake

Lines changed: 0 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -71,26 +71,6 @@ else()
7171
)
7272
endif()
7373

74-
set(SYCL_LIBRARIES)
75-
find_library(SYCL_RUNTIME_LIBRARY sycl HINTS ${SYCL_LIBRARY_DIR})
76-
# On Windows, currently there's no sycl.lib. Only sycl7.lib with version suffix,
77-
# where the current version of the SYCL runtime is 7.
78-
# Until oneAPI adds support to sycl.lib without the version suffix,
79-
# sycl_runtime_version needs to be hardcoded and uplifted when SYCL runtime version uplifts.
80-
# TODO: remove this when sycl.lib is supported on Windows
81-
if(WIN32)
82-
set(sycl_runtime_version 7)
83-
find_library(
84-
SYCL_RUNTIME_LIBRARY
85-
NAMES "sycl${sycl_runtime_version}"
86-
HINTS ${SYCL_LIBRARY_DIR}
87-
)
88-
if(SYCL_RUNTIME_LIBRARY STREQUAL "SYCL_RUNTIME_LIBRARY-NOTFOUND")
89-
message(FATAL_ERROR "Cannot find a SYCL library on Windows")
90-
endif()
91-
endif()
92-
list(APPEND SYCL_LIBRARIES ${SYCL_RUNTIME_LIBRARY})
93-
9474
# Parse HOST_COMPILATION mode.
9575
option(SYCL_HOST_COMPILATION_CXX "Generated file extension" ON)
9676

src/ATen/native/xpu/BinaryOps.cpp

Lines changed: 126 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
#include <ATen/native/xpu/sycl/BinaryRemainderKernel.h>
1313
#include <ATen/native/xpu/sycl/CopysignKernel.h>
1414
#include <ATen/native/xpu/sycl/GcdLcmKernels.h>
15+
#include <ATen/native/xpu/sycl/LogAddExpKernels.h>
1516
#include <ATen/native/xpu/sycl/MaxMinElementwiseKernels.h>
1617

1718
namespace at {
18-
1919
Tensor XPUNativeFunctions::add(
2020
const Tensor& self,
2121
const Tensor& other,
@@ -459,6 +459,28 @@ Tensor& XPUNativeFunctions::minimum_out(
459459
return output;
460460
}
461461

// Out-variant of logit_backward: runs the XPU logit-backward kernel with
// `grad_input` as the iterator output and returns a reference to `grad_input`.
462+
Tensor& XPUNativeFunctions::logit_backward_out(
463+
const Tensor& grad_output,
464+
const Tensor& input,
465+
std::optional<double> eps,
466+
Tensor& grad_input) {
467+
TensorIterator iter;
// Output operand is grad_input; the inputs are grad_output and input.
468+
iter.build_borrowing_binary_op(grad_input, grad_output, input);
// When eps is not provided, -1.0 is forwarded to the kernel instead.
// NOTE(review): presumably the kernel treats a negative eps as "no clamping" — confirm.
469+
native::xpu::logit_backward_kernel(iter, Scalar(eps ? eps.value() : -1.0));
470+
return grad_input;
471+
}
472+
// Functional variant of logit_backward: starts from a default-constructed
// `grad_input`, runs the XPU logit-backward kernel and returns the
// iterator's output tensor.
473+
Tensor XPUNativeFunctions::logit_backward(
474+
const Tensor& grad_output,
475+
const Tensor& input,
476+
std::optional<double> eps) {
// Default-constructed on purpose; the result is read back via iter.output().
// NOTE(review): presumably the iterator allocates the output — confirm.
477+
Tensor grad_input;
478+
TensorIterator iter;
479+
iter.build_borrowing_binary_op(grad_input, grad_output, input);
// A missing eps is forwarded to the kernel as -1.0.
480+
native::xpu::logit_backward_kernel(iter, Scalar(eps ? eps.value() : -1.0));
481+
return iter.output();
482+
}
483+
462484
Tensor& XPUNativeFunctions::sigmoid_backward_out(
463485
const Tensor& grad_output,
464486
const Tensor& output,
@@ -479,6 +501,109 @@ Tensor XPUNativeFunctions::sigmoid_backward(
479501
return iter.output();
480502
}
481503

// Functional logaddexp: builds a borrowing binary iterator over a
// default-constructed `out`, runs native::xpu::logaddexp_kernel and returns
// the iterator's output tensor.
504+
Tensor XPUNativeFunctions::logaddexp(const Tensor& self, const Tensor& other) {
505+
Tensor out;
506+
auto iter = TensorIterator::borrowing_binary_op(out, self, other);
507+
native::xpu::logaddexp_kernel(iter);
// `out` was default-constructed, so the result is taken from the iterator.
508+
return iter.output();
509+
}
510+
// Out-variant of logaddexp: runs native::xpu::logaddexp_kernel with the
// caller-supplied `out` as the iterator output and returns a reference to it.
511+
Tensor& XPUNativeFunctions::logaddexp_out(
512+
const Tensor& self,
513+
const Tensor& other,
514+
Tensor& out) {
515+
auto iter = TensorIterator::borrowing_binary_op(out, self, other);
516+
native::xpu::logaddexp_kernel(iter);
517+
return out;
518+
}
519+
// Functional logaddexp2: same structure as logaddexp but dispatches to
// native::xpu::logaddexp2_kernel; returns the iterator's output tensor.
520+
Tensor XPUNativeFunctions::logaddexp2(const Tensor& self, const Tensor& other) {
521+
Tensor out;
522+
auto iter = TensorIterator::borrowing_binary_op(out, self, other);
523+
native::xpu::logaddexp2_kernel(iter);
// `out` was default-constructed, so the result is taken from the iterator.
524+
return iter.output();
525+
}
526+
// Out-variant of logaddexp2: runs native::xpu::logaddexp2_kernel with the
// caller-supplied `out` as the iterator output and returns a reference to it.
527+
Tensor& XPUNativeFunctions::logaddexp2_out(
528+
const Tensor& self,
529+
const Tensor& other,
530+
Tensor& out) {
531+
auto iter = TensorIterator::borrowing_binary_op(out, self, other);
532+
native::xpu::logaddexp2_kernel(iter);
533+
return out;
534+
}
535+
// Out-variant of floor_divide: builds a binary-op iterator over
// (output, self, other), runs native::xpu::div_floor_kernel and returns
// `output`.
536+
Tensor& XPUNativeFunctions::floor_divide_out(
537+
const Tensor& self,
538+
const Tensor& other,
539+
Tensor& output) {
540+
auto iter = TensorIterator::binary_op(output, self, other);
541+
native::xpu::div_floor_kernel(iter);
// If the caller handed in an undefined tensor, adopt the iterator's output so
// the returned reference refers to the computed result.
542+
if (!output.defined()) {
543+
output = iter.output();
544+
}
545+
return output;
546+
}
547+
// Functional floor_divide: runs native::xpu::div_floor_kernel over a
// default-constructed `output` and returns the iterator's output tensor.
548+
Tensor XPUNativeFunctions::floor_divide(
549+
const Tensor& self,
550+
const Tensor& other) {
551+
Tensor output;
552+
auto iter = TensorIterator::binary_op(output, self, other);
553+
native::xpu::div_floor_kernel(iter);
554+
return iter.output();
555+
}
556+
// In-place floor_divide: delegates to floor_divide_out with `self` used as
// both the first operand and the output.
557+
Tensor& XPUNativeFunctions::floor_divide_(Tensor& self, const Tensor& other) {
558+
return XPUNativeFunctions::floor_divide_out(self, other, self);
559+
}
560+
// Shared setup for the fmin/fmax entry points.
// Raises via TORCH_CHECK if either input is complex (the message is prefixed
// with `name`), then builds and returns a binary-op TensorIterator over
// (output, self, other).
561+
TensorIterator meta_fmin_fmax(
562+
const char* const name,
563+
const Tensor& self,
564+
const Tensor& other,
565+
Tensor& output) {
566+
TORCH_CHECK(
567+
!self.is_complex() && !other.is_complex(),
568+
name,
569+
" not implemented for complex tensors.");
570+
TensorIterator iter;
571+
iter.build_binary_op(output, self, other);
572+
return iter;
573+
}
574+
// Out-variant of fmax: validates/builds the iterator via meta_fmin_fmax,
// runs native::xpu::fmax_kernel and returns `output`.
575+
Tensor& XPUNativeFunctions::fmax_out(
576+
const Tensor& self,
577+
const Tensor& other,
578+
Tensor& output) {
579+
auto iter = meta_fmin_fmax("fmax", self, other, output);
580+
native::xpu::fmax_kernel(iter);
581+
return output;
582+
}
583+
// Functional fmax: same as fmax_out but starts from a default-constructed
// `output` and returns the iterator's output tensor.
584+
Tensor XPUNativeFunctions::fmax(const Tensor& self, const Tensor& other) {
585+
Tensor output;
586+
auto iter = meta_fmin_fmax("fmax", self, other, output);
587+
native::xpu::fmax_kernel(iter);
588+
return iter.output();
589+
}
590+
// Out-variant of fmin: validates/builds the iterator via meta_fmin_fmax,
// runs native::xpu::fmin_kernel and returns `output`.
591+
Tensor& XPUNativeFunctions::fmin_out(
592+
const Tensor& self,
593+
const Tensor& other,
594+
Tensor& output) {
595+
auto iter = meta_fmin_fmax("fmin", self, other, output);
596+
native::xpu::fmin_kernel(iter);
597+
return output;
598+
}
599+
// Functional fmin: same as fmin_out but starts from a default-constructed
// `output` and returns the iterator's output tensor.
600+
Tensor XPUNativeFunctions::fmin(const Tensor& self, const Tensor& other) {
601+
Tensor output;
602+
auto iter = meta_fmin_fmax("fmin", self, other, output);
603+
native::xpu::fmin_kernel(iter);
604+
return iter.output();
605+
}
606+
482607
Tensor XPUNativeFunctions::atan2(const Tensor& self, const Tensor& other) {
483608
Tensor out;
484609
TensorIterator iter;

src/ATen/native/xpu/Distributions.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,24 @@ Tensor& XPUNativeFunctions::random_(
191191
return random_(self, 0, to, std::move(generator));
192192
}
193193

// Functor adapter passed as the stub template argument to
// native::templates::exponential_impl_. Its call operator forwards the
// iterator, rate `lambda` and optional generator to
// native::xpu::exponential_kernel. The RNG template parameter is not used in
// the body.
194+
template <typename RNG>
195+
struct ExponentialStub {
196+
void operator()(
197+
TensorIteratorBase& iter,
198+
double lambda,
// Spelled std::optional to match XPUNativeFunctions::exponential_ in this file.
199+
std::optional<Generator> gen) {
200+
native::xpu::exponential_kernel(iter, lambda, gen);
201+
}
202+
};
203+
// In-place exponential sampling entry point: delegates to
// native::templates::exponential_impl_ instantiated with ExponentialStub and
// Generator, moving the optional generator into the call, and returns its
// result.
204+
Tensor& XPUNativeFunctions::exponential_(
205+
Tensor& self,
206+
double lambda,
207+
std::optional<Generator> generator) {
208+
return native::templates::exponential_impl_<ExponentialStub, Generator>(
209+
self, lambda, std::move(generator));
210+
}
211+
194212
/* The largest consecutive integer representable in float32 (2^24) */
195213
constexpr int64_t FLOAT32_MAX_CONSECUTIVE_INT = 1 << (24);
196214

src/ATen/native/xpu/Loss.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <ATen/core/Reduction.h>
33
#include <ATen/core/Tensor.h>
44
#include <ATen/native/xpu/sycl/BinaryMiscOpsKernels.h>
5+
#include <ATen/native/xpu/sycl/LossKernels.h>
56
#include <ATen/native/xpu/sycl/PointwiseOpsKernels.h>
67
#include <ATen/xpu/XPUNativeFunctions.h>
78
#include <comm/RegisterUtils.h>
@@ -79,6 +80,60 @@ Tensor& XPUNativeFunctions::mse_loss_backward_out(
7980
return grad_input;
8081
}
8182

// Functional binary_cross_entropy: allocates `loss` with at::empty_like(self)
// and returns the result of native::xpu::binary_cross_entropy_kernel.
83+
Tensor XPUNativeFunctions::binary_cross_entropy(
84+
const Tensor& self,
85+
const Tensor& target,
86+
const std::optional<Tensor>& weight_opt,
87+
int64_t reduction) {
// Unwrap the optional weight into a plain Tensor reference for the kernel.
// NOTE(review): presumably an undefined Tensor when weight_opt is empty — confirm.
88+
c10::MaybeOwned<Tensor> weight_maybe_owned =
89+
at::borrow_from_optional_tensor(weight_opt);
90+
const Tensor& weight = *weight_maybe_owned;
91+
Tensor loss = at::empty_like(self);
92+
return native::xpu::binary_cross_entropy_kernel(
93+
self, target, weight, reduction, loss);
94+
}
95+
// Out-variant of binary_cross_entropy: passes the caller-supplied `loss` to
// native::xpu::binary_cross_entropy_kernel and returns the kernel's result.
96+
Tensor& XPUNativeFunctions::binary_cross_entropy_out(
97+
const Tensor& self,
98+
const Tensor& target,
99+
const std::optional<Tensor>& weight_opt,
100+
int64_t reduction,
101+
Tensor& loss) {
// Unwrap the optional weight into a plain Tensor reference for the kernel.
102+
c10::MaybeOwned<Tensor> weight_maybe_owned =
103+
at::borrow_from_optional_tensor(weight_opt);
104+
const Tensor& weight = *weight_maybe_owned;
105+
return native::xpu::binary_cross_entropy_kernel(
106+
self, target, weight, reduction, loss);
107+
}
108+
// Functional binary_cross_entropy_backward: allocates `grad_input` with
// at::empty_like(self) and returns the result of
// native::xpu::binary_cross_entropy_backward_kernel.
109+
Tensor XPUNativeFunctions::binary_cross_entropy_backward(
110+
const Tensor& grad_output,
111+
const Tensor& self,
112+
const Tensor& target,
113+
const std::optional<Tensor>& weight_opt,
114+
int64_t reduction) {
// Unwrap the optional weight into a plain Tensor reference for the kernel.
115+
c10::MaybeOwned<Tensor> weight_maybe_owned =
116+
at::borrow_from_optional_tensor(weight_opt);
117+
const Tensor& weight = *weight_maybe_owned;
118+
Tensor grad_input = at::empty_like(self);
119+
return native::xpu::binary_cross_entropy_backward_kernel(
120+
grad_output, self, target, weight, reduction, grad_input);
121+
}
122+
// Out-variant of binary_cross_entropy_backward: passes the caller-supplied
// `grad_input` to native::xpu::binary_cross_entropy_backward_kernel and
// returns the kernel's result.
123+
Tensor& XPUNativeFunctions::binary_cross_entropy_backward_out(
124+
const Tensor& grad_output,
125+
const Tensor& self,
126+
const Tensor& target,
127+
const std::optional<Tensor>& weight_opt,
128+
int64_t reduction,
129+
Tensor& grad_input) {
// Unwrap the optional weight into a plain Tensor reference for the kernel.
130+
c10::MaybeOwned<Tensor> weight_maybe_owned =
131+
at::borrow_from_optional_tensor(weight_opt);
132+
const Tensor& weight = *weight_maybe_owned;
133+
return native::xpu::binary_cross_entropy_backward_kernel(
134+
grad_output, self, target, weight, reduction, grad_input);
135+
}
136+
82137
Tensor XPUNativeFunctions::huber_loss(
83138
const Tensor& input,
84139
const Tensor& target,

src/ATen/native/xpu/Normalization.cpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
#include <ATen/ATen.h>
2+
#include <ATen/AccumulateType.h>
3+
#include <ATen/core/Tensor.h>
4+
#include <ATen/native/xpu/sycl/RenormKernel.h>
5+
#include <ATen/xpu/XPUNativeFunctions.h>
6+
#include <comm/RegisterUtils.h>
7+
8+
namespace at {
9+
// Argument validation and output preparation shared by the renorm entry
// points.
// Checks (via TORCH_CHECK): `p` is real and > 0, `maxnorm` is real and >= 0,
// and `self` has more than one dimension. Afterwards `output` is resized to
// self's sizes when already defined, or created with self's sizes and options
// otherwise.
// Note: `dim` is accepted but not read in this function.
10+
void renorm_meta(
11+
const Tensor& self,
12+
const Scalar& p,
13+
int64_t dim,
14+
const Scalar& maxnorm,
15+
Tensor& output) {
16+
TORCH_CHECK(!p.isComplex(), "renorm: p must be real-valued");
17+
TORCH_CHECK(p.toDouble() > 0.0, "renorm: non-positive-norm not supported");
18+
TORCH_CHECK(!maxnorm.isComplex(), "renorm: maxnorm must be real-valued");
19+
TORCH_CHECK(
20+
maxnorm.toDouble() >= 0.0,
21+
"renorm: expected maxnorm to be >= 0 but got ",
22+
maxnorm.toDouble());
23+
const auto ndim = self.dim();
24+
TORCH_CHECK(
25+
ndim > 1,
26+
"renorm: input needs at least 2 dimensions, got ",
27+
ndim,
28+
" dimensions");
// Reuse the caller's tensor when one was supplied; otherwise allocate one.
29+
if (output.defined()) {
30+
xpu::resize_out(output, self.sizes(), {}, self.options());
31+
} else {
32+
output = xpu::create_out(self.sizes(), {}, self.options());
33+
}
34+
}
35+
// Computes renorm of `self` into `out` and returns the reference produced by
// at::mul_outf.
// Steps: (1) take the p-norm over every dimension except `dim` (keepdim), in
// the accumulate type when that differs from self's dtype; (2) turn the norms
// into scale factors with renorm_scale_factor_kernel using `maxnorm`;
// (3) multiply `self` by the factors, writing into `out`.
36+
Tensor& renorm_impl(
37+
const Tensor& self,
38+
const Scalar& p,
39+
int64_t dim,
40+
const Scalar& maxnorm,
41+
Tensor& out) {
42+
auto self_sizes = self.sizes();
43+
dim = c10::maybe_wrap_dim(dim, self_sizes.size());
44+
// reduce_dims = {0, 1, ..., ndim-1} with `dim` removed.
45+
DimVector reduce_dims(self_sizes.size());
46+
std::iota(reduce_dims.begin(), reduce_dims.end(), 0);
47+
reduce_dims.erase(reduce_dims.begin() + dim);
48+
49+
auto dtype = self.scalar_type();
50+
auto acc_type = at::toAccumulateType(dtype, c10::DeviceType::XPU);
51+
Tensor norm;
// Request the accumulate dtype explicitly only when it differs from dtype.
52+
if (acc_type != dtype) {
53+
norm = at::linalg_vector_norm(
54+
self,
55+
p.toDouble(),
56+
reduce_dims,
57+
/*keepdim=*/true,
58+
/*dtype=*/acc_type);
59+
} else {
60+
norm = at::linalg_vector_norm(
61+
self,
62+
p.toDouble(),
63+
reduce_dims,
64+
/*keepdim=*/true);
65+
}
66+
// `factor` is `norm` itself when the accumulate type equals the real value
// type of dtype; otherwise it is a fresh tensor created with self's options.
67+
auto factor = (acc_type == c10::toRealValueType(dtype))
68+
? norm
69+
: at::empty(norm.sizes(), self.options());
// Overlap checking is disabled: `factor` may be the same tensor as `norm`.
70+
auto iter = TensorIteratorConfig()
71+
.add_output(factor)
72+
.add_input(norm)
73+
.set_check_mem_overlap(false)
74+
.cast_common_dtype_to_outputs(true)
75+
.build();
76+
77+
at::native::xpu::renorm_scale_factor_kernel(iter, maxnorm.toDouble());
// `out` is already a non-const Tensor&, so it is passed directly (no cast).
78+
return at::mul_outf(self, factor, out);
79+
}
80+
// In-place renorm: validates arguments with renorm_meta and computes with
// renorm_impl, using `self` as both input and output; returns `self`.
81+
Tensor& XPUNativeFunctions::renorm_(
82+
Tensor& self,
83+
const Scalar& p,
84+
int64_t dim,
85+
const Scalar& maxnorm) {
86+
renorm_meta(self, p, dim, maxnorm, self);
87+
renorm_impl(self, p, dim, maxnorm, self);
88+
return self;
89+
}
// Out-variant of renorm: renorm_meta validates arguments and prepares `out`,
// renorm_impl writes the result into it; returns `out`.
90+
Tensor& XPUNativeFunctions::renorm_out(
91+
const Tensor& self,
92+
const Scalar& p,
93+
int64_t dim,
94+
const Scalar& maxnorm,
95+
Tensor& out) {
96+
renorm_meta(self, p, dim, maxnorm, out);
97+
renorm_impl(self, p, dim, maxnorm, out);
98+
return out;
99+
}
// Functional renorm: starts from a default-constructed `out`, which
// renorm_meta creates with self's sizes and options, then renorm_impl fills
// it; returns `out` by value.
100+
Tensor XPUNativeFunctions::renorm(
101+
const Tensor& self,
102+
const Scalar& p,
103+
int64_t dim,
104+
const Scalar& maxnorm) {
105+
Tensor out;
106+
renorm_meta(self, p, dim, maxnorm, out);
107+
renorm_impl(self, p, dim, maxnorm, out);
108+
return out;
109+
}
110+
} // namespace at

0 commit comments

Comments
 (0)