
Commit

Merge branch 'main' into raymond/UT_FP64_Win_ARC
min-jean-cho authored Aug 27, 2024
2 parents efb9c10 + 455deaf commit ccecded
Showing 16 changed files with 63 additions and 68 deletions.
19 changes: 9 additions & 10 deletions src/ATen/native/xpu/Copy.cpp
@@ -300,7 +300,9 @@ Tensor& XPUNativeFunctions::copy_(
const Tensor& src,
bool non_blocking) {
if (self._is_zerotensor()) {
-    TORCH_CHECK(false, "ZeroTensors are immutable. Please materialize the tensor using `.clone()`, if you want a mutable zero tensor.");
+    TORCH_CHECK(
+        false,
+        "ZeroTensors are immutable. Please materialize the tensor using `.clone()`, if you want a mutable zero tensor.");
}
if (src._is_zerotensor()) {
return self.zero_();
@@ -316,15 +318,12 @@ Tensor& XPUNativeFunctions::copy_(
// TODO: Support quantization

// Exit early if self and src are views of the same data
-  const bool is_same_data = (
-      self.is_alias_of(src) &&
-      self.storage_offset() == src.storage_offset() &&
-      self.strides().equals(src.strides()) &&
-      self.sizes().equals(src.sizes()) &&
-      self.scalar_type() == src.scalar_type() &&
-      self.is_conj() == src.is_conj() &&
-      self.is_neg() == src.is_neg()
-  );
+  const bool is_same_data =
+      (self.is_alias_of(src) && self.storage_offset() == src.storage_offset() &&
+       self.strides().equals(src.strides()) &&
+       self.sizes().equals(src.sizes()) &&
+       self.scalar_type() == src.scalar_type() &&
+       self.is_conj() == src.is_conj() && self.is_neg() == src.is_neg());
if (is_same_data) {
return self;
}
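A note on the early-exit predicate above: it fires only when self and src are aliases of the same storage with identical offset, strides, sizes, dtype, and conj/neg bits. A minimal sketch of the two cases, assuming a standard ATen build (the tensors and calls here are illustrative, not part of this commit; dispatching to this XPU implementation would additionally require XPU tensors):

#include <ATen/ATen.h>

void copy_early_exit_demo() {
  at::Tensor t = at::rand({4, 4});
  // Same tensor: alias with identical offset/strides/sizes/dtype,
  // so copy_ returns early and no copy kernel is launched.
  t.copy_(t);

  // Distinct storage: the predicate is false, so a real copy runs.
  at::Tensor u = t.clone();
  t.copy_(u);
}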
3 changes: 2 additions & 1 deletion src/ATen/native/xpu/Resize.cpp
@@ -66,7 +66,8 @@ Tensor _copy_from_and_resize(const at::Tensor& self, const at::Tensor& dst) {
// For test infrastructure
Tensor _copy_from(const Tensor& self, const Tensor& dst, bool non_blocking) {
dst.resize_as_(self);
-  return at::XPUNativeFunctions::copy_(const_cast<Tensor&>(dst), self, non_blocking);
+  return at::XPUNativeFunctions::copy_(
+      const_cast<Tensor&>(dst), self, non_blocking);
}

// Should not register the operator. Desc of
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/Sorting.cpp
@@ -1,12 +1,12 @@
#include <ATen/ATen.h>
#include <ATen/core/Tensor.h>
#include <ATen/core/op_registration/adaption.h>
+#include <ATen/native/ReduceOpsUtils.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/xpu/sycl/Sorting.h>
-#include <ATen/native/ReduceOpsUtils.h>
#include <ATen/xpu/XPUNativeFunctions.h>
-#include <comm/TensorInfo.h>
#include <comm/RegisterUtils.h>
+#include <comm/TensorInfo.h>

namespace at {

1 change: 0 additions & 1 deletion src/ATen/native/xpu/UnaryOps.cpp
@@ -1037,7 +1037,6 @@ Tensor& XPUNativeFunctions::ceil_out(const Tensor& self, Tensor& out) {
return out;
}

-
Tensor XPUNativeFunctions::round(const Tensor& self) {
if (c10::isIntegralType(self.scalar_type(), /*includeBool=*/false)) {
return self.clone();
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/sycl/ActivationGeluKernel.cpp
@@ -55,7 +55,7 @@ struct GeluErfFunctor {
using opmath_t = at::opmath_type<scalar_t>;
constexpr opmath_t kAlpha = M_SQRT1_2;
return static_cast<opmath_t>(x) * opmath_t(0.5) *
-      (opmath_t(1) + ::erf(static_cast<opmath_t>(x) * kAlpha));
+      (opmath_t(1) + std::erf(static_cast<opmath_t>(x) * kAlpha));
}
};

@@ -66,7 +66,7 @@ struct GeluErfBackwardFunctor {
constexpr opmath_t kBeta = M_2_SQRTPI * M_SQRT1_2 * opmath_t(0.5);
constexpr opmath_t kAlpha = M_SQRT1_2;
const opmath_t cdf = opmath_t(0.5) *
-        (opmath_t(1) + ::erf(static_cast<opmath_t>(x) * kAlpha));
+        (opmath_t(1) + std::erf(static_cast<opmath_t>(x) * kAlpha));
const opmath_t pdf = c10::xpu::compat::exp(
opmath_t(-0.5) * static_cast<opmath_t>(x) *
static_cast<opmath_t>(x)) *
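For reference, the two functors in this file compute the erf-based GELU and its derivative; in the notation of the constants above, kAlpha = M_SQRT1_2 = 1/sqrt(2) and kBeta = M_2_SQRTPI * M_SQRT1_2 * 0.5 = 1/sqrt(2*pi):

\mathrm{GELU}(x) = x\,\Phi(x) = \tfrac{x}{2}\bigl(1 + \operatorname{erf}(x/\sqrt{2})\bigr),
\qquad
\mathrm{GELU}'(x) = \Phi(x) + x\,\varphi(x),
\quad
\varphi(x) = \tfrac{1}{\sqrt{2\pi}}\,e^{-x^2/2}.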
4 changes: 1 addition & 3 deletions src/ATen/native/xpu/sycl/Atomics.h
@@ -325,9 +325,7 @@ static inline void atomicAdd(
target.fetch_add(val);
}

-static inline void atomicAdd(
-    const sycl_local_ptr<int>& address,
-    int val) {
+static inline void atomicAdd(const sycl_local_ptr<int>& address, int val) {
sycl_atomic_ref_rlx_wg_local_t<int> target(*address);
target.fetch_add(val);
}
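For readers unfamiliar with the alias used here: sycl_atomic_ref_rlx_wg_local_t is presumably a relaxed, work-group-scoped sycl::atomic_ref over the local address space. A sketch of what such an alias could look like — an assumption for illustration, not the repository's actual definition:

#include <sycl/sycl.hpp>

// Assumed shape of the alias; the real definition lives elsewhere in Atomics.h.
template <typename T>
using sycl_atomic_ref_rlx_wg_local_t = sycl::atomic_ref<
    T,
    sycl::memory_order::relaxed,
    sycl::memory_scope::work_group,
    sycl::access::address_space::local_space>;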
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/BinaryRemainderKernel.cpp
@@ -41,7 +41,7 @@ template <typename scalar_t>
struct FmodFloatingFunctor {
scalar_t operator()(scalar_t a, scalar_t b) const
__ubsan_ignore_float_divide_by_zero__ {
-    return ::fmod(a, b);
+    return std::fmod(a, b);
}
};

30 changes: 15 additions & 15 deletions src/ATen/native/xpu/sycl/Math.h
@@ -33,7 +33,7 @@ static inline C10_HOST_DEVICE scalar_t calc_digamma(scalar_t in) {
return std::copysign(static_cast<scalar_t>(INFINITY), -x);
}

-  bool x_is_integer = x == ::trunc(x);
+  bool x_is_integer = x == std::trunc(x);
accscalar_t result = 0;
if (x < 0) {
if (x_is_integer) {
@@ -47,8 +47,8 @@ static inline C10_HOST_DEVICE scalar_t calc_digamma(scalar_t in) {
// a periodicity of pi, in practice the computation of pi * x is a source of
// error (when |x| > 1).
double q, r;
-    r = ::modf(static_cast<double>(x), &q);
-    result = static_cast<accscalar_t>(-PI_f64 / ::tan(PI_f64 * r));
+    r = std::modf(static_cast<double>(x), &q);
+    result = static_cast<accscalar_t>(-PI_f64 / std::tan(PI_f64 * r));
x = 1 - x;
}

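The reflection step above uses the standard digamma identity; writing it out makes the modf/tan combination clearer:

\psi(x) = \psi(1 - x) - \pi \cot(\pi x) = \psi(1 - x) - \frac{\pi}{\tan(\pi x)}

Since tan has period \pi, \tan(\pi x) = \tan(\pi r) where r is the fractional part of x returned by std::modf, so tan is evaluated on a small argument — limiting the rounding error in \pi \cdot x that the comment warns about for |x| > 1.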
@@ -72,7 +72,7 @@ static inline C10_HOST_DEVICE scalar_t calc_digamma(scalar_t in) {
}

return static_cast<scalar_t>(
-      ::log(x) - (static_cast<accscalar_t>(0.5) / x) - y + result);
+      std::log(x) - (static_cast<accscalar_t>(0.5) / x) - y + result);
}

template <typename scalar_t>
@@ -84,7 +84,7 @@ static inline C10_HOST_DEVICE scalar_t calc_trigamma(scalar_t in) {
accscalar_t result = 0;
if (x < 0.5f) {
sign = -1;
-    accscalar_t sin_pi_x = ::sin(PI * x);
+    accscalar_t sin_pi_x = std::sin(PI * x);
result -= (PI * PI) / (sin_pi_x * sin_pi_x);
x = 1 - x;
}
@@ -190,22 +190,22 @@ static inline C10_HOST_DEVICE scalar_t calc_i0(scalar_t _x) {
"don't instantiate with low precision type");
// Upcast input for numerical accuracy purposes
// Needed for accurate results if input is bfloat16 or float16
-  scalar_t x = ::abs(_x);
+  scalar_t x = std::abs(_x);

if (x <= scalar_t{8.0}) {
auto coeff_pair = chebyshev_coefficients_i0e_A<scalar_t>();
auto A = std::get<0>(coeff_pair);
auto len = std::get<1>(coeff_pair);
scalar_t y = (x / scalar_t{2.0}) - scalar_t{2.0};
-    return (::exp(x) * chbevl(y, A, len));
+    return (std::exp(x) * chbevl(y, A, len));
}

auto coeff_pair = chebyshev_coefficients_i0e_B<scalar_t>();
auto B = std::get<0>(coeff_pair);
auto len = std::get<1>(coeff_pair);
return (
-      ::exp(x) * chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len) /
-      ::sqrt(x));
+      std::exp(x) * chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len) /
+      std::sqrt(x));
}

template <typename T>
@@ -319,28 +319,28 @@ C10_HOST_DEVICE inline typename std::

template <typename scalar_t>
static inline C10_HOST_DEVICE scalar_t calc_i1(scalar_t _x) {
-  const auto x = ::abs(_x);
+  const auto x = std::abs(_x);
if (x <= scalar_t{8.0}) {
auto coeff_pair = chebyshev_coefficients_i1e_A<scalar_t>();
auto A = std::get<0>(coeff_pair);
auto len = std::get<1>(coeff_pair);
scalar_t y = x / scalar_t{2.0} - scalar_t{2.0};
-    const scalar_t out = ::exp(x) * x * chbevl(y, A, len);
+    const scalar_t out = std::exp(x) * x * chbevl(y, A, len);
return (_x < scalar_t{0.0}) ? -out : out;
}

auto coeff_pair = chebyshev_coefficients_i1e_B<scalar_t>();
auto B = std::get<0>(coeff_pair);
auto len = std::get<1>(coeff_pair);
const scalar_t out =
-      (::exp(x) * chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len)) /
-      ::sqrt(x);
+      (std::exp(x) * chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len)) /
+      std::sqrt(x);
return (_x < scalar_t{0.0}) ? -out : out;
}

template <typename scalar_t>
static inline C10_HOST_DEVICE scalar_t calc_i1e(scalar_t _x) {
-  const auto x = ::abs(_x);
+  const auto x = std::abs(_x);
if (x <= scalar_t{8.0}) {
auto coeff_pair = chebyshev_coefficients_i1e_A<scalar_t>();
auto A = std::get<0>(coeff_pair);
@@ -354,7 +354,7 @@ static inline C10_HOST_DEVICE scalar_t calc_i1e(scalar_t _x) {
auto B = std::get<0>(coeff_pair);
auto len = std::get<1>(coeff_pair);
const scalar_t out =
-      chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len) / ::sqrt(x);
+      chbevl(scalar_t{32.0} / x - scalar_t{2.0}, B, len) / std::sqrt(x);
return (_x < scalar_t{0.0}) ? -out : out;
}

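For reference, calc_i0, calc_i1, and calc_i1e above follow the classic Cephes-style evaluation of the modified Bessel functions, splitting at x = 8 and factoring out the exponential growth:

I_0(x) = e^{x}\,\mathrm{chbevl}\!\left(\tfrac{x}{2} - 2,\ A\right) \quad (0 \le x \le 8),
\qquad
I_0(x) = \frac{e^{x}}{\sqrt{x}}\,\mathrm{chbevl}\!\left(\tfrac{32}{x} - 2,\ B\right) \quad (x > 8),

with I_1 handled analogously (an extra factor of x on the small branch, and the sign restored from _x afterwards, since I_1 is odd). calc_i1e computes the exponentially scaled e^{-x} I_1(x), which is why its branches drop the exp factor entirely.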
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/Pow.h
@@ -47,7 +47,7 @@ powI_(Base_type base, Exp_type exp) {
#else
template <typename Base_type, typename Exp_type>
static inline Base_type pow_(Base_type base, Exp_type exp) {
-  return ::pow(base, exp);
+  return std::pow(base, exp);
}
#endif

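One plausible motivation for replacing ::pow (and ::fmod, ::erf, ::exp, and the other math calls changed throughout this commit) with the std:: forms — stated here as an inference, not taken from the commit itself: <cmath> guarantees float overloads in namespace std, whereas the global-namespace C function takes and returns double, so unqualified calls can silently promote FP32 math to FP64, which matters on devices with limited FP64 support. A minimal illustration:

#include <cmath>
#include <type_traits>

// std::pow has a float overload, so float operands stay in FP32;
// C's pow(double, double) would promote them to double.
static_assert(
    std::is_same_v<decltype(std::pow(2.0f, 3.0f)), float>,
    "std::pow(float, float) returns float");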
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/PowKernels.cpp
@@ -19,7 +19,7 @@ namespace impl {

template <typename Base_type, typename Exp_type>
static inline Base_type pow_(Base_type base, Exp_type exp) {
-  return ::pow(base, exp);
+  return std::pow(base, exp);
}

template <typename T>
5 changes: 2 additions & 3 deletions src/ATen/native/xpu/sycl/Reduce.h
@@ -246,9 +246,8 @@ struct ReduceConfig {
// in side of SG. It is functional WA. We got case failures on some
// platforms supporting SIMD8.
// https://github.com/intel/torch-xpu-ops/issues/698
-    auto max_sg_sz = syclMinSubGroupSize() == 8
-        ? syclMinSubGroupSize()
-        : syclMaxSubGroupSize();
+    auto max_sg_sz = syclMinSubGroupSize() == 8 ? syclMinSubGroupSize()
+                                                : syclMaxSubGroupSize();
const int max_num_items = max_wg_sz / output_vec_size;
int dim0_pow2 = dim0 < max_num_items ? static_cast<int>(last_pow2(dim0))
: max_num_items;
34 changes: 17 additions & 17 deletions src/ATen/native/xpu/sycl/SoftMaxKernels.cpp
@@ -271,7 +271,7 @@ struct DispatchSoftmaxForwardKernelFunctor
++i) {
#pragma unroll(vec_size)
for (int j = 0; j < vec_size; ++j) {
-        sum_value += ::exp(reg_in[i][j] - max_value);
+        sum_value += std::exp(reg_in[i][j] - max_value);
}
}
if (local_size_ > 1) {
@@ -285,7 +285,7 @@ struct DispatchSoftmaxForwardKernelFunctor
[](accscalar_t a, accscalar_t b) { return a + b; });
}
if constexpr (LogSoftMax)
-      sum_value = ::log(sum_value);
+      sum_value = std::log(sum_value);
else if (sum_value != 0)
sum_value = accscalar_t(1) / sum_value;

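The branch above implements the numerically stable, max-subtracted normalization; with m = \max_j x_j and s = \sum_j e^{x_j - m}:

\mathrm{softmax}(x_i) = e^{x_i - m} \cdot \frac{1}{s},
\qquad
\mathrm{logsoftmax}(x_i) = (x_i - m) - \log s,

which is why the LogSoftMax branch stores \log s (subtracted in the write-out loop) while the plain branch stores the reciprocal 1/s (a cheaper multiply per element).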
@@ -305,7 +305,7 @@ struct DispatchSoftmaxForwardKernelFunctor
reg_in[i][j] = nan_;
} else {
reg_in[i][j] = static_cast<scalar_t>(
-              ::exp(reg_in[i][j] - max_value) * sum_value);
+              std::exp(reg_in[i][j] - max_value) * sum_value);
}
}
*(reinterpret_cast<vec_t*>(out_data_ + group_offset + index)) = reg_in[i];
@@ -520,13 +520,13 @@ struct SoftmaxForwardKernelFunctor {
for (int j = 0; j < vec_size; ++j) {
IndexType linear_idx = i * vec_size + j - start;
if (linear_idx >= 0 && linear_idx < dim_size_)
-          sum_value += ::exp(accscalar_t(in_val[j]) - max_value);
+          sum_value += std::exp(accscalar_t(in_val[j]) - max_value);
}
}
sum_value = sycl::reduce_over_group(
item.get_group(), sum_value, sycl::plus<accscalar_t>());
if (LogSoftMax)
-      sum_value = ::log(sum_value);
+      sum_value = std::log(sum_value);
else
sum_value = accscalar_t(1) / sum_value;

@@ -543,7 +543,7 @@ struct SoftmaxForwardKernelFunctor {
in_data_[group_offset + linear_idx] - max_value - sum_value);
else
out_data_[group_offset + linear_idx] = static_cast<scalar_t>(
-            ::exp(in_data_[group_offset + linear_idx] - max_value) *
+            std::exp(in_data_[group_offset + linear_idx] - max_value) *
sum_value);
}
}
@@ -556,8 +556,8 @@ struct SoftmaxForwardKernelFunctor {
in_val[j] =
static_cast<scalar_t>(in_val[j] - max_value - sum_value);
else
-          in_val[j] =
-              static_cast<scalar_t>(::exp(in_val[j] - max_value) * sum_value);
+          in_val[j] = static_cast<scalar_t>(
+              std::exp(in_val[j] - max_value) * sum_value);
}
*(reinterpret_cast<vec_t*>(
out_data_ + group_offset - start + i * vec_size)) = in_val;
@@ -670,14 +670,14 @@ struct SpatialSoftmaxForwardKernelFunctor
value = *(reinterpret_cast<vec_t*>(in_data_ + group_offset + offset));
#pragma unroll(vec_size)
for (int j = 0; j < vec_size; ++j) {
-      sum_value[j] = ::exp(value[j] - max_value[j]);
+      sum_value[j] = std::exp(value[j] - max_value[j]);
}
for (int i = local_row_id + block_row_; i < dim_size_; i += block_row_) {
offset = i * inner_size_ + global_col * vec_size;
value = *(reinterpret_cast<vec_t*>(in_data_ + group_offset + offset));
#pragma unroll(vec_size)
for (int j = 0; j < vec_size; ++j) {
-        sum_value[j] += ::exp(value[j] - max_value[j]);
+        sum_value[j] += std::exp(value[j] - max_value[j]);
}
}
if (block_row_ > 1) {
@@ -690,15 +690,15 @@ struct SpatialSoftmaxForwardKernelFunctor
#pragma unroll(vec_size)
for (int j = 0; j < vec_size; ++j) {
if (LogSoftMax)
-        sum_value[j] = ::log(local_data_[0][local_col_id][j]);
+        sum_value[j] = std::log(local_data_[0][local_col_id][j]);
else
sum_value[j] = accscalar_t(1) / local_data_[0][local_col_id][j];
}
} else {
#pragma unroll(vec_size)
for (int j = 0; j < vec_size; ++j) {
if (LogSoftMax)
-        sum_value[j] = ::log(sum_value[j]);
+        sum_value[j] = std::log(sum_value[j]);
else
sum_value[j] = accscalar_t(1) / sum_value[j];
}
@@ -717,7 +717,7 @@ struct SpatialSoftmaxForwardKernelFunctor
static_cast<scalar_t>(in_val[j] - max_value[j] - sum_value[j]);
else
in_val[j] = static_cast<scalar_t>(
-            ::exp(in_val[j] - max_value[j]) * sum_value[j]);
+            std::exp(in_val[j] - max_value[j]) * sum_value[j]);
}
*(reinterpret_cast<vec_t*>(out_data_ + group_offset + offset)) = in_val;
}
@@ -890,7 +890,7 @@ struct DispatchSoftmaxBackwardKernelFunctor
for (int j = 0; j < vec_size; ++j) {
if (LogSoftMax) {
reg_out[i][j] = static_cast<scalar_t>(
-            reg_gradout[i][j] - ::exp(reg_out[i][j]) * sum_value);
+            reg_gradout[i][j] - std::exp(reg_out[i][j]) * sum_value);
} else {
reg_out[i][j] = static_cast<scalar_t>(
reg_out[i][j] * (reg_gradout[i][j] - sum_value));
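These two branches apply the standard gradients. Writing y for the saved forward output and g for the incoming gradient, sum_value is \sum_j g_j in the LogSoftMax case and \sum_j g_j y_j otherwise:

\frac{\partial L}{\partial x_i} = g_i - e^{y_i} \sum_j g_j \quad (\text{log-softmax, } y_i = \mathrm{logsoftmax}(x)_i),
\qquad
\frac{\partial L}{\partial x_i} = y_i \Bigl(g_i - \sum_j g_j\,y_j\Bigr) \quad (\text{softmax}).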
@@ -1117,7 +1117,7 @@ struct SoftmaxBackwardKernelFunctor {
auto offset = group_offset + linear_idx;
if (LogSoftMax) {
gradInput_[offset] =
-            gradOutput_[offset] - ::exp(output_[offset]) * sum_value;
+            gradOutput_[offset] - std::exp(output_[offset]) * sum_value;
} else {
gradInput_[offset] =
output_[offset] * (gradOutput_[offset] - sum_value);
@@ -1130,7 +1130,7 @@ struct SoftmaxBackwardKernelFunctor {
#pragma unroll(vec_size)
for (int j = 0; j < vec_size; ++j) {
if (LogSoftMax) {
-          out_val[j] = grad_val[j] - ::exp(out_val[j]) * sum_value;
+          out_val[j] = grad_val[j] - std::exp(out_val[j]) * sum_value;
} else {
out_val[j] = out_val[j] * (grad_val[j] - sum_value);
}
@@ -1259,7 +1259,7 @@ struct SpatialSoftmaxBackwardKernelFunctor
for (int j = 0; j < vec_size; ++j) {
if (LogSoftMax) {
out_val[j] = static_cast<scalar_t>(
-              gradout_val[j] - ::exp(out_val[j]) * sum_value[j]);
+              gradout_val[j] - std::exp(out_val[j]) * sum_value[j]);
} else {
out_val[j] = static_cast<scalar_t>(
out_val[j] * (gradout_val[j] - sum_value[j]));
