Commit

Enable cow_input_* test cases (#1067)
Resolve issues in #281

---------

Co-authored-by: ZhiweiYan-96 <[email protected]>
Co-authored-by: Yutao Xu <[email protected]>
3 people authored Nov 14, 2024
1 parent 023194f commit 804a03b
Showing 41 changed files with 671 additions and 675 deletions.
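
Note on the pattern: every change in this commit replaces an unqualified data access with one that states read or write intent, so copy-on-write (COW) inputs are not needlessly materialized. In ATen, const_data_ptr<T>() only reads a tensor and leaves a lazily cloned COW storage shared, while mutable_data_ptr<T>() declares a write and materializes the copy first; the legacy data_ptr<T>() counts as a mutable access, which is what the cow_input_* tests flag on read-only operands. A minimal sketch of the idiom, assuming the ATen C++ API (the wrapper name and float dtype are illustrative, not from this commit):

    #include <ATen/ATen.h>

    // Illustrative wrapper: read from `input`, write to `output`.
    void copy_like_kernel(const at::Tensor& input, at::Tensor& output) {
      // Read-only intent: does not materialize a COW tensor.
      const float* in = input.const_data_ptr<float>();
      // Write intent: materializes a COW tensor before it is modified.
      float* out = output.mutable_data_ptr<float>();
      for (int64_t i = 0; i < input.numel(); ++i) {
        out[i] = in[i];
      }
    }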
18 changes: 9 additions & 9 deletions src/ATen/native/xpu/sycl/AdaptiveAveragePooling2dKernels.cpp
@@ -61,7 +61,7 @@ struct AdaptiveAvgPool2dBwdKernelFunctor {
   }
 
   AdaptiveAvgPool2dBwdKernelFunctor(
-      PackedTensorAccessor64<scalar_t, 4> gyacc,
+      PackedTensorAccessor64<const scalar_t, 4> gyacc,
       PackedTensorAccessor64<scalar_t, 4> gxacc)
       : gyacc_(gyacc), gxacc_(gxacc) {
     ib_ = gxacc_.size(0);
@@ -97,7 +97,7 @@ struct AdaptiveAvgPool2dBwdKernelFunctor {
   int64_t numel_;
   int global_range_;
   int local_range_;
-  PackedTensorAccessor64<scalar_t, 4> gyacc_;
+  PackedTensorAccessor64<const scalar_t, 4> gyacc_;
   PackedTensorAccessor64<scalar_t, 4> gxacc_;
 };
 
@@ -183,7 +183,7 @@ struct AdaptiveAvgPool2dBwdSLMKernelFunctor
   }
 
   AdaptiveAvgPool2dBwdSLMKernelFunctor(
-      PackedTensorAccessor64<scalar_t, 4> gyacc,
+      PackedTensorAccessor64<const scalar_t, 4> gyacc,
       PackedTensorAccessor64<scalar_t, 4> gxacc)
       : gyacc_(gyacc), gxacc_(gxacc) {
     ib_ = gxacc_.size(0);
@@ -220,7 +220,7 @@ struct AdaptiveAvgPool2dBwdSLMKernelFunctor
   int64_t numel_;
   int local_range_;
   int global_range_;
-  PackedTensorAccessor64<scalar_t, 4> gyacc_;
+  PackedTensorAccessor64<const scalar_t, 4> gyacc_;
   PackedTensorAccessor64<scalar_t, 4> gxacc_;
   sycl_local_acc_t<int> _oh0_cached_;
   sycl_local_acc_t<int> _oh1_cached_;
@@ -282,7 +282,7 @@ void adaptive_avg_pool2d_backward_kernel(
       "adaptive_avg_pool2d_backward_xpu",
       [&]() {
         using opmath_t = at::opmath_type<scalar_t>;
-        auto gyacc = grad_output.packed_accessor64<scalar_t, 4>();
+        auto gyacc = grad_output.packed_accessor64<const scalar_t, 4>();
         auto gxacc = grad_input.packed_accessor64<scalar_t, 4>();
 
         int64_t ohw01_shared_size =
@@ -375,7 +375,7 @@ struct AdaptiveAvgPool2dKernelFunctor {
       int ow,
       int64_t numel,
       int global_range,
-      PackedTensorAccessor64<scalar_t, 4> input,
+      PackedTensorAccessor64<const scalar_t, 4> input,
       PackedTensorAccessor64<scalar_t, 4> output)
       : ih_(ih),
         iw_(iw),
@@ -397,13 +397,13 @@ struct AdaptiveAvgPool2dKernelFunctor {
   int ow_;
   int64_t numel_;
   int global_range_;
-  PackedTensorAccessor64<scalar_t, 4> input_;
+  PackedTensorAccessor64<const scalar_t, 4> input_;
   PackedTensorAccessor64<scalar_t, 4> output_;
 };
 
 template <typename scalar_t, typename opmath_t, bool is_channels_last>
 void launch_adaptive_avg_pool2d_kernel(
-    PackedTensorAccessor64<scalar_t, 4> input,
+    PackedTensorAccessor64<const scalar_t, 4> input,
     PackedTensorAccessor64<scalar_t, 4> output) {
   int ih = input.size(2);
   int iw = input.size(3);
@@ -495,7 +495,7 @@ void adaptive_avg_pool2d_kernel(
       "adaptive_avg_pool2d_xpu",
       [&]() {
         using opmath_t = at::opmath_type<scalar_t>;
-        auto iacc = input_.packed_accessor64<scalar_t, 4>();
+        auto iacc = input_.packed_accessor64<const scalar_t, 4>();
         auto oacc = output.packed_accessor64<scalar_t, 4>();
         if (is_smf_channels_last(output)) {
           launch_adaptive_avg_pool2d_kernel<scalar_t, opmath_t, true>(
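
Kernels that index through packed accessors express the same intent in the accessor's element type: an accessor over const scalar_t can only be read, and requesting it does not materialize a COW input. A sketch of the pattern under that assumption (the row-sum kernel and its names are illustrative, not part of this commit):

    #include <ATen/ATen.h>

    // Sum each row of a 2-D tensor: `in` is read-only, `out` is written.
    template <typename scalar_t>
    void sum_rows(
        at::PackedTensorAccessor64<const scalar_t, 2> in,
        at::PackedTensorAccessor64<scalar_t, 1> out) {
      for (int64_t r = 0; r < in.size(0); ++r) {
        scalar_t acc = 0;
        for (int64_t c = 0; c < in.size(1); ++c) {
          acc += in[r][c]; // const element type permits reads only
        }
        out[r] = acc;
      }
    }

    // Usage sketch:
    //   sum_rows<float>(
    //       input.packed_accessor64<const float, 2>(),
    //       output.packed_accessor64<float, 1>());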
16 changes: 8 additions & 8 deletions src/ATen/native/xpu/sycl/AveragePool2dKernels.cpp
@@ -247,8 +247,8 @@ void launch_avg_pool2d_channels_last_kernel(
     const int divisor_override,
     const bool count_include_pad,
     const bool use_divisor) {
-  scalar_t* top_data = output.data_ptr<scalar_t>();
-  const scalar_t* bottom_data = input.data_ptr<scalar_t>();
+  scalar_t* top_data = output.mutable_data_ptr<scalar_t>();
+  const scalar_t* bottom_data = input.const_data_ptr<scalar_t>();
 
   auto& queue = at::xpu::getCurrentSYCLQueue();
   const uint32_t group_size = static_cast<int>(syclMaxWorkItemsPerEU());
@@ -295,8 +295,8 @@ void launch_avg_pool2d_kernel(
     const int divisor_override,
     const bool count_include_pad,
     const bool use_divisor) {
-  scalar_t* top_data = output.data_ptr<scalar_t>();
-  const scalar_t* bottom_data = input.data_ptr<scalar_t>();
+  scalar_t* top_data = output.mutable_data_ptr<scalar_t>();
+  const scalar_t* bottom_data = input.const_data_ptr<scalar_t>();
 
   auto& queue = at::xpu::getCurrentSYCLQueue();
   const uint32_t group_size = static_cast<int>(syclMaxWorkItemsPerEU());
@@ -552,8 +552,8 @@ void launch_avg_pool2d_backward_channels_last_kernel(
     const int divisor_override,
     bool count_include_pad,
     bool use_divisor) {
-  const scalar_t* top_data = grad_output.data_ptr<scalar_t>();
-  scalar_t* bottom_data = grad_input.data_ptr<scalar_t>();
+  const scalar_t* top_data = grad_output.const_data_ptr<scalar_t>();
+  scalar_t* bottom_data = grad_input.mutable_data_ptr<scalar_t>();
 
   auto& queue = at::xpu::getCurrentSYCLQueue();
   const uint32_t group_size = static_cast<int>(syclMaxWorkItemsPerEU());
@@ -603,8 +603,8 @@ void launch_avg_pool2d_backward_kernel(
     const int divisor_override,
     bool count_include_pad,
     bool use_divisor) {
-  const scalar_t* top_data = grad_output.data_ptr<scalar_t>();
-  scalar_t* bottom_data = grad_input.data_ptr<scalar_t>();
+  const scalar_t* top_data = grad_output.const_data_ptr<scalar_t>();
+  scalar_t* bottom_data = grad_input.mutable_data_ptr<scalar_t>();
 
   auto& queue = at::xpu::getCurrentSYCLQueue();
   const uint32_t group_size = static_cast<int>(syclMaxWorkItemsPerEU());
21 changes: 8 additions & 13 deletions src/ATen/native/xpu/sycl/BucketizationKernels.cpp
@@ -91,17 +91,15 @@ struct SearchsortedKernelFunctor {
       int64_t idim_in,
       int64_t idim_bd,
       const int64_t* data_st,
-      output_t* data_out,
       bool is_1d_boundaries,
-      input_t* data_in_data,
-      input_t* data_bd_data,
+      const input_t* data_in_data,
+      const input_t* data_bd_data,
       output_t* data_out_data)
       : right_(right),
         numel_in_(numel_in),
         idim_in_(idim_in),
         idim_bd_(idim_bd),
         data_st_(data_st),
-        data_out_(data_out),
         is_1d_boundaries_(is_1d_boundaries),
         data_in_data_(data_in_data),
         data_bd_data_(data_bd_data),
@@ -113,10 +111,9 @@ struct SearchsortedKernelFunctor {
   int64_t idim_in_;
   int64_t idim_bd_;
   const int64_t* data_st_;
-  output_t* data_out_;
   bool is_1d_boundaries_;
-  input_t* data_in_data_;
-  input_t* data_bd_data_;
+  const input_t* data_in_data_;
+  const input_t* data_bd_data_;
   output_t* data_out_data_;
 };
 template <typename input_t, typename output_t>
@@ -133,20 +130,18 @@ void searchsorted_template(
   int64_t idim_bd = boundaries.sizes().back();
 
   const int64_t* data_st =
-      sorter.defined() ? sorter.data_ptr<int64_t>() : nullptr;
-  output_t* data_out = result.data_ptr<output_t>();
+      sorter.defined() ? sorter.const_data_ptr<int64_t>() : nullptr;
 
   bool is_1d_boundaries = boundaries.dim() == 1;
-  auto data_in_data = input.data_ptr<input_t>();
-  auto data_bd_data = boundaries.data_ptr<input_t>();
-  auto data_out_data = result.data_ptr<output_t>();
+  auto data_in_data = input.const_data_ptr<input_t>();
+  auto data_bd_data = boundaries.const_data_ptr<input_t>();
+  auto data_out_data = result.mutable_data_ptr<output_t>();
   SearchsortedKernelFunctor<input_t, output_t> kfn(
       right,
       numel_in,
       idim_in,
       idim_bd,
       data_st,
-      data_out,
       is_1d_boundaries,
       data_in_data,
       data_bd_data,
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/Col2ImKernel.cpp
@@ -236,7 +236,7 @@ void col2im_kernel(
     output_n = output.select(0, elt);
 
     col2im_kernel<scalar_t>(
-        input_n.data_ptr<scalar_t>(),
+        input_n.const_data_ptr<scalar_t>(),
         n_output_plane,
         output_height,
         output_width,
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/CumprodKernel.cpp
@@ -17,7 +17,7 @@ void launch_cumprod_kernel(
       "cumprod_xpu",
       [&]() {
         scalar_t init = 1;
-        scan<INCLUSIVE_TYPE, scalar_t, scalar_t>(
+        scan<INCLUSIVE_TYPE, const scalar_t, scalar_t>(
             result, self, dim, init, std::multiplies<scalar_t>());
       });
 }
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/sycl/CumsumKernel.cpp
@@ -17,7 +17,7 @@ void launch_cumsum_kernel(
       "cumsum_xpu",
       [&]() {
         scalar_t init = 0;
-        scan<INCLUSIVE_TYPE, scalar_t, scalar_t>(
+        scan<INCLUSIVE_TYPE, const scalar_t, scalar_t>(
             result, self, dim, init, std::plus<scalar_t>());
       });
 }
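
The cumsum/cumprod kernels thread const-ness through a template parameter instead: instantiating scan with const scalar_t as the input element type lets the implementation obtain its source pointer via const_data_ptr() while still writing scalar_t results. A minimal sketch of an inclusive scan with independent input and output element types (a free function with illustrative names, not the repository's scan):

    #include <cstdint>
    #include <functional>

    // Inclusive scan: out[i] = op(out[i - 1], in[i]), seeded with `init`.
    template <typename input_t, typename output_t, typename BinaryOp>
    void inclusive_scan(
        const input_t* in, output_t* out, int64_t n, output_t init, BinaryOp op) {
      output_t acc = init;
      for (int64_t i = 0; i < n; ++i) {
        acc = op(acc, static_cast<output_t>(in[i])); // input is never written
        out[i] = acc;
      }
    }

    // Usage sketch, mirroring the cumsum instantiation above:
    //   inclusive_scan(src, dst, n, 0.0f, std::plus<float>());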