Commit 95dccc4

further change

Stonepia committed Jul 26, 2024
1 parent 90edd0f · commit 95dccc4
Showing 6 changed files with 13 additions and 13 deletions.
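For context on the change itself: ATen tensors expose an untyped legacy data_ptr() plus a const-correct pair, const_data_ptr() and mutable_data_ptr(), and this commit moves call sites onto the pair so that read-only and written-to storage is distinguished where the pointer is taken. A minimal sketch of the three accessors (standard ATen API; the function and the assumption that both tensors hold Float are illustrative):

#include <ATen/ATen.h>

void accessors(at::Tensor& out, const at::Tensor& in) {
  // Legacy, intent-free accessor: an untyped pointer to the storage.
  void* legacy = out.data_ptr();

  // Const-correct pair: spell out whether the storage will be written.
  const void* read_only = in.const_data_ptr();
  void* writable = out.mutable_data_ptr();

  // Typed variants additionally TORCH_CHECK that the dtype matches
  // (assumes both tensors are Float here).
  const float* in_f = in.const_data_ptr<float>();
  float* out_f = out.mutable_data_ptr<float>();
  (void)legacy; (void)read_only; (void)writable; (void)in_f; (void)out_f;
}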
src/ATen/native/xpu/NMS.cpp (2 changes: 1 addition & 1 deletion)

@@ -48,7 +48,7 @@ Tensor nms(const Tensor& dets, const Tensor& scores, double iou_threshold_) {
   auto mask = nms_kernel(dets_sorted, iou_threshold);

   at::Tensor mask_cpu = mask.to(at::kCPU);
-  unsigned long long* mask_host = (unsigned long long*)mask_cpu.data_ptr();
+  unsigned long long* mask_host = (unsigned long long*)mask_cpu.mutable_data_ptr();

   std::vector<unsigned long long> remv(col_blocks);
   memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
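The pointer here is obtained with mutable_data_ptr() because it is cast to a non-const unsigned long long*; for a pure read, const_data_ptr() with a const cast target would also do. A hedged sketch of the surrounding pattern (mask_to_host is a hypothetical helper; the real code walks the words in place):

#include <ATen/ATen.h>
#include <vector>

// Hypothetical helper mirroring the hunk above: copy a device-side bitmask
// tensor to the CPU and snapshot its storage as 64-bit words.
std::vector<unsigned long long> mask_to_host(const at::Tensor& mask, int64_t n_words) {
  at::Tensor mask_cpu = mask.to(at::kCPU);
  auto* words = static_cast<unsigned long long*>(mask_cpu.mutable_data_ptr());
  return std::vector<unsigned long long>(words, words + n_words);
}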
src/ATen/native/xpu/sycl/MultiTensorApply.h (4 changes: 2 additions & 2 deletions)

@@ -164,7 +164,7 @@ void multi_tensor_apply(
       tensor_lists[0][0].options().dtype(at::kByte));
   auto metaAddressInput =
       static_cast<TLMetaForAddressScalar<scalar_vals_t, depth>*>(
-          addressStorage.data_ptr());
+          addressStorage.mutable_data_ptr());
   TLMetaForAddressScalar<scalar_vals_t, depth>* tlAddress = nullptr;

   auto tlAddress_dptr = at::xpu::HostAlloc(
@@ -356,7 +356,7 @@ void multi_tensor_apply_for_fused_optimizer(
   auto wgMetaStorage = at::empty(
       {(int)(sizeof(TLMetaForWG) * totalWG)},
       tensor_lists[0][0].options().dtype(at::kByte));
-  auto metaWGInput = static_cast<TLMetaForWG*>(wgMetaStorage.data_ptr());
+  auto metaWGInput = static_cast<TLMetaForWG*>(wgMetaStorage.mutable_data_ptr());
   TLMetaForWG* tlWGMeta = nullptr;

   auto tlWGMeta_dptr = at::xpu::HostAlloc(sizeof(TLMetaForWG) * totalWG);
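Both hunks in this file follow one idiom: scratch metadata is allocated as a flat kByte tensor on the right device, then reinterpreted as an array of structs. A hedged sketch of that idiom with a stand-in Meta struct (the real TLMeta* layouts live in this header):

#include <ATen/ATen.h>

struct Meta {  // stand-in for TLMetaForAddressScalar / TLMetaForWG
  void* addresses[4];
  int64_t numel;
};

void fill_meta(const at::Tensor& like, int64_t count) {
  // Byte storage sized for `count` records, allocated with the same options
  // as `like`; it must outlive every use of the reinterpreted pointer below.
  auto storage = at::empty(
      {static_cast<int64_t>(sizeof(Meta) * count)},
      like.options().dtype(at::kByte));
  // mutable_data_ptr() states the write intent the old data_ptr() left implicit.
  auto* meta = static_cast<Meta*>(storage.mutable_data_ptr());
  for (int64_t i = 0; i < count; ++i) {
    meta[i].numel = 0;  // populate records before handing them to a kernel
  }
}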
src/ATen/native/xpu/sycl/NMSKernel.cpp (2 changes: 1 addition & 1 deletion)

@@ -112,7 +112,7 @@ Tensor nms_kernel(const Tensor& dets_sorted, float iou_threshold) {
   sycl::range<2> local_range{1, (size_t)nms_items_per_group};
   using acc_t = acc_type_device<scalar_t, kXPU>;
   auto dets_sorted_ptr = dets_sorted.const_data_ptr<scalar_t>();
-  auto mask_ptr = (unsigned long long*)mask.data_ptr();
+  auto mask_ptr = (unsigned long long*)mask.mutable_data_ptr();
   auto caller = NMSKernelFunctor<scalar_t, acc_t>(
       dets_num, iou_threshold, dets_sorted_ptr, mask_ptr);
   sycl_kernel_submit(
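This hunk shows the intended pairing: the read-only boxes come in through const_data_ptr<scalar_t>(), the written mask through mutable_data_ptr(). The mask keeps an untyped accessor plus C-style cast, plausibly because unsigned long long is not among ATen's dispatched scalar types (int64_t is long on LP64), so no typed overload applies. A minimal sketch of the same read/write split at a kernel boundary (launch_scale is illustrative):

#include <ATen/ATen.h>

// Illustrative: capture inputs as const pointers and outputs as mutable
// ones, so constness is enforced where the kernel arguments are formed.
void launch_scale(const at::Tensor& src, at::Tensor& dst, float factor) {
  const float* in = src.const_data_ptr<float>();
  float* out = dst.mutable_data_ptr<float>();
  for (int64_t i = 0; i < src.numel(); ++i) {
    out[i] = in[i] * factor;  // the real code submits a SYCL kernel instead
  }
}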
src/ATen/native/xpu/sycl/Shape.cpp (4 changes: 2 additions & 2 deletions)

@@ -186,7 +186,7 @@ void parallel_cat(
     int nDims) {
   // First, let's set up our kernel parameters. We start with a raw pointer to
   // the storage for the output Tensor.
-  scalar_out_t* data = static_cast<scalar_out_t*>(out.data_ptr());
+  scalar_out_t* data = static_cast<scalar_out_t*>(out.mutable_data_ptr());

   // Kernel Parameter
   int64_t tensorMetadataSize =
@@ -195,7 +195,7 @@
   auto d_inputs_storage =
       at::empty({tensorMetadataSize}, out.options().dtype(at::kByte));
   auto d_inputs = static_cast<CatArrInputTensor<scalar_in_t, unsigned int>*>(
-      d_inputs_storage.data_ptr());
+      d_inputs_storage.mutable_data_ptr());

   OutputTensorSizeStride<unsigned int, CAT_ARRAY_MAX_INPUT_DIMS> param;
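The output pointer is taken via the untyped accessor plus static_cast, presumably because scalar_out_t need not equal the tensor's dtype here. When the types do match, the templated accessor is the equivalent spelling and adds a runtime dtype check. A small sketch of the two forms (float chosen for illustration):

#include <ATen/ATen.h>

void two_spellings(at::Tensor& out) {  // assume out holds Float
  // Untyped accessor + cast: no dtype check, as in the hunk above.
  float* a = static_cast<float*>(out.mutable_data_ptr());
  // Typed accessor: TORCH_CHECKs that out.scalar_type() is Float.
  float* b = out.mutable_data_ptr<float>();
  (void)a; (void)b;
}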
src/ATen/native/xpu/sycl/SoftMaxKernels.cpp (10 changes: 5 additions & 5 deletions)

@@ -1367,9 +1367,9 @@ void spatial_softmax_forward(Tensor& output, Tensor& input, int dim) {
   using vec_t = at::native::memory::aligned_vector<scalar_t, max_vec_size>;
   constexpr int align_bytes = alignof(vec_t);
   int input_start =
-      ((uint64_t)input.data_ptr()) % align_bytes / sizeof(scalar_t);
+      ((uint64_t)input.const_data_ptr()) % align_bytes / sizeof(scalar_t);
   int output_start =
-      ((uint64_t)output.data_ptr()) % align_bytes / sizeof(scalar_t);
+      ((uint64_t)output.const_data_ptr()) % align_bytes / sizeof(scalar_t);

   // decide indexing range: uint32_t (4GB) or uint64_t (>4GB)
   bool can_use_32bit_index =
@@ -1558,11 +1558,11 @@ void spatial_softmax_backward(
   using vec_t = at::native::memory::aligned_vector<scalar_t, max_vec_size>;
   constexpr int align_bytes = alignof(vec_t);
   int gradin_start =
-      ((uint64_t)gradInput.data_ptr()) % align_bytes / sizeof(scalar_t);
+      ((uint64_t)gradInput.const_data_ptr()) % align_bytes / sizeof(scalar_t);
   int output_start =
-      ((uint64_t)output.data_ptr()) % align_bytes / sizeof(scalar_t);
+      ((uint64_t)output.const_data_ptr()) % align_bytes / sizeof(scalar_t);
   int gradoutput_start =
-      ((uint64_t)gradOutput.data_ptr()) % align_bytes / sizeof(scalar_t);
+      ((uint64_t)gradOutput.const_data_ptr()) % align_bytes / sizeof(scalar_t);

   // decide indexing range: uint32_t (4GB) or uint64_t (>4GB)
   bool can_use_32bit_index = canUse32BitIndexMath(gradInput) &&
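const_data_ptr() is sufficient in all five sites, including output and gradInput, because the pointer is never dereferenced here: only its address value feeds the alignment computation. That expression yields how many scalar_t elements the base pointer sits past a vec_t alignment boundary, i.e. how many scalars the vectorized loop must peel off first. A minimal check of the arithmetic (the concrete values are illustrative):

#include <cassert>
#include <cstdint>

int main() {
  constexpr int align_bytes = 16;      // e.g. alignof(vec_t) for 4 x float
  constexpr int elem = sizeof(float);  // 4 bytes per scalar_t
  std::uint64_t addr = 0x1008;         // pointer 8 bytes past a boundary
  int start = static_cast<int>(addr % align_bytes / elem);  // 8 / 4 == 2
  assert(start == 2);                  // two scalar elements of peel
  return 0;
}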
src/ATen/native/xpu/sycl/TriangularOpsKernels.cpp (4 changes: 2 additions & 2 deletions)

@@ -83,8 +83,8 @@ void apply_triu_tril(Tensor& result, const Tensor& self, const int64_t k) {
   IndexType result_stride_0 = (IndexType)result.stride(-2);
   IndexType result_stride_1 = (IndexType)result.stride(-1);

-  scalar_t* result_ptr = (scalar_t*)(result.data_ptr());
-  scalar_t* self_ptr = (scalar_t*)(self.data_ptr());
+  scalar_t* result_ptr = result.data_ptr<scalar_t>();
+  scalar_t* self_ptr = self.data_ptr<scalar_t>();

   ApplyTriuTrilKernelFunctor<scalar_t, IndexType, upper> kfn(
       k,
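Replacing the C-style casts with data_ptr<scalar_t>() trades a silent reinterpretation for a checked one: the templated accessor TORCH_CHECKs the dtype, so a dispatch mismatch surfaces as an error instead of garbage reads. A sketch of the failure mode it catches (illustrative; imagine t actually holds Double):

#include <ATen/ATen.h>

void typed_vs_cast(at::Tensor& t) {  // suppose t is kDouble
  // Silent misinterpretation: compiles, runs, reads double bits as float.
  float* unchecked = (float*)(t.data_ptr());
  // Checked access: throws c10::Error because t.scalar_type() != kFloat.
  float* checked = t.data_ptr<float>();
  (void)unchecked; (void)checked;
}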
