Skip to content

Commit

Permalink
Merge branch 'main' into mengfeil/weekly
Browse files Browse the repository at this point in the history
  • Loading branch information
mengfei25 authored Jul 29, 2024
2 parents de5e34f + 0608225 commit 2f31d86
Show file tree
Hide file tree
Showing 21 changed files with 1,323 additions and 134 deletions.
54 changes: 52 additions & 2 deletions src/ATen/native/xpu/Indexing.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>
#include <ATen/MemoryOverlap.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/core/op_registration/adaption.h>
#include <ATen/xpu/XPUNativeFunctions.h>

#include <ATen/native/xpu/sycl/IndexingKernels.h>
#include <ATen/xpu/XPUNativeFunctions.h>
#include <comm/TensorInfo.h>

namespace at {
Expand Down Expand Up @@ -44,4 +45,53 @@ Tensor XPUNativeFunctions::index_select(
return index_select_out(self, dim, index, out);
}

// Shared implementation for masked_select / masked_select_out.
// Gathers the elements of `self` at positions where `mask` is true into
// `result`, by broadcasting `mask` and `self` against each other and then
// delegating to boolean-tensor indexing (index_out) with the mask as index.
static Tensor& masked_select_out_impl(
    Tensor& result,
    const Tensor& self,
    const Tensor& mask) {
  // Suppress named-tensor name propagation; the public entry points compute
  // the broadcast outnames themselves before calling in here.
  NoNamesGuard guard;

  TORCH_CHECK(
      mask.scalar_type() == ScalarType::Bool,
      "masked_select: expected BoolTensor for mask");
  TORCH_CHECK(
      self.scalar_type() == result.scalar_type(),
      "masked_select(): self and result must have the same scalar type");

  // Promote 0-dim inputs to 1-dim so the broadcast below is well-defined.
  // MaybeOwned lets us avoid materializing a new Tensor handle when no
  // unsqueeze is needed (the common case).
  auto mask_temp = (mask.dim() == 0)
      ? c10::MaybeOwned<Tensor>::owned(mask.unsqueeze(0))
      : c10::MaybeOwned<Tensor>::borrowed(mask);
  auto self_temp = (self.dim() == 0)
      ? c10::MaybeOwned<Tensor>::owned(self.unsqueeze(0))
      : c10::MaybeOwned<Tensor>::borrowed(self);

  // Cannot reassign to mask_temp and self_temp here! if they are
  // owning and expand_outplace returns a borrow, the returned borrow
  // would dangle.
  auto mask_self_expanded = expand_outplace(*mask_temp, *self_temp);
  // get<1> is the expanded self (the tensor being indexed); get<0> is the
  // expanded boolean mask, passed as the single index. The move of the
  // mask into the index list is safe: mask_self_expanded is not used again.
  XPUNativeFunctions::index_out(
      *std::get<1>(mask_self_expanded),
      c10::List<std::optional<at::Tensor>>(
          {*std::move(std::get<0>(mask_self_expanded))}),
      result);

  return result;
}

// Functional masked_select: allocates an empty result tensor carrying
// self's options, then fills it via the shared out-variant implementation.
Tensor XPUNativeFunctions::masked_select(
    const Tensor& self,
    const Tensor& mask) {
  namedinference::compute_broadcast_outnames(self, mask);
  Tensor out = at::empty({0}, self.options());
  masked_select_out_impl(out, self, mask);
  return out;
}

// Out-variant of masked_select: writes the selected elements of `self`
// (where `mask` is true) into the caller-provided `result`.
Tensor& XPUNativeFunctions::masked_select_out(
    const Tensor& self,
    const Tensor& mask,
    Tensor& result) {
  namedinference::compute_broadcast_outnames(self, mask);
  masked_select_out_impl(result, self, mask);
  return result;
}

} // namespace at
5 changes: 3 additions & 2 deletions src/ATen/native/xpu/NMS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,15 @@ Tensor nms(const Tensor& dets, const Tensor& scores, double iou_threshold_) {
auto mask = nms_kernel(dets_sorted, iou_threshold);

at::Tensor mask_cpu = mask.to(at::kCPU);
unsigned long long* mask_host = (unsigned long long*)mask_cpu.data_ptr();
unsigned long long* mask_host =
(unsigned long long*)mask_cpu.mutable_data_ptr();

std::vector<unsigned long long> remv(col_blocks);
memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

at::Tensor keep =
at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU));
int64_t* keep_out = (int64_t*)keep.data_ptr();
int64_t* keep_out = keep.mutable_data_ptr<int64_t>();

int num_to_keep = 0;
for (int i = 0; i < dets_num; i++) {
Expand Down
34 changes: 34 additions & 0 deletions src/ATen/native/xpu/ReduceOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,40 @@ Tensor XPUNativeFunctions::amin(
return out;
}

// Sum of tensor elements treating NaN as zero, written into `result`.
//
// Args:
//   self:      input tensor.
//   dim:       optional set of dimensions to reduce over (nullopt = all dims).
//   keepdim:   if true, retain reduced dimensions with size 1.
//   opt_dtype: optional accumulation/output dtype override.
//   result:    output tensor; resized here to the reduction shape.
//
// Returns: `result`.
Tensor& XPUNativeFunctions::nansum_out(
    const Tensor& self,
    at::OptionalIntArrayRef dim,
    bool keepdim,
    // std::-qualified for consistency with the sibling nansum overload below.
    std::optional<ScalarType> opt_dtype,
    Tensor& result) {
  // For integral types, use existing sum as
  // integral types don't have `Nan`.
  if (c10::isIntegralType(self.scalar_type(), true)) {
    return at::sum_out(result, self, dim, keepdim, opt_dtype);
  }

  auto out_dtype = infer_dtype_from_optional(self, opt_dtype, result);
  result = resize_reduction(result, self, dim, keepdim, out_dtype);
  auto iter = meta::make_reduction_from_out_ty(
      self, result, dim, keepdim, result.scalar_type());

  if (iter.numel() == 0) {
    // Empty reduction: a sum over no elements is zero by convention.
    result = result.zero_();
  } else {
    native::xpu::nansum_kernel(iter);
  }
  return result;
}

// Functional nansum: allocates the result tensor and forwards all
// arguments to the out-variant, which handles dtype inference and resizing.
Tensor XPUNativeFunctions::nansum(
    const Tensor& self,
    at::OptionalIntArrayRef dim,
    bool keepdim,
    std::optional<ScalarType> opt_dtype) {
  Tensor out;
  nansum_out(self, dim, keepdim, opt_dtype, out);
  return out;
}

static ScalarType get_result_or_self_value_dtype(
const Tensor& self,
const Tensor& result,
Expand Down
Loading

0 comments on commit 2f31d86

Please sign in to comment.