Skip to content

Commit

Permalink
Merge branch 'main' into ruijie/enhance_summary
Browse files Browse the repository at this point in the history
  • Loading branch information
RUIJIEZHONG66166 authored Jul 30, 2024
2 parents 88f7a0b + 8a821bf commit 69cbd98
Show file tree
Hide file tree
Showing 44 changed files with 2,918 additions and 277 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ tacotron2,fail_to_run,fail_to_run,fail_to_run,fail_to_run,fail_to_run
timm_efficientdet,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load,model_fail_to_load
timm_efficientnet,pass,pass,pass,pass,pass
timm_nfnet,pass,pass,pass,pass,pass
timm_regnet,pass,pass,pass,pass,pass
timm_regnet,pass,fail_accuracy,pass,pass,pass
timm_resnest,pass,pass,pass,pass,pass
timm_vision_transformer,pass,pass,pass,pass,pass
timm_vision_transformer_large,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip,pass_due_to_skip
Expand Down
59 changes: 49 additions & 10 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ name: Nightly-OnDemand Tests

on:
schedule:
# GMT+8 21:00 every day
- cron: '0 13 * * *'
# GMT+8 21:00 Sunday through Thursday (cron day-of-week 0-4)
- cron: '0 13 * * 0-4'
# GMT+8 0:00 Saturday
- cron: '0 16 * * 5'
workflow_dispatch:
inputs:
pytorch:
Expand Down Expand Up @@ -78,7 +80,7 @@ jobs:
runs-on: pvc_e2e
# Don't run on forked repos
if: github.repository_owner == 'intel'
timeout-minutes: 900
timeout-minutes: 3600
env:
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
Expand Down Expand Up @@ -174,8 +176,10 @@ jobs:
echo "$GITHUB_ENV"
rm -rf ../pytorch/inductor_log
rm -rf /tmp/torchinductor_*
# Nightly launch
- name: Nightly Huggingface FP32/BF16/FP16 Inference & Training Accuracy Test
if: github.event_name == 'schedule'
if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: huggingface
Expand All @@ -185,7 +189,7 @@ jobs:
scenario: accuracy
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Nightly Torchbench BF16 Training Accuracy Test
if: github.event_name == 'schedule'
if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: torchbench
Expand All @@ -195,7 +199,7 @@ jobs:
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Nightly Timm_models FP16 Training Accuracy Test
if: github.event_name == 'schedule'
if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: timm_models
Expand All @@ -204,6 +208,38 @@ jobs:
scenario: accuracy
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# Weekly launch
- name: Weekly Huggingface Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: huggingface
env_prepare: true
dt: float32,bfloat16,float16,amp_bf16,amp_fp16
mode: inference,training
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Weekly Torchbench Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: torchbench
env_prepare: true
dt: float32,bfloat16,float16,amp_bf16,amp_fp16
mode: inference,training
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Weekly Timm_models Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: timm_models
env_prepare: true
dt: float32,bfloat16,float16,amp_bf16,amp_fp16
mode: inference,training
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# On-demand launch
- name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
if: github.event_name != 'schedule'
uses: ./.github/actions/inductor-xpu-e2e-test
Expand All @@ -216,7 +252,7 @@ jobs:
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Summarize archieve files
id: summary
if: always()
if: ${{ ! cancelled() }}
run: |
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
Expand All @@ -237,14 +273,14 @@ jobs:
exit 1
fi
- name: Upload Inductor XPU E2E Data
if: always()
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
with:
name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
path: ${{ github.workspace }}/upload_files

Tests-Failure-And-Report:
if: always()
if: ${{ ! cancelled() }}
runs-on: pvc_e2e
permissions:
issues: write
Expand Down Expand Up @@ -288,6 +324,9 @@ jobs:
test_type="On-demand"
test_issue_id=426
cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}"
elif [ "${{ github.event.schedule }}" == "0 16 * * 5" ];then
test_type="Weekly"
test_issue_id=432
else
test_type="Nightly"
test_issue_id=432
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ jobs:
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Summarize archieve files
if: always()
if: ${{ ! cancelled() }}
run: |
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
Expand All @@ -137,7 +137,7 @@ jobs:
exit 1
fi
- name: Upload Inductor XPU E2E Data
if: always()
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
with:
name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
Expand Down
84 changes: 82 additions & 2 deletions src/ATen/native/xpu/Indexing.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>
#include <ATen/MemoryOverlap.h>
#include <ATen/NamedTensorUtils.h>
#include <ATen/WrapDimUtils.h>
#include <ATen/core/op_registration/adaption.h>
#include <ATen/xpu/XPUNativeFunctions.h>

#include <ATen/native/xpu/sycl/IndexingKernels.h>
#include <ATen/xpu/XPUNativeFunctions.h>
#include <comm/TensorInfo.h>

namespace at {
Expand Down Expand Up @@ -44,4 +45,83 @@ Tensor XPUNativeFunctions::index_select(
return index_select_out(self, dim, index, out);
}

// In-place masked scatter for XPU tensors.
//
// Checks that `self` has no internal overlap, that `self` and `source` share
// a dtype, and that `mask` is boolean; then expands `mask` against `self` and
// forwards everything to native::xpu::masked_scatter_kernel.
//
// Returns `self`. When `self` has no elements the kernel is not launched.
Tensor& XPUNativeFunctions::masked_scatter_(
    Tensor& self,
    const Tensor& mask,
    const Tensor& source) {
  at::assert_no_internal_overlap(self);

  // Argument validation: dtype agreement first, then the mask dtype.
  TORCH_CHECK(
      self.scalar_type() == source.scalar_type(),
      "masked_scatter_: expected self and source to have same dtypes but got ",
      self.scalar_type(),
      " and ",
      source.scalar_type());
  TORCH_CHECK(
      mask.dtype() == ScalarType::Bool,
      "masked_scatter_ only supports boolean masks, "
      "but got mask with dtype ",
      mask.dtype());

  // The expansion runs before the emptiness check below, so it is performed
  // even when `self` is empty.
  c10::MaybeOwned<Tensor> expanded_mask =
      expand_inplace(self, mask, "masked_scatter_");

  if (self.numel() != 0) {
    // Uninitialized int64 scratch tensor shaped like `self`, handed to the
    // kernel (presumably to hold the mask's prefix sum, going by the
    // original name -- confirm against the kernel).
    const auto scratch_options = mask.options().dtype(kLong);
    auto mask_prefix_sum = at::empty(self.sizes(), scratch_options);
    native::xpu::masked_scatter_kernel(
        self, *expanded_mask, mask_prefix_sum, source);
  }

  return self;
}

// Shared implementation behind masked_select / masked_select_out.
//
// Requires a boolean `mask` and a `result` whose scalar type equals that of
// `self`. Zero-dimensional inputs are unsqueezed to one dimension, mask and
// self are expanded against each other, and the selection is delegated to
// XPUNativeFunctions::index_out with the expanded mask as the only index.
//
// Returns `result`.
static Tensor& masked_select_out_impl(
    Tensor& result,
    const Tensor& self,
    const Tensor& mask) {
  NoNamesGuard guard;

  TORCH_CHECK(
      mask.scalar_type() == ScalarType::Bool,
      "masked_select: expected BoolTensor for mask");
  TORCH_CHECK(
      self.scalar_type() == result.scalar_type(),
      "masked_select(): self and result must have the same scalar type");

  using MaybeOwnedTensor = c10::MaybeOwned<Tensor>;

  // Borrow tensors that already have at least one dimension; own a freshly
  // unsqueezed view for zero-dimensional ones.
  const auto at_least_1d = [](const Tensor& t) {
    return (t.dim() == 0) ? MaybeOwnedTensor::owned(t.unsqueeze(0))
                          : MaybeOwnedTensor::borrowed(t);
  };
  const auto mask_1d = at_least_1d(mask);
  const auto self_1d = at_least_1d(self);

  // The expanded pair must live in its own variable: if mask_1d / self_1d own
  // their tensors and expand_outplace hands back borrows of them, overwriting
  // mask_1d / self_1d with the result would leave those borrows dangling.
  auto expanded = expand_outplace(*mask_1d, *self_1d);
  auto& expanded_mask = std::get<0>(expanded);
  const auto& expanded_self = std::get<1>(expanded);

  XPUNativeFunctions::index_out(
      *expanded_self,
      c10::List<std::optional<at::Tensor>>({*std::move(expanded_mask)}),
      result);

  return result;
}

// Functional masked_select: allocates an empty output with `self`'s options
// and fills it through masked_select_out_impl.
Tensor XPUNativeFunctions::masked_select(
    const Tensor& self,
    const Tensor& mask) {
  // The computed names are discarded; the call is kept only for whatever
  // checking it performs on the broadcast of the two name lists.
  namedinference::compute_broadcast_outnames(self, mask);

  auto out = at::empty({0}, self.options());
  masked_select_out_impl(out, self, mask);
  return out;
}

// Out-variant of masked_select: writes the selection into the caller-provided
// `result` via masked_select_out_impl and returns `result`.
Tensor& XPUNativeFunctions::masked_select_out(
    const Tensor& self,
    const Tensor& mask,
    Tensor& result) {
  // The computed names are discarded; only the call itself is kept.
  namedinference::compute_broadcast_outnames(self, mask);

  masked_select_out_impl(result, self, mask);
  return result;
}

} // namespace at
58 changes: 58 additions & 0 deletions src/ATen/native/xpu/Loss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,64 @@ Tensor& XPUNativeFunctions::mse_loss_backward_out(
return grad_input;
}


// Out-variant of smooth L1 loss.
//
// Parameters:
//   input, target - operands of the element-wise loss (combined through a
//                   borrowing binary TensorIterator).
//   reduction     - Reduction::None, Reduction::Mean or Reduction::Sum.
//   beta          - smooth L1 threshold; must be non-negative.
//   result        - output tensor. For Mean/Sum it is resized to a scalar
//                   and receives the reduced loss; for None it is the
//                   element-wise output of the iterator.
//
// Returns `result`. Raises (via TORCH_CHECK) when `beta` is negative or NaN.
Tensor& XPUNativeFunctions::smooth_l1_loss_out(
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta,
    Tensor& result) {
  // PyTorch documents beta as non-negative; reject bad values up front
  // rather than letting the kernel produce a meaningless loss.
  TORCH_CHECK(
      beta >= 0, "smooth_l1_loss does not support negative values for beta.");

  if (reduction == Reduction::None) {
    // No reduction: the kernel writes the element-wise loss into `result`.
    auto iter = TensorIterator::borrowing_binary_op(result, input, target);
    native::xpu::smooth_l1_kernel(iter, beta);
    return result;
  }

  TORCH_INTERNAL_ASSERT(
      reduction == Reduction::Mean || reduction == Reduction::Sum);
  result.resize_({});

  // `loss` is the temporary element-wise output. It is declared before the
  // borrowing iterator so that it outlives it.
  Tensor loss;
  auto iter = TensorIterator::borrowing_binary_op(loss, input, target);
  native::xpu::smooth_l1_kernel(iter, beta);

  // `result` is already a mutable reference, so no const_cast is required.
  if (reduction == Reduction::Mean) {
    at::mean_out(result, iter.output(), IntArrayRef{});
  } else {
    at::sum_out(result, iter.output(), IntArrayRef{});
  }
  return result;
}

// Functional smooth L1 loss: allocates an output shaped like `input`
// (legacy contiguous memory format) and delegates to smooth_l1_loss_out,
// which resizes it as needed for reducing modes.
Tensor XPUNativeFunctions::smooth_l1_loss(
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta) {
  auto out = at::empty_like(input, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
  // smooth_l1_loss_out returns a reference to the tensor passed in, so the
  // local already holds the final value after the call.
  XPUNativeFunctions::smooth_l1_loss_out(input, target, reduction, beta, out);
  return out;
}

// Backward of smooth L1 loss, out-variant.
//
// Builds a TensorIterator with `grad_input` as the output and `input`,
// `target`, `grad_output` as const inputs (with common-dtype promotion and
// safe-cast enforcement), then runs native::xpu::smooth_l1_backward_kernel.
// The kernel receives a scale of 1 / input.numel() for Reduction::Mean and 1
// otherwise.
//
// Returns `grad_input`.
Tensor& XPUNativeFunctions::smooth_l1_loss_backward_out(
    const Tensor& grad_output,
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta,
    Tensor& grad_input) {
  double norm = 1.;
  if (reduction == Reduction::Mean) {
    norm = 1. / input.numel();
  }

  at::TensorIteratorConfig config;
  config.add_output(grad_input);
  config.add_const_input(input);
  config.add_const_input(target);
  config.add_const_input(grad_output);
  config.promote_inputs_to_common_dtype(true);
  config.cast_common_dtype_to_outputs(true);
  config.enforce_safe_casting_to_output(true);
  auto iter = config.build();

  native::xpu::smooth_l1_backward_kernel(iter, norm, beta);
  return grad_input;
}

Tensor XPUNativeFunctions::binary_cross_entropy(
const Tensor& self,
const Tensor& target,
Expand Down
5 changes: 3 additions & 2 deletions src/ATen/native/xpu/NMS.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,15 @@ Tensor nms(const Tensor& dets, const Tensor& scores, double iou_threshold_) {
auto mask = nms_kernel(dets_sorted, iou_threshold);

at::Tensor mask_cpu = mask.to(at::kCPU);
unsigned long long* mask_host = (unsigned long long*)mask_cpu.data_ptr();
unsigned long long* mask_host =
(unsigned long long*)mask_cpu.mutable_data_ptr();

std::vector<unsigned long long> remv(col_blocks);
memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);

at::Tensor keep =
at::empty({dets_num}, dets.options().dtype(at::kLong).device(at::kCPU));
int64_t* keep_out = (int64_t*)keep.data_ptr();
int64_t* keep_out = keep.mutable_data_ptr<int64_t>();

int num_to_keep = 0;
for (int i = 0; i < dets_num; i++) {
Expand Down
34 changes: 34 additions & 0 deletions src/ATen/native/xpu/ReduceOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -869,6 +869,40 @@ Tensor XPUNativeFunctions::amin(
return out;
}

// Out-variant of nansum.
//
// Integral (and bool) inputs are forwarded to at::sum_out, since those types
// cannot hold NaN. Otherwise `result` is resized for the reduction, a
// reduction iterator is built over it, and either the output is zeroed (when
// the iterator has no elements) or native::xpu::nansum_kernel is run.
//
// Returns `result`.
Tensor& XPUNativeFunctions::nansum_out(
    const Tensor& self,
    at::OptionalIntArrayRef dim,
    bool keepdim,
    optional<ScalarType> opt_dtype,
    Tensor& result) {
  const bool input_is_integral =
      c10::isIntegralType(self.scalar_type(), /*includeBool=*/true);
  if (input_is_integral) {
    // No NaNs possible: a plain sum gives the same answer.
    return at::sum_out(result, self, dim, keepdim, opt_dtype);
  }

  const auto out_dtype = infer_dtype_from_optional(self, opt_dtype, result);
  result = resize_reduction(result, self, dim, keepdim, out_dtype);
  auto iter = meta::make_reduction_from_out_ty(
      self, result, dim, keepdim, result.scalar_type());

  if (iter.numel() != 0) {
    native::xpu::nansum_kernel(iter);
    return result;
  }

  // Nothing to reduce: the output is filled with zeros.
  result.zero_();
  return result;
}

// Functional nansum: starts from a default-constructed output tensor and
// lets nansum_out size and fill it, returning whatever nansum_out returns.
Tensor XPUNativeFunctions::nansum(
    const Tensor& self,
    at::OptionalIntArrayRef dim,
    bool keepdim,
    std::optional<ScalarType> opt_dtype) {
  Tensor out;
  return XPUNativeFunctions::nansum_out(
      self, dim, keepdim, opt_dtype, out);
}

static ScalarType get_result_or_self_value_dtype(
const Tensor& self,
const Tensor& result,
Expand Down
Loading

0 comments on commit 69cbd98

Please sign in to comment.