Skip to content

Commit

Permalink
Merge branch 'main' into chao/fp64emu
Browse files Browse the repository at this point in the history
  • Loading branch information
Chao1Han authored Jul 30, 2024
2 parents b59d675 + 8a821bf commit fc75a4e
Show file tree
Hide file tree
Showing 28 changed files with 1,588 additions and 140 deletions.
59 changes: 49 additions & 10 deletions .github/workflows/nightly_ondemand.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ name: Nightly-OnDemand Tests

on:
schedule:
# GMT+8 21:00 every day
- cron: '0 13 * * *'
# GMT+8 21:00 Sunday through Thursday (cron days 0-4)
- cron: '0 13 * * 0-4'
# GMT+8 0:00 Saturday
- cron: '0 16 * * 5'
workflow_dispatch:
inputs:
pytorch:
Expand Down Expand Up @@ -78,7 +80,7 @@ jobs:
runs-on: pvc_e2e
# Don't run on forked repos
if: github.repository_owner == 'intel'
timeout-minutes: 900
timeout-minutes: 3600
env:
pytorch: ${{ github.event_name == 'schedule' && 'main' || inputs.pytorch }}
keep_torch_xpu_ops: ${{ github.event_name == 'schedule' && 'false' || inputs.keep_torch_xpu_ops }}
Expand Down Expand Up @@ -174,8 +176,10 @@ jobs:
echo "$GITHUB_ENV"
rm -rf ../pytorch/inductor_log
rm -rf /tmp/torchinductor_*
# Nightly launch
- name: Nightly Huggingface FP32/BF16/FP16 Inference & Training Accuracy Test
if: github.event_name == 'schedule'
if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: huggingface
Expand All @@ -185,7 +189,7 @@ jobs:
scenario: accuracy
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Nightly Torchbench BF16 Training Accuracy Test
if: github.event_name == 'schedule'
if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: torchbench
Expand All @@ -195,7 +199,7 @@ jobs:
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Nightly Timm_models FP16 Training Accuracy Test
if: github.event_name == 'schedule'
if: github.event_name == 'schedule' && github.event.schedule == '0 13 * * 0-4'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: timm_models
Expand All @@ -204,6 +208,38 @@ jobs:
scenario: accuracy
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# Weekly launch
- name: Weekly Huggingface Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: huggingface
env_prepare: true
dt: float32,bfloat16,float16,amp_bf16,amp_fp16
mode: inference,training
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Weekly Torchbench Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: torchbench
env_prepare: true
dt: float32,bfloat16,float16,amp_bf16,amp_fp16
mode: inference,training
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Weekly Timm_models Full Test
if: github.event_name == 'schedule' && github.event.schedule == '0 16 * * 5'
uses: ./.github/actions/inductor-xpu-e2e-test
with:
suite: timm_models
env_prepare: true
dt: float32,bfloat16,float16,amp_bf16,amp_fp16
mode: inference,training
scenario: accuracy,performance
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
# On-demand launch
- name: OnDemand Test (${{ inputs.suite }} ${{ inputs.dt }} ${{ inputs.mode }} ${{ inputs.scenario }})
if: github.event_name != 'schedule'
uses: ./.github/actions/inductor-xpu-e2e-test
Expand All @@ -216,7 +252,7 @@ jobs:
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Summarize archieve files
id: summary
if: always()
if: ${{ ! cancelled() }}
run: |
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
Expand All @@ -237,14 +273,14 @@ jobs:
exit 1
fi
- name: Upload Inductor XPU E2E Data
if: always()
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
with:
name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
path: ${{ github.workspace }}/upload_files

Tests-Failure-And-Report:
if: always()
if: ${{ ! cancelled() }}
runs-on: pvc_e2e
permissions:
issues: write
Expand Down Expand Up @@ -288,6 +324,9 @@ jobs:
test_type="On-demand"
test_issue_id=426
cc_comment="CC @${GITHUB_TRIGGERING_ACTOR}"
elif [ "${{ github.event.schedule }}" == "0 16 * * 5" ];then
test_type="Weekly"
test_issue_id=432
else
test_type="Nightly"
test_issue_id=432
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ jobs:
env_prepare: true
hf_token: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
- name: Summarize archieve files
if: always()
if: ${{ ! cancelled() }}
run: |
rm -rf ${{ github.workspace }}/upload_files
cp -r ${{ github.workspace }}/../pytorch/inductor_log ${{ github.workspace }}/upload_files
Expand All @@ -137,7 +137,7 @@ jobs:
exit 1
fi
- name: Upload Inductor XPU E2E Data
if: always()
if: ${{ ! cancelled() }}
uses: actions/upload-artifact@v4
with:
name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
Expand Down
30 changes: 30 additions & 0 deletions src/ATen/native/xpu/Indexing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,36 @@ Tensor XPUNativeFunctions::index_select(
return index_select_out(self, dim, index, out);
}

// In-place masked scatter entry point for XPU tensors.
//
// Validates the arguments, expands `mask` against `self`, and forwards the
// work to native::xpu::masked_scatter_kernel. Always returns `self`.
//
// Checks performed, in order:
//   1. `self` has no internal memory overlap.
//   2. `self` and `source` have the same scalar type.
//   3. `mask` is a boolean tensor.
//   4. `mask` can be expanded in place to `self` (via expand_inplace).
Tensor& XPUNativeFunctions::masked_scatter_(
    Tensor& self,
    const Tensor& mask,
    const Tensor& source) {
  at::assert_no_internal_overlap(self);

  const bool dtypes_match = self.scalar_type() == source.scalar_type();
  TORCH_CHECK(
      dtypes_match,
      "masked_scatter_: expected self and source to have same dtypes but got ",
      self.scalar_type(),
      " and ",
      source.scalar_type());

  const bool mask_is_bool = mask.dtype() == ScalarType::Bool;
  TORCH_CHECK(
      mask_is_bool,
      "masked_scatter_ only supports boolean masks, "
      "but got mask with dtype ",
      mask.dtype());

  c10::MaybeOwned<Tensor> expanded_mask =
      expand_inplace(self, mask, "masked_scatter_");

  // Only launch the kernel when there is something to write; an empty `self`
  // is returned untouched.
  if (self.numel() != 0) {
    // int64 scratch tensor shaped like `self`, handed to the kernel
    // (presumably filled with the prefix sum of the mask, per its original
    // name `maskPrefixSum` -- confirm against the kernel).
    const auto scratch_options = mask.options().dtype(kLong);
    auto mask_prefix_sum = at::empty(self.sizes(), scratch_options);
    native::xpu::masked_scatter_kernel(
        self, *expanded_mask, mask_prefix_sum, source);
  }

  return self;
}

static Tensor& masked_select_out_impl(
Tensor& result,
const Tensor& self,
Expand Down
58 changes: 58 additions & 0 deletions src/ATen/native/xpu/Loss.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,64 @@ Tensor& XPUNativeFunctions::mse_loss_backward_out(
return grad_input;
}


// Out-variant of smooth L1 loss for XPU tensors.
//
// Parameters:
//   input, target - tensors combined element-wise by the smooth L1 kernel.
//   reduction     - Reduction::None, Reduction::Mean or Reduction::Sum.
//   beta          - threshold passed to the kernel; must be non-negative.
//   result        - destination tensor; also the return value.
//
// With Reduction::None the element-wise loss is written straight into
// `result`. Otherwise `result` is resized to a 0-dim tensor, the element-wise
// loss is computed into a temporary, and its mean or sum is stored in
// `result`.
//
// Raises (via TORCH_CHECK) when `beta` is negative.
Tensor& XPUNativeFunctions::smooth_l1_loss_out(
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta,
    Tensor& result) {
  // Reject a negative beta up front instead of silently handing it to the
  // kernel.
  TORCH_CHECK(
      beta >= 0, "smooth_l1_loss does not support negative values for beta.");

  if (reduction == Reduction::None) {
    auto iter = TensorIterator::borrowing_binary_op(result, input, target);
    native::xpu::smooth_l1_kernel(iter, beta);
    return result;
  }

  TORCH_INTERNAL_ASSERT(
      reduction == Reduction::Mean || reduction == Reduction::Sum);
  result.resize_({});

  // `loss` starts undefined, so the iterator owns the allocation of the
  // intermediate element-wise output read back through iter.output().
  Tensor loss;
  auto iter = TensorIterator::borrowing_binary_op(loss, input, target);
  native::xpu::smooth_l1_kernel(iter, beta);

  // `result` is already a mutable reference, so no const_cast is needed.
  if (reduction == Reduction::Mean) {
    at::mean_out(result, iter.output(), IntArrayRef{});
  } else {
    at::sum_out(result, iter.output(), IntArrayRef{});
  }
  return result;
}

// Functional smooth L1 loss for XPU tensors.
//
// Allocates an output tensor shaped like `input` (legacy contiguous memory
// format) and delegates the computation to smooth_l1_loss_out, which writes
// into that tensor. Returns the populated tensor by value.
Tensor XPUNativeFunctions::smooth_l1_loss(
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta) {
  Tensor out = at::empty_like(input, LEGACY_CONTIGUOUS_MEMORY_FORMAT);
  // The out-variant returns a reference to the tensor it was given, so the
  // returned reference does not need to be captured.
  XPUNativeFunctions::smooth_l1_loss_out(input, target, reduction, beta, out);
  return out;
}

// Backward of smooth L1 loss for XPU tensors (out-variant).
//
// Builds a tensor iterator with `grad_input` as the output and `input`,
// `target`, `grad_output` as inputs (in that order), then runs
// native::xpu::smooth_l1_backward_kernel on it. Returns `grad_input`.
//
// The scale factor handed to the kernel is 1 / input.numel() for
// Reduction::Mean and 1 for every other reduction.
Tensor& XPUNativeFunctions::smooth_l1_loss_backward_out(
    const Tensor& grad_output,
    const Tensor& input,
    const Tensor& target,
    int64_t reduction,
    double beta,
    Tensor& grad_input) {
  double grad_scale = 1.;
  if (reduction == Reduction::Mean) {
    grad_scale = 1. / input.numel();
  }

  at::TensorIteratorConfig config;
  config.add_output(grad_input);
  config.add_const_input(input);
  config.add_const_input(target);
  config.add_const_input(grad_output);
  // Inputs are promoted to a common dtype and the result is cast back to the
  // output dtype, with safe-casting enforced.
  config.promote_inputs_to_common_dtype(true);
  config.cast_common_dtype_to_outputs(true);
  config.enforce_safe_casting_to_output(true);
  auto backward_iter = config.build();

  native::xpu::smooth_l1_backward_kernel(backward_iter, grad_scale, beta);
  return grad_input;
}

Tensor XPUNativeFunctions::binary_cross_entropy(
const Tensor& self,
const Tensor& target,
Expand Down
15 changes: 15 additions & 0 deletions src/ATen/native/xpu/TensorFactories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,21 @@ Tensor& XPUNativeFunctions::complex_out(
return result;
}

// Out-variant of polar() for XPU tensors.
//
// Validates the dtypes of `result`, `abs` and `angle` with
// complex_check_dtype, then runs native::xpu::polar_kernel over an iterator
// whose output is `result` and whose inputs are `abs` and `angle`.
// Returns `result`.
Tensor& XPUNativeFunctions::polar_out(
    const Tensor& abs,
    const Tensor& angle,
    Tensor& result) {
  complex_check_dtype(result, abs, angle);

  TensorIteratorConfig config;
  config.add_output(result);
  config.add_const_input(abs);
  config.add_const_input(angle);
  // The iterator's same-dtype check is switched off for this op.
  config.check_all_same_dtype(false);
  auto polar_iter = config.build();

  native::xpu::polar_kernel(polar_iter);
  return result;
}

Tensor& XPUNativeFunctions::randperm_out(
int64_t n,
c10::optional<Generator> generator,
Expand Down
27 changes: 27 additions & 0 deletions src/ATen/native/xpu/WeightNorm.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include <ATen/native/xpu/sycl/WeightNormKernels.h>
#include <ATen/xpu/XPUNativeFunctions.h>
namespace at {
// Forward weight-norm entry point for XPU tensors.
//
// Thin wrapper: forwards `v`, `g` and `dim` unchanged to
// native::xpu::weight_norm_kernel and returns its pair of tensors.
std::tuple<Tensor, Tensor> XPUNativeFunctions::_weight_norm_interface(
    const Tensor& v,
    const Tensor& g,
    int64_t dim) {
  std::tuple<Tensor, Tensor> outputs =
      native::xpu::weight_norm_kernel(v, g, dim);
  return outputs;
}

// Backward weight-norm entry point for XPU tensors.
//
// Validates the saved tensors and `dim`, then forwards everything to
// native::xpu::weight_norm_backward_kernel and returns its pair of tensors
// (presumably the gradients w.r.t. v and g -- confirm against the kernel).
//
// Raises (via TORCH_CHECK) when:
//   - `saved_v`, `saved_g` or `saved_norms` is not contiguous;
//   - `dim` is neither the first nor the last dimension of `saved_v`.
std::tuple<Tensor, Tensor> XPUNativeFunctions::_weight_norm_interface_backward(
    const Tensor& grad_w,
    const Tensor& saved_v,
    const Tensor& saved_g,
    const Tensor& saved_norms,
    int64_t dim) {
  TORCH_CHECK(saved_v.is_contiguous(), "saved_v must be contiguous");
  TORCH_CHECK(saved_g.is_contiguous(), "saved_g must be contiguous");
  TORCH_CHECK(saved_norms.is_contiguous(), "saved_norms must be contiguous");

  // Terminated with a semicolon like the checks above, rather than relying
  // on the macro's expansion to be a complete statement.
  const int64_t last_dim = saved_v.dim() - 1;
  TORCH_CHECK(
      dim == 0 || dim == last_dim,
      "fused kernels can only be applied for first or last dim");

  return native::xpu::weight_norm_backward_kernel(
      grad_w, saved_v, saved_g, saved_norms, dim);
}
} // namespace at
4 changes: 0 additions & 4 deletions src/ATen/native/xpu/XPUFallback.template
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"log_normal_",
"logspace.out",
"lu_unpack.out",
"masked_scatter_",
"max_pool3d_with_indices",
"max_pool3d_with_indices_backward",
"max_unpool2d",
Expand All @@ -240,7 +239,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"ormqr",
"_pdist_backward",
"_pdist_forward",
"polar.out",
"_prelu_kernel",
"_prelu_kernel_backward",
"prod",
Expand All @@ -257,8 +255,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"signbit.out",
"sign.out",
"sinc.out",
"smooth_l1_loss_backward.grad_input",
"smooth_l1_loss.out",
"special_airy_ai.out",
"special_bessel_j0.out",
"special_bessel_j1.out",
Expand Down
8 changes: 4 additions & 4 deletions src/ATen/native/xpu/sycl/AdaptiveMaxPooling2dKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ void launch_adaptive_max_pool2d_kernel(
using KernelClass = AdaptiveMaxPool2dKernelFunctor<scalar_t, index_t>;

int64_t output_size = batch * plane * osizeH * osizeW;
BatchKernelConfig cfg = {
1, output_size, 1, 1, true, BatchKernelConfig::Policy::pAdaptive};
BatchKernelConfig cfg = BatchKernelConfig::make_config<KernelClass>(
1, output_size, 1, 1, true, BatchKernelConfig::Policy::pAdaptive);

cfg.build<KernelClass>();

Expand Down Expand Up @@ -301,8 +301,8 @@ void launch_adaptive_max_pool2d_backward_kernel(
int64_t sizeP) {
using KernelClass = AdaptiveMaxPool2dBackwardKernelFunctor<scalar_t, index_t>;

BatchKernelConfig cfg = {
1, osize, 1, 1, true, BatchKernelConfig::Policy::pAdaptive};
BatchKernelConfig cfg = BatchKernelConfig::make_config<KernelClass>(
1, osize, 1, 1, true, BatchKernelConfig::Policy::pAdaptive);

cfg.build<KernelClass>();

Expand Down
Loading

0 comments on commit fc75a4e

Please sign in to comment.