Skip to content

Commit

Permalink
Merge branch 'main' into hjhee/asinh
Browse files — browse the repository at this point in the history
  • Loading branch information
fengyuan14 authored Jul 8, 2024
2 parents 1c64c60 + 1951fce commit 22faf93
Show file tree
Hide file tree
Showing 10 changed files with 236 additions and 83 deletions.
123 changes: 123 additions & 0 deletions .github/workflows/_linux_ut.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
name: inductor-xpu-ut-test

on:
  workflow_call:
    inputs:
      torch_xpu_ops_update:
        required: false
        type: string
        default: 'true'
        description: True means update xpu_ops when building pytorch, otherwise means not
      ut_suite:
        required: true
        type: string
        default: 'op_example,op_extended,op_ut,torch_xpu'
        description: op_example,op_extended,op_ut,torch_xpu. Delimiter is comma
      pytorch_branch:
        required: false
        type: string
        default: 'main'
        description: Set pytorch branch
      runner:
        required: true
        type: string
        default: 'linux.idc.xpu'
        description: Set runner

jobs:
  Inductor-XPU-UT-Tests:
    runs-on: ${{ inputs.runner }}
    timeout-minutes: 900
    steps:
      - name: Checkout torch-xpu-ops
        uses: actions/checkout@v4
      - name: Prepare Stock Pytorch
        run: |
          pwd
          cd ../ && rm -rf pytorch
          git clone -b ${{ inputs.pytorch_branch }} https://github.com/pytorch/pytorch
          cd pytorch && git log -n 1 && git submodule sync && git submodule update --init --recursive
          # Quote the expansion: unquoted, an empty input turned `[ -z ]` into a
          # one-argument test of the literal string "-z", which passed only by
          # accident of the POSIX test grammar.
          if [ -z "${{ inputs.torch_xpu_ops_update }}" ]; then
            rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
          else
            if [[ "${{ inputs.torch_xpu_ops_update }}" == 'true' ]]; then
              rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
            else
              echo "Not update torch-xpu-ops"
            fi
          fi
          # Workaround for torch-xpu-ops ci test
          sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
      - name: Build Pytorch XPU
        run: |
          which conda && conda clean -ay
          conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
            rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
          conda create -n xpu_op_${ZE_AFFINITY_MASK} python=3.10 cmake ninja -y
          source activate xpu_op_${ZE_AFFINITY_MASK}
          conda install -c intel mkl-static mkl-include -y
          cd ../pytorch
          pip install -r requirements.txt
          export USE_XPU=1
          source /opt/intel/oneapi/compiler/latest/env/vars.sh
          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
          python setup.py bdist_wheel
          pip install --force-reinstall dist/*.whl
          git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
          pip install -r .ci/docker/requirements-ci.txt
      - name: Run XPU OP Examples
        if: contains(inputs.ut_suite, 'op_example')
        run: |
          cd ${{ github.workspace }}
          mkdir -p ut_log
          xpu-smi discovery
          source /opt/intel/oneapi/compiler/latest/env/vars.sh
          source activate xpu_op_${ZE_AFFINITY_MASK}
          cd ${{ github.workspace }}
          cd examples
          pip install pytest
          timeout 8000 pytest -v
      - name: Run XPU OP Extended UT
        if: contains(inputs.ut_suite, 'op_extended')
        run: |
          source /opt/intel/oneapi/compiler/latest/env/vars.sh
          source activate xpu_op_${ZE_AFFINITY_MASK}
          export PYTORCH_TEST_WITH_SLOW=1
          cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
          timeout 10000 python run_test_with_skip.py
      - name: Run XPU OP UT
        if: contains(inputs.ut_suite, 'op_ut')
        run: |
          source /opt/intel/oneapi/compiler/latest/env/vars.sh
          source activate xpu_op_${ZE_AFFINITY_MASK}
          export PYTORCH_ENABLE_XPU_FALLBACK=1
          export PYTORCH_TEST_WITH_SLOW=1
          cd ../pytorch/third_party/torch-xpu-ops/test/xpu
          timeout 10000 python run_test_with_skip.py
          # Cases run with an on-demand white list, since some suites are too
          # slow to go through all operators on CPU. So add cases on-demand
          # when XPU implementation is done.
          # test_foreach, test_decomp
          timeout 10000 python run_test_with_only.py
      - name: Run Torch XPU UT
        if: contains(inputs.ut_suite, 'torch_xpu')
        run: |
          source /opt/intel/oneapi/compiler/latest/env/vars.sh
          source activate xpu_op_${ZE_AFFINITY_MASK}
          cd ../pytorch
          TEST_REPORTS_DIR=$(pwd)/test/test-reports
          rm -rf "$TEST_REPORTS_DIR" && mkdir -p "$TEST_REPORTS_DIR"
          # Run Pytorch XPU binary UT
          for xpu_case in build/bin/*{xpu,sycl}*; do
            if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then
              case_name=$(basename "$xpu_case")
              echo "Testing ${case_name} ..."
              "$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml
            fi
          done
          # Run Pytorch XPU python UT
          export PYTORCH_ENABLE_XPU_FALLBACK=1
          sed -i 's/selected_tests = exclude_tests(XPU_BLOCKLIST.*/selected_tests = XPU_TEST/g' ./test/run_test.py
          python test/run_test.py --xpu
24 changes: 24 additions & 0 deletions .github/workflows/inductor_xpu_e2e_nightly.yml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,21 @@ on:
type: string
default: ''
description: If set, will only launch this one
torch_xpu_ops_update:
required: false
type: string
default: 'true'
description: True means update xpu_ops when building pytorch, otherwise means not
ut_suite:
required: true
type: string
default: 'op_example,op_extended,op_ut,torch_xpu'
description: op_example,op_extended,op_ut,torch_xpu. Delimiter is comma
pytorch_branch:
required: false
type: string
default: 'main'
description: Set pytorch branch


permissions: read-all
Expand Down Expand Up @@ -244,6 +259,15 @@ jobs:
name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
path: ${{ github.workspace }}/upload_files

Inductor-XPU-UT-Nightly-Tests:
  # A non-empty ut_suite string is truthy; skip the job when no suite is set.
  if: ${{ inputs.ut_suite }}
  name: Nightly Inductor XPU UT Test
  uses: ./.github/workflows/_linux_ut.yml
  with:
    # Forward torch_xpu_ops_update: it is declared as a workflow input above
    # but was not passed through, so the reusable workflow always fell back
    # to its own default regardless of the dispatch value.
    torch_xpu_ops_update: ${{ inputs.torch_xpu_ops_update }}
    ut_suite: ${{ inputs.ut_suite }}
    pytorch_branch: ${{ inputs.pytorch_branch }}
    runner: linux.idc.xpu

Tests-Failure-And-Report:
if: always()
runs-on: pvc_e2e
Expand Down
86 changes: 5 additions & 81 deletions .github/workflows/pull.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,84 +23,8 @@ jobs:
# Don't run on forked repos and draft PRs
if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
name: preci-ut
runs-on: linux.idc.xpu
timeout-minutes: 240
steps:
- name: Checkout torch-xpu-ops
uses: actions/checkout@v3
- name: Prepare Stock Pytorch
run: |
pwd
cd ../ && rm -rf pytorch
git clone -b main https://github.com/pytorch/pytorch
cd pytorch && git log -n 1 && git submodule sync && git submodule update --init --recursive
rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
# Workaround for torch-xpu-ops ci test
sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
- name: Build Pytorch XPU
run: |
which conda && conda clean -ay
conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
conda create -n xpu_op_${ZE_AFFINITY_MASK} python=3.10 cmake ninja -y
source activate xpu_op_${ZE_AFFINITY_MASK}
conda install -c intel mkl-static mkl-include -y
cd ../pytorch
pip install -r requirements.txt
export USE_XPU=1
source /opt/intel/oneapi/compiler/latest/env/vars.sh
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
python setup.py bdist_wheel
pip install --force-reinstall dist/*.whl
git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
pip install -r .ci/docker/requirements-ci.txt
- name: Run XPU OP Examples
if: ${{ hashFiles('examples/') != '' }}
run: |
xpu-smi discovery
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
cd examples
pip install pytest
timeout 8000 pytest -v
- name: Run XPU OP Extended UT
if: ${{ hashFiles('test/xpu/') != '' }}
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_TEST_WITH_SLOW=1
cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
timeout 10000 python run_test_with_skip.py
- name: Run XPU OP UT
if: ${{ hashFiles('test/xpu/') != '' }}
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
export PYTORCH_ENABLE_XPU_FALLBACK=1
export PYTORCH_TEST_WITH_SLOW=1
cd ../pytorch/third_party/torch-xpu-ops/test/xpu
timeout 10000 python run_test_with_skip.py
# Cases run with a on-demand white list, since some suites are too
# slow to go through all operators on CPU. So add cases on-demand
# when XPU implementatoin is done.
# test_foreach, test_decomp
timeout 10000 python run_test_with_only.py
- name: Run Torch XPU UT
run: |
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source activate xpu_op_${ZE_AFFINITY_MASK}
cd ../pytorch
TEST_REPORTS_DIR=$(pwd)/test/test-reports
rm -rf "$TEST_REPORTS_DIR" && mkdir -p "$TEST_REPORTS_DIR"
# Run Pytorch XPU binary UT
for xpu_case in build/bin/*{xpu,sycl}*; do
if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then
case_name=$(basename "$xpu_case")
echo "Testing ${case_name} ..."
"$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml
fi
done
# Run Pytorch XPU python UT
export PYTORCH_ENABLE_XPU_FALLBACK=1
sed -i 's/selected_tests = exclude_tests(XPU_BLOCKLIST.*/selected_tests = XPU_TEST/g' ./test/run_test.py
python test/run_test.py --xpu
uses: ./.github/workflows/_linux_ut.yml
with:
ut_suite: op_example,op_extended,op_ut,torch_xpu
runner: linux.idc.xpu

19 changes: 19 additions & 0 deletions src/ATen/native/xpu/TensorFactories.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,25 @@

namespace at {

// Square identity-matrix overload (eye.out): fills `result` with the
// n x n identity by delegating to the general (n, m) overload below.
Tensor& XPUNativeFunctions::eye_out(int64_t n, Tensor& result) {
  return XPUNativeFunctions::eye_out(n, n, result);
}

// Rectangular identity matrix (eye.m_out): resizes `result` to (n, m),
// zeroes it, then writes 1 along the main diagonal.
// Raises (TORCH_CHECK) if n or m is negative. Returns `result`.
Tensor& XPUNativeFunctions::eye_out(int64_t n, int64_t m, Tensor& result) {
  TORCH_CHECK(n >= 0, "n must be greater or equal to 0, got ", n);
  TORCH_CHECK(m >= 0, "m must be greater or equal to 0, got ", m);

  result.resize_({n, m});
  result.zero_();

  // The diagonal has min(n, m) entries; with n == 0 or m == 0 this is 0
  // and the fill below is a no-op.
  int64_t sz = std::min<int64_t>(n, m);
  // Advancing by stride(0) + stride(1) moves one step down AND one step
  // right, i.e. walks the main diagonal of the 2-D result.
  int64_t stride = result.stride(0) + result.stride(1);

  // View the diagonal as a 1-D strided tensor and set every entry to 1.
  Tensor diag = result.as_strided({sz}, {stride});
  diag.fill_(1);
  return result;
}

Tensor XPUNativeFunctions::empty(
IntArrayRef size,
c10::optional<ScalarType> dtype_opt,
Expand Down
36 changes: 36 additions & 0 deletions src/ATen/native/xpu/UnaryOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -630,4 +630,40 @@ Tensor& XPUNativeFunctions::cosh_out(const Tensor& self, Tensor& out) {
return out;
}

// Shared meta step for the ceil variants below: validates the input dtype
// and builds a borrowing unary TensorIterator mapping `self` -> `out`.
// Complex inputs are rejected here, so the dispatch path that follows only
// ever sees real dtypes.
TensorIterator ceil_meta(const Tensor& self, Tensor& out) {
  TORCH_CHECK(!self.is_complex(), "ceil is not supported for complex inputs");
  TensorIterator iter;
  iter.build_borrowing_unary_op(out, self);
  return iter;
}

// Functional ceil: returns a new tensor with every element rounded up to
// the nearest integer.
Tensor XPUNativeFunctions::ceil(const Tensor& self) {
  // ceil of an integral tensor is the identity, so a clone suffices and
  // the kernel launch is skipped entirely (bool excluded from this path).
  if (c10::isIntegralType(self.scalar_type(), /*includeBool=*/false)) {
    return self.clone();
  }
  // `out` is left undefined; the iterator allocates the output, which is
  // then retrieved via iter.output().
  Tensor out;
  auto iter = ceil_meta(self, out);
  native::xpu::ceil_kernel(iter);
  return iter.output();
}

// In-place ceil: rounds every element of `self` up and returns `self`.
Tensor& XPUNativeFunctions::ceil_(Tensor& self) {
  // Integral tensors are already their own ceil; nothing to do.
  if (c10::isIntegralType(self.scalar_type(), /*includeBool=*/false)) {
    return self;
  }
  // Passing `self` as both input and output makes the iterator operate
  // in place.
  auto iter = ceil_meta(self, self);
  native::xpu::ceil_kernel(iter);
  return self;
}

// Out-variant ceil (ceil.out): writes the elementwise ceiling of `self`
// into the caller-provided `out` and returns `out`.
Tensor& XPUNativeFunctions::ceil_out(const Tensor& self, Tensor& out) {
  // For integral inputs ceil is the identity; copy into `out` so the
  // caller's tensor identity is preserved, and skip the kernel.
  if (c10::isIntegralType(self.scalar_type(), /*includeBool=*/false)) {
    out.copy_(self);
    return out;
  }
  auto iter = ceil_meta(self, out);
  native::xpu::ceil_kernel(iter);
  return out;
}

} // namespace at
2 changes: 0 additions & 2 deletions src/ATen/native/xpu/XPUFallback.template
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"bitwise_right_shift.Tensor_out",
"cauchy_",
"_cdist_backward",
"ceil.out",
"channel_shuffle",
"cholesky",
"cholesky_inverse",
Expand All @@ -198,7 +197,6 @@ TORCH_LIBRARY_IMPL(aten, XPU, m) {
"exp2.out",
"expm1.out",
"exponential_",
"eye.m_out",
"_fft_c2c",
"_fft_c2r",
"_fft_r2c",
Expand Down
21 changes: 21 additions & 0 deletions src/ATen/native/xpu/sycl/UnaryFractionKernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,25 @@ void reciprocal_kernel(TensorIteratorBase& iter) {
[&]() { gpu_kernel(iter, ReciprocalFunctor<scalar_t>()); });
}

// Elementwise ceiling functor: rounds a single value up to the nearest
// integral value via std::ceil.
template <typename scalar_t>
struct CeilFunctor {
  scalar_t operator()(const scalar_t value) const {
    return std::ceil(value);
  }
};

// Complex specialization: applies ceil independently to the real and
// imaginary components.
// NOTE(review): ceil_meta in UnaryOps.cpp rejects complex inputs and
// ceil_kernel below dispatches over floating types plus Half/BFloat16 only,
// so this specialization appears unreachable through the current ceil path —
// confirm whether another caller needs it before relying on it.
template <typename T>
struct CeilFunctor<c10::complex<T>> {
  c10::complex<T> operator()(const c10::complex<T> a) const {
    return c10::complex<T>(std::ceil(a.real()), std::ceil(a.imag()));
  }
};

// Launches the elementwise ceil GPU kernel for the iterator's dtype.
// Dispatch covers the floating-point types plus Half and BFloat16; integral
// inputs are short-circuited by the callers in UnaryOps.cpp and never reach
// this kernel.
void ceil_kernel(TensorIteratorBase& iter) {
  AT_DISPATCH_FLOATING_TYPES_AND2(
      ScalarType::Half, ScalarType::BFloat16, iter.dtype(), "ceil_xpu", [&]() {
        gpu_kernel(iter, CeilFunctor<scalar_t>());
      });
}

} // namespace at::native::xpu
2 changes: 2 additions & 0 deletions src/ATen/native/xpu/sycl/UnaryFractionKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,6 @@ namespace at::native::xpu {

void reciprocal_kernel(TensorIteratorBase& iter);

void ceil_kernel(TensorIteratorBase& iter);

} // namespace at::native::xpu
1 change: 1 addition & 0 deletions test/xpu/xpu_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

_xpu_computation_op_list = [
"empty",
"eye",
"fill",
"zeros",
"zeros_like",
Expand Down
5 changes: 5 additions & 0 deletions yaml/xpu_functions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ supported:
- exp_
- empty.memory_format
- empty_strided
- eye.out
- eye.m_out
- _efficientzerotensor
- complex.out
- clone
Expand Down Expand Up @@ -518,3 +520,6 @@ supported:
- randperm.generator_out
- _amp_foreach_non_finite_check_and_unscale_
- _amp_update_scale_
- ceil
- ceil_
- ceil.out

0 comments on commit 22faf93

Please sign in to comment.