
Commit c4056fe

Merge branch 'main' into fy/sinh_tan

2 parents 3689377 + 682d0e4

24 files changed: +845 -130 lines

.github/workflows/_linux_ut.yml

Lines changed: 123 additions & 0 deletions
@@ -0,0 +1,123 @@
+name: inductor-xpu-ut-test
+
+on:
+  workflow_call:
+    inputs:
+      torch_xpu_ops_update:
+        required: false
+        type: string
+        default: 'true'
+        description: True means update xpu_ops when building pytorch, otherwise means not
+      ut_suite:
+        required: true
+        type: string
+        default: 'op_example,op_extended,op_ut,torch_xpu'
+        description: op_example,op_extended,op_ut,torch_xpu. Delimiter is comma
+      pytorch_branch:
+        required: false
+        type: string
+        default: 'main'
+        description: Set pytorch branch
+      runner:
+        required: true
+        type: string
+        default: 'linux.idc.xpu'
+        description: Set runner
+
+jobs:
+  Inductor-XPU-UT-Tests:
+    runs-on: ${{ inputs.runner }}
+    timeout-minutes: 900
+    steps:
+      - name: Checkout torch-xpu-ops
+        uses: actions/checkout@v4
+      - name: Prepare Stock Pytorch
+        run: |
+          pwd
+          cd ../ && rm -rf pytorch
+          git clone -b ${{ inputs.pytorch_branch }} https://github.com/pytorch/pytorch
+          cd pytorch && git log -n 1 && git submodule sync && git submodule update --init --recursive
+          if [ -z ${{ inputs.torch_xpu_ops_update }} ]; then
+            rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
+          else
+            if [[ ${{ inputs.torch_xpu_ops_update }} == 'true' ]]; then
+              rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
+            else
+              echo "Not update torch-xpu-ops"
+            fi
+          fi
+          # Workaround for torch-xpu-ops ci test
+          sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
+      - name: Build Pytorch XPU
+        run: |
+          which conda && conda clean -ay
+          conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
+            rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
+          conda create -n xpu_op_${ZE_AFFINITY_MASK} python=3.10 cmake ninja -y
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          conda install -c intel mkl-static mkl-include -y
+          cd ../pytorch
+          pip install -r requirements.txt
+          export USE_XPU=1
+          source /opt/intel/oneapi/compiler/latest/env/vars.sh
+          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
+          python setup.py bdist_wheel
+          pip install --force-reinstall dist/*.whl
+          git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
+          pip install -r .ci/docker/requirements-ci.txt
+      - name: Run XPU OP Examples
+        if: contains(inputs.ut_suite, 'op_example')
+        run: |
+          cd ${{ github.workspace }}
+          mkdir -p ut_log
+          xpu-smi discovery
+          source /opt/intel/oneapi/compiler/latest/env/vars.sh
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          cd ${{ github.workspace }}
+          cd examples
+          pip install pytest
+          timeout 8000 pytest -v
+      - name: Run XPU OP Extended UT
+        if: contains(inputs.ut_suite, 'op_extended')
+        run: |
+          source /opt/intel/oneapi/compiler/latest/env/vars.sh
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          export PYTORCH_TEST_WITH_SLOW=1
+          cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
+          timeout 10000 python run_test_with_skip.py
+      - name: Run XPU OP UT
+        if: contains(inputs.ut_suite, 'op_ut')
+        run: |
+          source /opt/intel/oneapi/compiler/latest/env/vars.sh
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          export PYTORCH_ENABLE_XPU_FALLBACK=1
+          export PYTORCH_TEST_WITH_SLOW=1
+          cd ../pytorch/third_party/torch-xpu-ops/test/xpu
+          timeout 10000 python run_test_with_skip.py
+          # Cases run with an on-demand white list, since some suites are too
+          # slow to go through all operators on CPU. So add cases on-demand
+          # when the XPU implementation is done.
+          # test_foreach, test_decomp
+          timeout 10000 python run_test_with_only.py
+      - name: Run Torch XPU UT
+        if: contains(inputs.ut_suite, 'torch_xpu')
+        run: |
+          source /opt/intel/oneapi/compiler/latest/env/vars.sh
+          source activate xpu_op_${ZE_AFFINITY_MASK}
+          cd ../pytorch
+          TEST_REPORTS_DIR=$(pwd)/test/test-reports
+          rm -rf "$TEST_REPORTS_DIR" && mkdir -p "$TEST_REPORTS_DIR"
+          # Run Pytorch XPU binary UT
+          for xpu_case in build/bin/*{xpu,sycl}*; do
+            if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then
+              case_name=$(basename "$xpu_case")
+              echo "Testing ${case_name} ..."
+              "$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml
+            fi
+          done
+          # Run Pytorch XPU python UT
+          export PYTORCH_ENABLE_XPU_FALLBACK=1
+          sed -i 's/selected_tests = exclude_tests(XPU_BLOCKLIST.*/selected_tests = XPU_TEST/g' ./test/run_test.py
+          python test/run_test.py --xpu
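
The per-suite steps above are gated with contains(inputs.ut_suite, '<suite>'). In GitHub Actions expressions, contains() on strings is a plain substring test rather than list membership, so the comma-delimited ut_suite value works, but a key that is a substring of another suite name would also match. A minimal, hypothetical Python model of this gating (the function name is illustrative, not part of the commit):

    # Hypothetical model of GitHub Actions' contains(search, item) on strings:
    # a substring test (Actions additionally ignores case; this sketch does not).
    def suite_enabled(ut_suite: str, key: str) -> bool:
        return key in ut_suite

    assert suite_enabled("op_example,op_extended,op_ut,torch_xpu", "op_ut")
    assert not suite_enabled("torch_xpu", "op_example")
    # Substring pitfall: a shorter key matches inside a longer suite name.
    assert suite_enabled("op_extended", "op_ex")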

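One non-obvious detail in the Run Torch XPU UT step: with bash's default globbing, an unmatched pattern such as build/bin/*{xpu,sycl}* expands to itself, so the loop tests for a literal '*' in "$xpu_case" to skip that case, and the *.so / *.a tests filter out libraries. A rough Python equivalent of the same filter, assuming the same build/bin layout (Python's glob simply yields nothing for unmatched patterns, so no literal-'*' guard is needed):

    import glob
    import os

    # Mirror bash's brace expansion *{xpu,sycl}* with two patterns; a file
    # matching both patterns would be visited twice in this simple sketch.
    for xpu_case in glob.glob("build/bin/*xpu*") + glob.glob("build/bin/*sycl*"):
        if xpu_case.endswith((".so", ".a")):
            continue  # skip shared/static libraries, keep test executables
        case_name = os.path.basename(xpu_case)
        print(f"Testing {case_name} ...")
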
.github/workflows/inductor_xpu_e2e_nightly.yml

Lines changed: 24 additions & 0 deletions
@@ -41,6 +41,21 @@ on:
         type: string
         default: ''
         description: If set, will only launch this one
+      torch_xpu_ops_update:
+        required: false
+        type: string
+        default: 'true'
+        description: True means update xpu_ops when building pytorch, otherwise means not
+      ut_suite:
+        required: true
+        type: string
+        default: 'op_example,op_extended,op_ut,torch_xpu'
+        description: op_example,op_extended,op_ut,torch_xpu. Delimiter is comma
+      pytorch_branch:
+        required: false
+        type: string
+        default: 'main'
+        description: Set pytorch branch
 
 
 permissions: read-all

@@ -244,6 +259,15 @@ jobs:
           name: Inductor-XPU-E2E-Data-${{ github.event.pull_request.number || github.sha }}
           path: ${{ github.workspace }}/upload_files
 
+  Inductor-XPU-UT-Nightly-Tests:
+    if: ${{ inputs.ut_suite }}
+    name: Nightly Inductor XPU UT Test
+    uses: ./.github/workflows/_linux_ut.yml
+    with:
+      ut_suite: ${{ inputs.ut_suite }}
+      pytorch_branch: ${{ inputs.pytorch_branch }}
+      runner: linux.idc.xpu
+
   Tests-Failure-And-Report:
     if: always()
     runs-on: pvc_e2e

.github/workflows/pull.yml

Lines changed: 5 additions & 81 deletions
@@ -23,84 +23,8 @@ jobs:
     # Don't run on forked repos and draft PRs
     if: ${{ (github.repository_owner == 'intel') && (github.event.pull_request.draft == false) }}
     name: preci-ut
-    runs-on: linux.idc.xpu
-    timeout-minutes: 240
-    steps:
-      - name: Checkout torch-xpu-ops
-        uses: actions/checkout@v3
-      - name: Prepare Stock Pytorch
-        run: |
-          pwd
-          cd ../ && rm -rf pytorch
-          git clone -b main https://github.com/pytorch/pytorch
-          cd pytorch && git log -n 1 && git submodule sync && git submodule update --init --recursive
-          rm -rf third_party/torch-xpu-ops && cp -r ../torch-xpu-ops third_party/
-          # Workaround for torch-xpu-ops ci test
-          sed -i "s/checkout --quiet \${TORCH_XPU_OPS_COMMIT}/log -n 1/g" caffe2/CMakeLists.txt
-      - name: Build Pytorch XPU
-        run: |
-          which conda && conda clean -ay
-          conda remove --all -y -n xpu_op_${ZE_AFFINITY_MASK} || \
-            rm -rf $(dirname ${CONDA_EXE})/../envs/xpu_op_${ZE_AFFINITY_MASK}
-          conda create -n xpu_op_${ZE_AFFINITY_MASK} python=3.10 cmake ninja -y
-          source activate xpu_op_${ZE_AFFINITY_MASK}
-          conda install -c intel mkl-static mkl-include -y
-          cd ../pytorch
-          pip install -r requirements.txt
-          export USE_XPU=1
-          source /opt/intel/oneapi/compiler/latest/env/vars.sh
-          export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-          python setup.py bdist_wheel
-          pip install --force-reinstall dist/*.whl
-          git clone https://github.com/pytorch/vision && cd vision && python setup.py install && cd ..
-          pip install -r .ci/docker/requirements-ci.txt
-      - name: Run XPU OP Examples
-        if: ${{ hashFiles('examples/') != '' }}
-        run: |
-          xpu-smi discovery
-          source /opt/intel/oneapi/compiler/latest/env/vars.sh
-          source activate xpu_op_${ZE_AFFINITY_MASK}
-          cd examples
-          pip install pytest
-          timeout 8000 pytest -v
-      - name: Run XPU OP Extended UT
-        if: ${{ hashFiles('test/xpu/') != '' }}
-        run: |
-          source /opt/intel/oneapi/compiler/latest/env/vars.sh
-          source activate xpu_op_${ZE_AFFINITY_MASK}
-          export PYTORCH_TEST_WITH_SLOW=1
-          cd ../pytorch/third_party/torch-xpu-ops/test/xpu/extended/
-          timeout 10000 python run_test_with_skip.py
-      - name: Run XPU OP UT
-        if: ${{ hashFiles('test/xpu/') != '' }}
-        run: |
-          source /opt/intel/oneapi/compiler/latest/env/vars.sh
-          source activate xpu_op_${ZE_AFFINITY_MASK}
-          export PYTORCH_ENABLE_XPU_FALLBACK=1
-          export PYTORCH_TEST_WITH_SLOW=1
-          cd ../pytorch/third_party/torch-xpu-ops/test/xpu
-          timeout 10000 python run_test_with_skip.py
-          # Cases run with a on-demand white list, since some suites are too
-          # slow to go through all operators on CPU. So add cases on-demand
-          # when XPU implementatoin is done.
-          # test_foreach, test_decomp
-          timeout 10000 python run_test_with_only.py
-      - name: Run Torch XPU UT
-        run: |
-          source /opt/intel/oneapi/compiler/latest/env/vars.sh
-          source activate xpu_op_${ZE_AFFINITY_MASK}
-          cd ../pytorch
-          TEST_REPORTS_DIR=$(pwd)/test/test-reports
-          rm -rf "$TEST_REPORTS_DIR" && mkdir -p "$TEST_REPORTS_DIR"
-          # Run Pytorch XPU binary UT
-          for xpu_case in build/bin/*{xpu,sycl}*; do
-            if [[ "$xpu_case" != *"*"* && "$xpu_case" != *.so && "$xpu_case" != *.a ]]; then
-              case_name=$(basename "$xpu_case")
-              echo "Testing ${case_name} ..."
-              "$xpu_case" --gtest_output=xml:"$TEST_REPORTS_DIR"/"$case_name".xml
-            fi
-          done
-          # Run Pytorch XPU python UT
-          export PYTORCH_ENABLE_XPU_FALLBACK=1
-          sed -i 's/selected_tests = exclude_tests(XPU_BLOCKLIST.*/selected_tests = XPU_TEST/g' ./test/run_test.py
-          python test/run_test.py --xpu
+    uses: ./.github/workflows/_linux_ut.yml
+    with:
+      ut_suite: op_example,op_extended,op_ut,torch_xpu
+      runner: linux.idc.xpu

src/ATen/native/xpu/Bucketization.cpp

Lines changed: 125 additions & 0 deletions
@@ -0,0 +1,125 @@
+#include <ATen/native/BucketizationUtils.h>
+#include <ATen/native/Resize.h>
+#include <ATen/native/xpu/sycl/BucketizationKernels.h>
+#include <ATen/xpu/XPUNativeFunctions.h>
+
+namespace at {
+
+Tensor& XPUNativeFunctions::searchsorted_out(
+    const Tensor& sorted_sequence,
+    const Tensor& self,
+    bool out_int32,
+    bool right,
+    const std::optional<c10::string_view> side_opt,
+    const std::optional<Tensor>& sorter_opt,
+    Tensor& result) {
+  // See [Note: hacky wrapper removal for optional tensor]
+  c10::MaybeOwned<Tensor> sorter_maybe_owned =
+      at::borrow_from_optional_tensor(sorter_opt);
+  const Tensor& sorter = *sorter_maybe_owned;
+  at::native::searchsorted_pre_check(
+      sorted_sequence, self, result, out_int32, right, side_opt, sorter);
+  at::native::resize_output(result, self.sizes());
+
+  if (self.numel() == 0) {
+    return result;
+  }
+
+  // we have two inputs to set right, pre_check checks that they aren't set to
+  // opposites
+  bool is_right = (side_opt && *side_opt == "right") || right;
+  at::native::xpu::searchsorted_kernel(
+      result, self, sorted_sequence, out_int32, is_right, sorter);
+  return result;
+}
+
+Tensor& XPUNativeFunctions::searchsorted_out(
+    const Tensor& sorted_sequence,
+    const Scalar& self,
+    bool out_int32,
+    bool right,
+    const std::optional<c10::string_view> side_opt,
+    const std::optional<Tensor>& sorter_opt,
+    Tensor& result) {
+  const Tensor& scalar_tensor =
+      at::native::searchsorted_scalar_tensor(self, sorted_sequence.device());
+  return searchsorted_out(
+      sorted_sequence,
+      scalar_tensor,
+      out_int32,
+      right,
+      side_opt,
+      sorter_opt,
+      result);
+}
+
+Tensor XPUNativeFunctions::searchsorted(
+    const Tensor& sorted_sequence,
+    const Tensor& self,
+    bool out_int32,
+    bool right,
+    const std::optional<c10::string_view> side_opt,
+    const std::optional<Tensor>& sorter) {
+  ScalarType scalar_type = out_int32 ? ScalarType::Int : ScalarType::Long;
+  c10::TensorOptions options =
+      TensorOptions().device(self.options().device()).dtype(scalar_type);
+  Tensor result = at::empty({0}, options, MemoryFormat::Contiguous);
+  searchsorted_out(
+      sorted_sequence, self, out_int32, right, side_opt, sorter, result);
+  return result;
+}
+
+Tensor XPUNativeFunctions::searchsorted(
+    const Tensor& sorted_sequence,
+    const Scalar& self,
+    bool out_int32,
+    bool right,
+    const std::optional<c10::string_view> side_opt,
+    const std::optional<Tensor>& sorter) {
+  const Tensor& scalar_tensor =
+      at::native::searchsorted_scalar_tensor(self, sorted_sequence.device());
+  return searchsorted(
+      sorted_sequence, scalar_tensor, out_int32, right, side_opt, sorter);
+}
+
+Tensor& XPUNativeFunctions::bucketize_out(
+    const Tensor& self,
+    const Tensor& boundaries,
+    bool out_int32,
+    bool right,
+    Tensor& result) {
+  TORCH_CHECK(
+      boundaries.dim() == 1,
+      "boundaries tensor must be 1 dimension, but got dim(",
+      boundaries.dim(),
+      ")");
+  searchsorted_out(
+      boundaries, self, out_int32, right, nullopt, nullopt, result);
+  return result;
+}
+
+Tensor XPUNativeFunctions::bucketize(
+    const Tensor& self,
+    const Tensor& boundaries,
+    bool out_int32,
+    bool right) {
+  ScalarType scalar_type = out_int32 ? ScalarType::Int : ScalarType::Long;
+  c10::TensorOptions options =
+      TensorOptions().device(self.options().device()).dtype(scalar_type);
+  Tensor result = at::empty({0}, options, MemoryFormat::Contiguous);
+  bucketize_out(self, boundaries, out_int32, right, result);
+  return result;
+}
+
+Tensor XPUNativeFunctions::bucketize(
+    const Scalar& self,
+    const Tensor& boundaries,
+    bool out_int32,
+    bool right) {
+  return bucketize(
+      at::native::searchsorted_scalar_tensor(self, boundaries.device()),
+      boundaries,
+      out_int32,
+      right);
+}
+} // namespace at
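
For reference, the semantics these entry points implement (not part of this commit): searchsorted returns, for each value, the index at which it could be inserted into sorted_sequence without breaking the ordering, with right/side selecting the leftmost or rightmost valid position; bucketize is the variant restricted to 1-D boundaries. A minimal sketch against the public PyTorch Python API on CPU:

    import torch

    sorted_seq = torch.tensor([1, 3, 5, 7, 9])
    values = torch.tensor([3, 6, 9])

    # Leftmost insertion points (default right=False).
    print(torch.searchsorted(sorted_seq, values))              # tensor([1, 3, 4])
    # Rightmost insertion points.
    print(torch.searchsorted(sorted_seq, values, right=True))  # tensor([2, 3, 5])
    # bucketize: same idea, but boundaries must be 1-D
    # (cf. the TORCH_CHECK in bucketize_out above).
    print(torch.bucketize(values, sorted_seq))                 # tensor([1, 3, 4])
    # out_int32=True yields int32 indices instead of the default int64,
    # matching the ScalarType::Int / ScalarType::Long choice above.
    print(torch.searchsorted(sorted_seq, values, out_int32=True).dtype)  # torch.int32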

src/ATen/native/xpu/LinearAlgebra.cpp

Lines changed: 1 addition & 7 deletions
@@ -1,15 +1,9 @@
 #include <ATen/ATen.h>
-#include <ATen/ExpandUtils.h>
-#include <ATen/WrapDimUtils.h>
-#include <ATen/core/Tensor.h>
-#include <ATen/core/op_registration/adaption.h>
 #include <ATen/native/LinearAlgebraUtils.h>
 #include <ATen/native/ReduceOpsUtils.h>
-#include <ATen/native/utils/ParamUtils.h>
-#include <ATen/xpu/XPUNativeFunctions.h>
-
 #include <ATen/native/xpu/sycl/LinearAlgebraKernels.h>
 #include <ATen/native/xpu/sycl/ReduceNormKernel.h>
+#include <ATen/xpu/XPUNativeFunctions.h>
 #include <comm/RegisterUtils.h>
 
 namespace at {
