Arc: Add cases skip list for extended cases #844

Merged: 2 commits, Aug 30, 2024
Changes from all commits
151 changes: 2 additions & 149 deletions test/xpu/extended/run_test_with_skip.py
@@ -1,155 +1,8 @@
import os
import sys
from skip_list_common import skip_dict

skip_list = (
# Calculation differences between the XPU and CPU implementations:
# 1. Compiler optimization fails to promote the data type to higher precision.
# 2. Accumulated error is amplified by some operations in extreme cases, e.g. std::exp(extreme_large_num).
# 3. Accumulated error is amplified by a large number of accumulation operations.
# 4. Accumulated error differs between implementations due to different accumulation order:
#    a. different kernel implementations;
#    b. different std functions (std::log, std::tanh, std::exp).
# 5. Dividing two identical float values does not yield exactly 1.
# 6. std functions return different results between GCC and SYCL when the input is NaN or Inf.
"test_compare_cpu_cumsum_xpu_bfloat16",
"test_compare_cpu_cumsum_xpu_float16",
"test_compare_cpu_log_xpu_complex64",
"test_compare_cpu_log10_xpu_complex64",
"test_compare_cpu_log1p_xpu_complex64",
"test_compare_cpu_log2_xpu_complex64",
"test_compare_cpu_log2_xpu_complex128",
"test_compare_cpu_mul_xpu_complex64",
"test_compare_cpu_pow_xpu_complex128",
"test_compare_cpu_pow_xpu_complex64",
"test_compare_cpu_tan_xpu_complex128",
"test_compare_cpu_tan_xpu_complex64",
"test_compare_cpu_tanh_xpu_complex128",
"test_compare_cpu_tanh_xpu_complex64",
"test_compare_cpu_rsqrt_xpu_bfloat16",
"test_compare_cpu_pow_xpu_bfloat16",
# CUDA has the same issue in this case
"test_compare_cpu__refs_rsub_xpu_bfloat16",
"test_compare_cpu_add_xpu_bfloat16",
"test_compare_cpu_sub_xpu_bfloat16",
"test_compare_cpu_acos_xpu_complex128",
"test_compare_cpu_acos_xpu_complex64",
"test_compare_cpu_acosh_xpu_complex64",
"test_compare_cpu_cross_xpu_float16",
"test_compare_cpu_floor_divide_xpu_bfloat16",
"test_compare_cpu_floor_divide_xpu_float16",
"test_compare_cpu_polygamma_polygamma_n_0_xpu_bfloat16",
"test_compare_cpu_exp_xpu_bfloat16",
"test_compare_cpu_exp_xpu_complex128",
"test_compare_cpu_exp_xpu_complex64",
"test_compare_cpu_acosh_xpu_complex64",
"test_compare_cpu_asin_xpu_complex128",
"test_compare_cpu_asin_xpu_complex64",
"test_compare_cpu_asinh_xpu_complex128",
"test_compare_cpu_asinh_xpu_complex64",
"test_compare_cpu_atan_xpu_complex128",
"test_compare_cpu_atan_xpu_complex64",
"test_compare_cpu_exp2_xpu_complex128",
"test_compare_cpu_exp2_xpu_complex64",
"test_compare_cpu_nextafter_xpu_bfloat16",
# CUDA does not support the data type either
"test_non_standard_bool_values_native_dropout_backward_xpu_bool",
# Need FP64 golden ref for more accurate comparison
"test_compare_cpu_log_softmax_xpu_bfloat16",
# TestCompositeCompliance
# CPU fallback fails
# Requires implementing aten::embedding_renorm_
"test_view_replay_nn_functional_embedding_xpu_float32",
# TestCompositeCompliance::test_cow_input
# XPU Tensor fails in copy-on-write cases
# AssertionError: False is not true : Keyword argument 'output grad 0' during backward call unexpectedly materializes. Either set `supports_cow_input_no_materialize_backward=False` in this operation's OpInfo, add the arg to the OpInfo's `allow_cow_input_materialize_backward` list, or change the implementation to avoid materialization.
# https://github.com/intel/torch-xpu-ops/issues/281
"test_cow_input",
# The XPU implementation is correct.
# For std::exp with input (-inf, NaN), the result is (±0,±0) (signs are unspecified).
# For std::exp with input (-inf, +inf), the result is (±0,±0) (signs are unspecified).
# The CPU implementation returns NaN in these cases.
# https://en.cppreference.com/w/cpp/numeric/complex/exp
"test_compare_cpu_sigmoid_xpu_complex64",
"test_compare_cpu_sigmoid_xpu_complex128",
# Special handling (different calculation order) in the CPU reference impl.
# https://github.com/pytorch/pytorch/blob/c97e3ebb96d7457075b019b94411e8c2d058e68b/aten/src/ATen/native/EmbeddingBag.cpp#L300
"test_compare_cpu_nn_functional_embedding_bag_xpu_bfloat16",
"test_compare_cpu_nn_functional_embedding_bag_xpu_float16",
# Operator not implemented: aten::embedding_renorm_.
# Re-enable these cases once the operator is supported.
# https://github.com/intel/torch-xpu-ops/issues/380
"test_compare_cpu_nn_functional_embedding_bag_xpu_float32",
"test_compare_cpu_nn_functional_embedding_bag_xpu_float64",
"test_view_replay_nn_functional_embedding_bag_xpu_float32",
# Double and complex datatype matmul is not supported in oneDNN
"test_compare_cpu_cdist_xpu_float64",
# Bilinear interpolation involves many calculation steps; accuracy is reduced in half precision.
# Not in CUDA test scope either.
"test_compare_cpu_nn_functional_upsample_bilinear_xpu_bfloat16",
"test_compare_cpu_nn_functional_upsample_bilinear_xpu_float16",
# The CPU result is not a golden reference
"test_compare_cpu_nn_functional_group_norm_xpu_bfloat16",
"test_compare_cpu_nn_functional_group_norm_xpu_float16",
"test_compare_cpu_nn_functional_nll_loss_xpu_bfloat16",
"test_compare_cpu_nn_functional_nll_loss_xpu_float16",
"test_compare_cpu_nn_functional_batch_norm_xpu_bfloat16",
"test_compare_cpu__batch_norm_with_update_xpu_bfloat16",
"test_compare_cpu__batch_norm_with_update_xpu_float16",
"test_compare_cpu_nn_functional_huber_loss_xpu_bfloat16",
"test_compare_cpu_nansum_xpu_bfloat16",
"test_compare_cpu_nanmean_xpu_bfloat16",
# Aligned with the CUDA impl by using an accumulate type, which the CPU impl does not use.
# When XPU uses the original data type, the case passes.
"test_compare_cpu_logit_xpu_bfloat16",
# Precision error.
# Mismatched elements: 1 / 24 (4.2%)
# Greatest absolute difference: 0.03125 at index (0, 1, 0, 1) (up to 0.001 allowed)
# Greatest relative difference: 0.0048828125 at index (0, 1, 0, 1) (up to 0.001 allowed)
"test_compare_cpu_nn_functional_interpolate_bilinear_xpu_bfloat16",
# RuntimeError: "compute_index_ranges_weights" not implemented for 'Half'
"test_compare_cpu_nn_functional_interpolate_bilinear_xpu_float16",
# AssertionError: False is not true : Argument 0 during forward call unexpectedly materializes. Either set `supports_cow_input_no_materialize_forward=False...
"test_cow_input_nn_functional_interpolate_bilinear_xpu_float32",
"test_cow_input_nn_functional_interpolate_linear_xpu_float32",
"test_cow_input_nn_functional_interpolate_trilinear_xpu_float32",
# The XPU and CUDA results are consistent, but the CPU and CUDA results are not.
"test_compare_cpu_nn_functional_interpolate_linear_xpu_bfloat16",
"test_compare_cpu_nn_functional_interpolate_linear_xpu_float16",
# Bicubic interpolation involves many calculation steps; accuracy is reduced in half precision.
# Not in CUDA test scope either.
"test_compare_cpu_nn_functional_interpolate_bicubic_xpu_bfloat16",
"test_compare_cpu_nn_functional_interpolate_bicubic_xpu_float16",
# Not all operators exercised by this case are implemented for XPU.
# Re-enable it once the operator is implemented.
# Error: The operator 'aten::glu_jvp' is not currently implemented for the XPU device.
"test_forward_ad_nn_functional_glu_xpu_float32",
# Precision error.
# Mismatched elements: 1 / 812 (0.1%)
# Greatest absolute difference: 0.03125 at index (610,) (up to 0.001 allowed)
# Greatest relative difference: 0.00396728515625 at index (610,) (up to 0.001 allowed)
"test_compare_cpu_hypot_xpu_bfloat16",
# RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
# Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with the same error.
"test_compare_cpu_polar_xpu_bfloat16",
# Precision error.
# Mismatched elements: 1 / 25 (4.0%)
# Greatest absolute difference: 0.00146484375 at index (0, 0) (up to 0.001 allowed)
# Greatest relative difference: 0.0163116455078125 at index (0, 0) (up to 0.001 allowed)
"test_compare_cpu_sub_xpu_float16",
# Different results for the value index due to unstable sort.
# XPU and CUDA have the same result.
"test_compare_cpu_median_xpu_int16",
"test_compare_cpu_median_xpu_int32",
"test_compare_cpu_median_xpu_int64",
"test_compare_cpu_median_xpu_int8",
"test_compare_cpu_median_xpu_uint8",
"test_compare_cpu_nanmedian_xpu_int16",
"test_compare_cpu_nanmedian_xpu_int32",
"test_compare_cpu_nanmedian_xpu_int64",
"test_compare_cpu_nanmedian_xpu_int8",
"test_compare_cpu_nanmedian_xpu_uint8",
)

skip_list = skip_dict["test_ops_xpu.py"]

skip_options = " -k 'not " + skip_list[0]
for skip_case in skip_list[1:]:
19 changes: 19 additions & 0 deletions test/xpu/extended/run_test_with_skip_arc.py
@@ -0,0 +1,19 @@
import os
import sys
from skip_list_common import skip_dict
from skip_list_arc import skip_dict as skip_dict_specifical

skip_list = skip_dict["test_ops_xpu.py"] + skip_dict_specifical["test_ops_xpu.py"]

skip_options = " -k 'not " + skip_list[0]
for skip_case in skip_list[1:]:
    skip_option = " and not " + skip_case
    skip_options += skip_option
skip_options += "'"

test_command = "PYTORCH_TEST_WITH_SLOW=1 pytest -v test_ops_xpu.py"
test_command += skip_options

res = os.system(test_command)
exit_code = os.WEXITSTATUS(res)
sys.exit(exit_code)
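
For clarity, here is a minimal, self-contained sketch (not part of the PR) of how the -k filter string above is assembled, using two hypothetical skip entries; the resulting pytest command deselects every listed case.

# Hypothetical two-entry skip list, for illustration only.
skip_list = (
    "test_compare_cpu_example_a_xpu_bfloat16",
    "test_compare_cpu_example_b_xpu_float16",
)

# Same construction as in run_test_with_skip_arc.py above.
skip_options = " -k 'not " + skip_list[0]
for skip_case in skip_list[1:]:
    skip_options += " and not " + skip_case
skip_options += "'"

test_command = "PYTORCH_TEST_WITH_SLOW=1 pytest -v test_ops_xpu.py" + skip_options
print(test_command)
# PYTORCH_TEST_WITH_SLOW=1 pytest -v test_ops_xpu.py -k 'not test_compare_cpu_example_a_xpu_bfloat16 and not test_compare_cpu_example_b_xpu_float16'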
11 changes: 11 additions & 0 deletions test/xpu/extended/skip_list_arc.py
@@ -0,0 +1,11 @@
skip_dict = {
"test_ops_xpu.py": (
# RuntimeError: Required aspect fp64 is not supported on the device
# https://github.com/intel/torch-xpu-ops/issues/628
"test_compare_cpu_bincount_xpu_int16",
"test_compare_cpu_bincount_xpu_int32",
"test_compare_cpu_bincount_xpu_int64",
"test_compare_cpu_bincount_xpu_int8",
"test_compare_cpu_bincount_xpu_uint8",
),
}
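
Since run_test_with_skip_arc.py simply concatenates the common and Arc-specific tuples, one possible sanity check (not part of the PR, assuming both modules are importable from test/xpu/extended) is to confirm that the Arc list does not repeat entries already in the common list:

# Minimal sketch: report any overlap between the two skip lists.
from skip_list_common import skip_dict as skip_dict_common
from skip_list_arc import skip_dict as skip_dict_arc

overlap = set(skip_dict_common["test_ops_xpu.py"]) & set(skip_dict_arc["test_ops_xpu.py"])
if overlap:
    print("Duplicated skip entries:", sorted(overlap))
else:
    print("No overlap between the common and Arc-specific skip lists.")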
150 changes: 150 additions & 0 deletions test/xpu/extended/skip_list_common.py
@@ -0,0 +1,150 @@
skip_dict = {
"test_ops_xpu.py": (
# Calculation differences between the XPU and CPU implementations:
# 1. Compiler optimization fails to promote the data type to higher precision.
# 2. Accumulated error is amplified by some operations in extreme cases, e.g. std::exp(extreme_large_num).
# 3. Accumulated error is amplified by a large number of accumulation operations.
# 4. Accumulated error differs between implementations due to different accumulation order:
#    a. different kernel implementations;
#    b. different std functions (std::log, std::tanh, std::exp).
# 5. Dividing two identical float values does not yield exactly 1.
# 6. std functions return different results between GCC and SYCL when the input is NaN or Inf.
"test_compare_cpu_cumsum_xpu_bfloat16",
"test_compare_cpu_cumsum_xpu_float16",
"test_compare_cpu_log_xpu_complex64",
"test_compare_cpu_log10_xpu_complex64",
"test_compare_cpu_log1p_xpu_complex64",
"test_compare_cpu_log2_xpu_complex64",
"test_compare_cpu_log2_xpu_complex128",
"test_compare_cpu_mul_xpu_complex64",
"test_compare_cpu_pow_xpu_complex128",
"test_compare_cpu_pow_xpu_complex64",
"test_compare_cpu_tan_xpu_complex128",
"test_compare_cpu_tan_xpu_complex64",
"test_compare_cpu_tanh_xpu_complex128",
"test_compare_cpu_tanh_xpu_complex64",
"test_compare_cpu_rsqrt_xpu_bfloat16",
"test_compare_cpu_pow_xpu_bfloat16",
# CUDA has the same issue in this case
"test_compare_cpu__refs_rsub_xpu_bfloat16",
"test_compare_cpu_add_xpu_bfloat16",
"test_compare_cpu_sub_xpu_bfloat16",
"test_compare_cpu_acos_xpu_complex128",
"test_compare_cpu_acos_xpu_complex64",
"test_compare_cpu_acosh_xpu_complex64",
"test_compare_cpu_cross_xpu_float16",
"test_compare_cpu_floor_divide_xpu_bfloat16",
"test_compare_cpu_floor_divide_xpu_float16",
"test_compare_cpu_polygamma_polygamma_n_0_xpu_bfloat16",
"test_compare_cpu_exp_xpu_bfloat16",
"test_compare_cpu_exp_xpu_complex128",
"test_compare_cpu_exp_xpu_complex64",
"test_compare_cpu_acosh_xpu_complex64",
"test_compare_cpu_asin_xpu_complex128",
"test_compare_cpu_asin_xpu_complex64",
"test_compare_cpu_asinh_xpu_complex128",
"test_compare_cpu_asinh_xpu_complex64",
"test_compare_cpu_atan_xpu_complex128",
"test_compare_cpu_atan_xpu_complex64",
"test_compare_cpu_exp2_xpu_complex128",
"test_compare_cpu_exp2_xpu_complex64",
"test_compare_cpu_nextafter_xpu_bfloat16",
# CUDA does not support the data type either
"test_non_standard_bool_values_native_dropout_backward_xpu_bool",
# Need FP64 golden ref for more accurate comparison
"test_compare_cpu_log_softmax_xpu_bfloat16",
# TestCompositeCompliance
# CPU fallback fails
# Requires implementing aten::embedding_renorm_
"test_view_replay_nn_functional_embedding_xpu_float32",
# TestCompositeCompliance::test_cow_input
# XPU Tensor fails in copy-on-write cases
# AssertionError: False is not true : Keyword argument 'output grad 0' during backward call unexpectedly materializes. Either set `supports_cow_input_no_materialize_backward=False` in this operation's OpInfo, add the arg to the OpInfo's `allow_cow_input_materialize_backward` list, or change the implementation to avoid materialization.
# https://github.com/intel/torch-xpu-ops/issues/281
"test_cow_input",
# The XPU implementation is correct.
# For std::exp with input (-inf, NaN), the result is (±0,±0) (signs are unspecified).
# For std::exp with input (-inf, +inf), the result is (±0,±0) (signs are unspecified).
# The CPU implementation returns NaN in these cases.
# https://en.cppreference.com/w/cpp/numeric/complex/exp
"test_compare_cpu_sigmoid_xpu_complex64",
"test_compare_cpu_sigmoid_xpu_complex128",
# Special handling (different calculation order) in the CPU reference impl.
# https://github.com/pytorch/pytorch/blob/c97e3ebb96d7457075b019b94411e8c2d058e68b/aten/src/ATen/native/EmbeddingBag.cpp#L300
"test_compare_cpu_nn_functional_embedding_bag_xpu_bfloat16",
"test_compare_cpu_nn_functional_embedding_bag_xpu_float16",
# Operator not implemented: aten::embedding_renorm_.
# Re-enable these cases once the operator is supported.
# https://github.com/intel/torch-xpu-ops/issues/380
"test_compare_cpu_nn_functional_embedding_bag_xpu_float32",
"test_compare_cpu_nn_functional_embedding_bag_xpu_float64",
"test_view_replay_nn_functional_embedding_bag_xpu_float32",
# Double and complex datatype matmul is not supported in oneDNN
"test_compare_cpu_cdist_xpu_float64",
# Bilinear interpolation involves many calculation steps; accuracy is reduced in half precision.
# Not in CUDA test scope either.
"test_compare_cpu_nn_functional_upsample_bilinear_xpu_bfloat16",
"test_compare_cpu_nn_functional_upsample_bilinear_xpu_float16",
# The CPU result is not a golden reference
"test_compare_cpu_nn_functional_group_norm_xpu_bfloat16",
"test_compare_cpu_nn_functional_group_norm_xpu_float16",
"test_compare_cpu_nn_functional_nll_loss_xpu_bfloat16",
"test_compare_cpu_nn_functional_nll_loss_xpu_float16",
"test_compare_cpu_nn_functional_batch_norm_xpu_bfloat16",
"test_compare_cpu__batch_norm_with_update_xpu_bfloat16",
"test_compare_cpu__batch_norm_with_update_xpu_float16",
"test_compare_cpu_nn_functional_huber_loss_xpu_bfloat16",
"test_compare_cpu_nansum_xpu_bfloat16",
"test_compare_cpu_nanmean_xpu_bfloat16",
# Aligned with the CUDA impl by using an accumulate type, which the CPU impl does not use.
# When XPU uses the original data type, the case passes.
"test_compare_cpu_logit_xpu_bfloat16",
# Precision error.
# Mismatched elements: 1 / 24 (4.2%)
# Greatest absolute difference: 0.03125 at index (0, 1, 0, 1) (up to 0.001 allowed)
# Greatest relative difference: 0.0048828125 at index (0, 1, 0, 1) (up to 0.001 allowed)
"test_compare_cpu_nn_functional_interpolate_bilinear_xpu_bfloat16",
# RuntimeError: "compute_index_ranges_weights" not implemented for 'Half'
"test_compare_cpu_nn_functional_interpolate_bilinear_xpu_float16",
# AssertionError: False is not true : Argument 0 during forward call unexpectedly materializes. Either set `supports_cow_input_no_materialize_forward=False...
"test_cow_input_nn_functional_interpolate_bilinear_xpu_float32",
"test_cow_input_nn_functional_interpolate_linear_xpu_float32",
"test_cow_input_nn_functional_interpolate_trilinear_xpu_float32",
# The XPU and CUDA results are consistent, but the CPU and CUDA results are not.
"test_compare_cpu_nn_functional_interpolate_linear_xpu_bfloat16",
"test_compare_cpu_nn_functional_interpolate_linear_xpu_float16",
# Bicubic interpolation involves many calculation steps; accuracy is reduced in half precision.
# Not in CUDA test scope either.
"test_compare_cpu_nn_functional_interpolate_bicubic_xpu_bfloat16",
"test_compare_cpu_nn_functional_interpolate_bicubic_xpu_float16",
# Not all operators exercised by this case are implemented for XPU.
# Re-enable it once the operator is implemented.
# Error: The operator 'aten::glu_jvp' is not currently implemented for the XPU device.
"test_forward_ad_nn_functional_glu_xpu_float32",
# Precision error.
# Mismatched elements: 1 / 812 (0.1%)
# Greatest absolute difference: 0.03125 at index (610,) (up to 0.001 allowed)
# Greatest relative difference: 0.00396728515625 at index (610,) (up to 0.001 allowed)
"test_compare_cpu_hypot_xpu_bfloat16",
# RuntimeError: Expected both inputs to be Half, Float or Double tensors but got BFloat16 and BFloat16.
# Polar's backward is calculated using complex(), which does not support bfloat16. CUDA fails with the same error.
"test_compare_cpu_polar_xpu_bfloat16",
# Precision error.
# Mismatched elements: 1 / 25 (4.0%)
# Greatest absolute difference: 0.00146484375 at index (0, 0) (up to 0.001 allowed)
# Greatest relative difference: 0.0163116455078125 at index (0, 0) (up to 0.001 allowed)
"test_compare_cpu_sub_xpu_float16",
# Different results for the value index due to unstable sort.
# XPU and CUDA have the same result.
"test_compare_cpu_median_xpu_int16",
"test_compare_cpu_median_xpu_int32",
"test_compare_cpu_median_xpu_int64",
"test_compare_cpu_median_xpu_int8",
"test_compare_cpu_median_xpu_uint8",
"test_compare_cpu_nanmedian_xpu_int16",
"test_compare_cpu_nanmedian_xpu_int32",
"test_compare_cpu_nanmedian_xpu_int64",
"test_compare_cpu_nanmedian_xpu_int8",
"test_compare_cpu_nanmedian_xpu_uint8",
),
}
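
As a rough illustration of the accumulation-error comments at the top of this list (reasons 2-3), here is a small sketch (not part of the PR) comparing bfloat16 and float32 cumsum against an FP64 reference; the exact numbers depend on the backend, but the low-precision error is typically much larger, which is why cases such as test_compare_cpu_cumsum_xpu_bfloat16 are skipped.

# Illustrative only; runs on CPU, no XPU device required.
import torch

x = torch.full((10000,), 0.01)
ref = x.double().cumsum(0)                                  # FP64 golden reference
err_bf16 = (x.bfloat16().cumsum(0).double() - ref).abs().max()
err_fp32 = (x.float().cumsum(0).double() - ref).abs().max()
print(f"bfloat16 max abs error: {err_bf16.item():.4f}")
print(f"float32  max abs error: {err_fp32.item():.6f}")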