Skip to content

Commit

Permalink
Merge pull request easybuilders#19067 from Flamefire/20231024101128_n…
Browse files Browse the repository at this point in the history
…ew_pr_PyTorch201

{ai}[foss/2022b] PyTorch v2.0.1
  • Loading branch information
branfosj authored Dec 17, 2023
2 parents c692536 + c7a4c5a commit 73643ce
Show file tree
Hide file tree
Showing 3 changed files with 209 additions and 0 deletions.
152 changes: 152 additions & 0 deletions easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1-foss-2022b.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
name = 'PyTorch'
version = '2.0.1'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2022b'}

source_urls = [GITHUB_RELEASE]
sources = ['%(namelower)s-v%(version)s.tar.gz']
patches = [
'PyTorch-1.7.0_disable-dev-shm-test.patch',
'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
'PyTorch-1.12.1_add-hypothesis-suppression.patch',
'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
'PyTorch-1.12.1_skip-test_round_robin.patch',
'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
'PyTorch-1.13.1_fix-protobuf-dependency.patch',
'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch',
'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
'PyTorch-2.0.1_disable-gcc12-warning.patch',
'PyTorch-2.0.1_disable-test-sharding.patch',
'PyTorch-2.0.1_fix-numpy-compat.patch',
'PyTorch-2.0.1_fix-shift-ops.patch',
'PyTorch-2.0.1_fix-skip-decorators.patch',
'PyTorch-2.0.1_fix-test_memory_profiler.patch',
'PyTorch-2.0.1_fix-test-ops-conf.patch',
'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch',
'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch',
'PyTorch-2.0.1_fix-vsx-loadu.patch',
'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch',
'PyTorch-2.0.1_no-cuda-stubs-rpath.patch',
'PyTorch-2.0.1_remove-test-requiring-online-access.patch',
'PyTorch-2.0.1_skip-diff-test-on-ppc.patch',
'PyTorch-2.0.1_skip-failing-gradtest.patch',
'PyTorch-2.0.1_skip-test_baddbmm_cpu_bfloat16.patch',
'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch',
'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch',
]
checksums = [
{'pytorch-v2.0.1.tar.gz': '9c564ca440265c69400ef5fdd48bf15e28af5aa4bed84c95efaad960a6699998'},
{'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
{'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
'4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
{'PyTorch-1.12.1_add-hypothesis-suppression.patch':
'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
{'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
'1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
{'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
{'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
{'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
'5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
{'PyTorch-1.13.1_fix-protobuf-dependency.patch':
'8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
{'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
{'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
'72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
{'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
'481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
{'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch':
'da44961d6c204403ba0c4b88cedccf06a7a3d24f29c4398545f96efae7a45c95'},
{'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
'02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
{'PyTorch-2.0.1_disable-gcc12-warning.patch': 'f558dfc8f7cdcdc74c4c58ef7e8fe6d67870aec6386ac0d923f1b745d108eec7'},
{'PyTorch-2.0.1_disable-test-sharding.patch': 'a1ed7f21c9a269ea039a07a3d6574f885787b30ca5687143c96e096d31066cca'},
{'PyTorch-2.0.1_fix-numpy-compat.patch': 'f3e5798193e0909a415d824f13772973200965db84476c1737824f2735f2db94'},
{'PyTorch-2.0.1_fix-shift-ops.patch': '5ee655d5dba56d801d5618543b6ca299fa874939a3471f7b5449bfcb7f3f18c7'},
{'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'},
{'PyTorch-2.0.1_fix-test_memory_profiler.patch':
'fd03117c46f59c1c62227d31c410c4cdd98fd35410976758cb9e7ec947582ddb'},
{'PyTorch-2.0.1_fix-test-ops-conf.patch': '0f995e4f89baf3cbeb8666cbfe694666a2ef2bc53d97d6301f768b3ff9001fa4'},
{'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch':
'20f9172ae696da0c5c7b3bae6f0bf1221192cb1cbac3a44526a415087834bee7'},
{'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch':
'1b37194f55ae678f3657b8728dfb896c18ffe8babe90987ce468c4fa9274f357'},
{'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'},
{'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch':
'57e2985a5b7085c2786e4b0c4a5f0c81f6b2ae9d5804bbd552b06e8b1570f4c4'},
{'PyTorch-2.0.1_no-cuda-stubs-rpath.patch': '8902e58a762240f24cdbf0182e99ccdfc2a93492869352fcb4ca0ec7e407f83a'},
{'PyTorch-2.0.1_remove-test-requiring-online-access.patch':
'721ab0d35ed0ff8a46cb84ced5a98c0fb8ce6143cf6cea80b1360d3d7f64f584'},
{'PyTorch-2.0.1_skip-diff-test-on-ppc.patch': 'f6e39cd774e5663df25507a73d37ad598157c2eadb2f47ca20a537dbe4b3e14f'},
{'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'},
{'PyTorch-2.0.1_skip-test_baddbmm_cpu_bfloat16.patch':
'199005bbbb913837e557358dee31535d8e3f63af9ac7cdcece624ab8e572e28a'},
{'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch':
'7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'},
{'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch':
'166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'},
]

osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
('CMake', '3.24.3'),
('hypothesis', '6.68.2'),
# For tests
('pytest-rerunfailures', '12.0'),
('pytest-shard', '0.1.2'),
]

dependencies = [
('Ninja', '1.11.1'), # Required for JIT compilation of C++ extensions
('Python', '3.10.8'),
('protobuf', '23.0'),
('protobuf-python', '4.23.0'),
('pybind11', '2.10.3'),
('SciPy-bundle', '2023.02'),
('PyYAML', '6.0'),
('MPFR', '4.2.0'),
('GMP', '6.2.1'),
('numactl', '2.0.16'),
('FFmpeg', '5.1.2'),
('Pillow', '9.4.0'),
('expecttest', '0.1.3'),
('networkx', '3.0'),
('sympy', '1.12'),
]

excluded_tests = {
'': [
# This test seems to take too long on NVIDIA Ampere at least.
'distributed/test_distributed_spawn',
# Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
'distributions/test_constraints',
# no xdoctest
'doctests',
# failing on broadwell
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'test_native_mha',
# intermittent failures on various systems
# See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
'distributed/rpc/test_tensorpipe_agent',
]
}

runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'

# Especially test_quantization has a few corner cases that are triggered by the random input values,
# those cannot be easily avoided, see https://github.com/pytorch/pytorch/issues/107030
# So allow a low number of tests to fail as the tests "usually" succeed
max_failed_tests = 3

tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
GCC 12 has a false positive warning when compiled for some architectures, e.g. Intel Sapphire Rapids.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112370

Suppress this warning such that the build doesn't error.

Author: Alexander Grund (TU Dresden)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 471fc8a8d3d..5eb7b432630 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -557,6 +557,7 @@ string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
if(NOT MSVC)
string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -g -lineinfo --source-in-ptx")
string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -g -lineinfo --source-in-ptx")
+ append_cxx_flag_if_supported("-Wno-free-nonheap-object" CMAKE_CXX_FLAGS)
endif(NOT MSVC)

# Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake
index 60cca5383dd..76c02d7479f 100644
--- a/cmake/public/utils.cmake
+++ b/cmake/public/utils.cmake
@@ -548,6 +548,8 @@ function(torch_update_find_cuda_flags)
endif()
endfunction()

+include(CheckCXXCompilerFlag)
+
##############################################################################
# CHeck if given flag is supported and append it to provided outputvar
# Also define HAS_UPPER_CASE_FLAG_NAME variable
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
test_baddbmm_cpu_bfloat16 in test_linalg.py fails with
> AssertionError: Tensor-likes are not close!
>
> Mismatched elements: 1387 / 6000 (23.1%)
> Greatest absolute difference: 3.98046875 at index (0, 11, 7) (up to 0.5 allowed)
> Greatest relative difference: 1324.7142857142858 at index (0, 4, 9) (up to 0.016 allowed)

Happens also with the official 2.0.1 PIP package, and seems to be known to be flaky: https://github.com/pytorch/pytorch/issues/103046
So assume this to be expected and skip the test.

Author: Alexander Grund (TU Dresden)

diff --git a/test/test_linalg.py b/test/test_linalg.py
index 29a0e482d86..d195ad60add 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -5871,7 +5871,7 @@ scipy_lobpcg | {:10.2e} | {:10.2e} | {:6} | N/A

@precisionOverride({torch.half: 0.1, torch.bfloat16: 0.5})
@onlyNativeDeviceTypes
- @dtypes(*floating_and_complex_types_and(torch.bfloat16))
+ @dtypes(*floating_and_complex_types())
@tf32_on_and_off(0.05)
def test_baddbmm(self, device, dtype):
if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater:

0 comments on commit 73643ce

Please sign in to comment.