Merge pull request easybuilders#19067 from Flamefire/20231024101128_new_pr_PyTorch201

{ai}[foss/2022b] PyTorch v2.0.1
Showing 3 changed files with 209 additions and 0 deletions.
easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1-foss-2022b.eb (new file, 152 additions):
name = 'PyTorch'
version = '2.0.1'

homepage = 'https://pytorch.org/'
description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
PyTorch is a deep learning framework that puts Python first."""

toolchain = {'name': 'foss', 'version': '2022b'}

source_urls = [GITHUB_RELEASE]
sources = ['%(namelower)s-v%(version)s.tar.gz']
patches = [
    'PyTorch-1.7.0_disable-dev-shm-test.patch',
    'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
    'PyTorch-1.12.1_add-hypothesis-suppression.patch',
    'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
    'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
    'PyTorch-1.12.1_skip-test_round_robin.patch',
    'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
    'PyTorch-1.13.1_fix-protobuf-dependency.patch',
    'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
    'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
    'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
    'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch',
    'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
    'PyTorch-2.0.1_disable-gcc12-warning.patch',
    'PyTorch-2.0.1_disable-test-sharding.patch',
    'PyTorch-2.0.1_fix-numpy-compat.patch',
    'PyTorch-2.0.1_fix-shift-ops.patch',
    'PyTorch-2.0.1_fix-skip-decorators.patch',
    'PyTorch-2.0.1_fix-test_memory_profiler.patch',
    'PyTorch-2.0.1_fix-test-ops-conf.patch',
    'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch',
    'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch',
    'PyTorch-2.0.1_fix-vsx-loadu.patch',
    'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch',
    'PyTorch-2.0.1_no-cuda-stubs-rpath.patch',
    'PyTorch-2.0.1_remove-test-requiring-online-access.patch',
    'PyTorch-2.0.1_skip-diff-test-on-ppc.patch',
    'PyTorch-2.0.1_skip-failing-gradtest.patch',
    'PyTorch-2.0.1_skip-test_baddbmm_cpu_bfloat16.patch',
    'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch',
    'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch',
]
checksums = [
    {'pytorch-v2.0.1.tar.gz': '9c564ca440265c69400ef5fdd48bf15e28af5aa4bed84c95efaad960a6699998'},
    {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
    {'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
     '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
    {'PyTorch-1.12.1_add-hypothesis-suppression.patch':
     'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
    {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
     '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
    {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
    {'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
    {'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
     '5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
    {'PyTorch-1.13.1_fix-protobuf-dependency.patch':
     '8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
    {'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
     'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
    {'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
     '72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
    {'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
     '481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
    {'PyTorch-2.0.1_add-missing-vsx-vector-shift-functions.patch':
     'da44961d6c204403ba0c4b88cedccf06a7a3d24f29c4398545f96efae7a45c95'},
    {'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
     '02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
    {'PyTorch-2.0.1_disable-gcc12-warning.patch': 'f558dfc8f7cdcdc74c4c58ef7e8fe6d67870aec6386ac0d923f1b745d108eec7'},
    {'PyTorch-2.0.1_disable-test-sharding.patch': 'a1ed7f21c9a269ea039a07a3d6574f885787b30ca5687143c96e096d31066cca'},
    {'PyTorch-2.0.1_fix-numpy-compat.patch': 'f3e5798193e0909a415d824f13772973200965db84476c1737824f2735f2db94'},
    {'PyTorch-2.0.1_fix-shift-ops.patch': '5ee655d5dba56d801d5618543b6ca299fa874939a3471f7b5449bfcb7f3f18c7'},
    {'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'},
    {'PyTorch-2.0.1_fix-test_memory_profiler.patch':
     'fd03117c46f59c1c62227d31c410c4cdd98fd35410976758cb9e7ec947582ddb'},
    {'PyTorch-2.0.1_fix-test-ops-conf.patch': '0f995e4f89baf3cbeb8666cbfe694666a2ef2bc53d97d6301f768b3ff9001fa4'},
    {'PyTorch-2.0.1_fix-torch.compile-on-ppc.patch':
     '20f9172ae696da0c5c7b3bae6f0bf1221192cb1cbac3a44526a415087834bee7'},
    {'PyTorch-2.0.1_fix-ub-in-inductor-codegen.patch':
     '1b37194f55ae678f3657b8728dfb896c18ffe8babe90987ce468c4fa9274f357'},
    {'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'},
    {'PyTorch-2.0.1_ignore_unexpected_success_in_test_torchinductor_opinfo.patch':
     '57e2985a5b7085c2786e4b0c4a5f0c81f6b2ae9d5804bbd552b06e8b1570f4c4'},
    {'PyTorch-2.0.1_no-cuda-stubs-rpath.patch': '8902e58a762240f24cdbf0182e99ccdfc2a93492869352fcb4ca0ec7e407f83a'},
    {'PyTorch-2.0.1_remove-test-requiring-online-access.patch':
     '721ab0d35ed0ff8a46cb84ced5a98c0fb8ce6143cf6cea80b1360d3d7f64f584'},
    {'PyTorch-2.0.1_skip-diff-test-on-ppc.patch': 'f6e39cd774e5663df25507a73d37ad598157c2eadb2f47ca20a537dbe4b3e14f'},
    {'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'},
    {'PyTorch-2.0.1_skip-test_baddbmm_cpu_bfloat16.patch':
     '199005bbbb913837e557358dee31535d8e3f63af9ac7cdcece624ab8e572e28a'},
    {'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch':
     '7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'},
    {'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch':
     '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'},
]

osdependencies = [OS_PKG_IBVERBS_DEV]

builddependencies = [
    ('CMake', '3.24.3'),
    ('hypothesis', '6.68.2'),
    # For tests
    ('pytest-rerunfailures', '12.0'),
    ('pytest-shard', '0.1.2'),
]

dependencies = [
    ('Ninja', '1.11.1'),  # Required for JIT compilation of C++ extensions
    ('Python', '3.10.8'),
    ('protobuf', '23.0'),
    ('protobuf-python', '4.23.0'),
    ('pybind11', '2.10.3'),
    ('SciPy-bundle', '2023.02'),
    ('PyYAML', '6.0'),
    ('MPFR', '4.2.0'),
    ('GMP', '6.2.1'),
    ('numactl', '2.0.16'),
    ('FFmpeg', '5.1.2'),
    ('Pillow', '9.4.0'),
    ('expecttest', '0.1.3'),
    ('networkx', '3.0'),
    ('sympy', '1.12'),
]

excluded_tests = {
    '': [
        # This test seems to take too long on NVIDIA Ampere at least.
        'distributed/test_distributed_spawn',
        # Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
        'distributions/test_constraints',
        # no xdoctest
        'doctests',
        # failing on broadwell
        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
        'test_native_mha',
        # intermittent failures on various systems
        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
        'distributed/rpc/test_tensorpipe_agent',
    ]
}

runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'

# Especially test_quantization has a few corner cases that are triggered by the random input values,
# those cannot be easily avoided, see https://github.com/pytorch/pytorch/issues/107030
# So allow a low number of tests to fail as the tests "usually" succeed
max_failed_tests = 3

tests = ['PyTorch-check-cpp-extension.py']

moduleclass = 'ai'
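
The tests parameter above adds a standalone sanity check that exercises JIT compilation of a C++ extension, which is also why Ninja is listed as a runtime dependency. A minimal sketch of what such a check could look like, assuming the freshly built PyTorch module is loaded; this code is hypothetical and is not the PyTorch-check-cpp-extension.py script that EasyBuild actually ships:

import torch
from torch.utils.cpp_extension import load_inline

# JIT-compile a trivial C++ extension; torch drives the build with Ninja.
# torch/extension.h is included automatically by load_inline.
mod = load_inline(
    name='check_cpp_extension',
    cpp_sources='torch::Tensor double_it(torch::Tensor x) { return x * 2; }',
    functions=['double_it'],  # generate pybind11 bindings automatically
)
assert torch.equal(mod.double_it(torch.ones(3)), torch.full((3,), 2.0))
print('OK: C++ extension compiled and ran')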
easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_disable-gcc12-warning.patch (new file, 32 additions):
GCC 12 has a false-positive warning when compiling for some architectures, e.g. Intel Sapphire Rapids.
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=112370

Suppress this warning so that the build doesn't error out.

Author: Alexander Grund (TU Dresden)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 471fc8a8d3d..5eb7b432630 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -557,6 +557,7 @@ string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
 if(NOT MSVC)
   string(APPEND CMAKE_CUDA_FLAGS_DEBUG " -g -lineinfo --source-in-ptx")
   string(APPEND CMAKE_CUDA_FLAGS_RELWITHDEBINFO " -g -lineinfo --source-in-ptx")
+  append_cxx_flag_if_supported("-Wno-free-nonheap-object" CMAKE_CXX_FLAGS)
 endif(NOT MSVC)

 # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
diff --git a/cmake/public/utils.cmake b/cmake/public/utils.cmake
index 60cca5383dd..76c02d7479f 100644
--- a/cmake/public/utils.cmake
+++ b/cmake/public/utils.cmake
@@ -548,6 +548,8 @@ function(torch_update_find_cuda_flags)
   endif()
 endfunction()

+include(CheckCXXCompilerFlag)
+
 ##############################################################################
 # CHeck if given flag is supported and append it to provided outputvar
 # Also define HAS_UPPER_CASE_FLAG_NAME variable
easybuild/easyconfigs/p/PyTorch/PyTorch-2.0.1_skip-test_baddbmm_cpu_bfloat16.patch (new file, 25 additions):
test_baddbmm_cpu_bfloat16 in test_linalg.py fails with

> AssertionError: Tensor-likes are not close!
>
> Mismatched elements: 1387 / 6000 (23.1%)
> Greatest absolute difference: 3.98046875 at index (0, 11, 7) (up to 0.5 allowed)
> Greatest relative difference: 1324.7142857142858 at index (0, 4, 9) (up to 0.016 allowed)

This also happens with the official 2.0.1 pip package, and the test seems to be known to be flaky: https://github.com/pytorch/pytorch/issues/103046
So assume this is expected and skip the test.

Author: Alexander Grund (TU Dresden)
diff --git a/test/test_linalg.py b/test/test_linalg.py
index 29a0e482d86..d195ad60add 100644
--- a/test/test_linalg.py
+++ b/test/test_linalg.py
@@ -5871,7 +5871,7 @@ scipy_lobpcg | {:10.2e} | {:10.2e} | {:6} | N/A

     @precisionOverride({torch.half: 0.1, torch.bfloat16: 0.5})
     @onlyNativeDeviceTypes
-    @dtypes(*floating_and_complex_types_and(torch.bfloat16))
+    @dtypes(*floating_and_complex_types())
     @tf32_on_and_off(0.05)
     def test_baddbmm(self, device, dtype):
         if self.device_type == 'cuda' and dtype is torch.bfloat16 and not SM53OrLater:
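
For context on why this test is flaky rather than broken: bfloat16 keeps only 7 mantissa bits, so a batched matrix multiplication done in bfloat16 drifts noticeably from a float32 reference, and the relative error explodes wherever the reference value is near zero. A small, hypothetical reproduction of the effect (illustration only, not part of the patch), assuming only a working PyTorch install:

import torch

torch.manual_seed(0)
inp = torch.randn(2, 16, 8)
b1 = torch.randn(2, 16, 32)
b2 = torch.randn(2, 32, 8)

# float32 reference vs. the same baddbmm computed in bfloat16
ref = torch.baddbmm(inp, b1, b2)
low = torch.baddbmm(inp.bfloat16(), b1.bfloat16(), b2.bfloat16()).float()

diff = (ref - low).abs()
print("max absolute difference:", diff.max().item())
# the relative error blows up wherever ref is close to zero, which is
# exactly what makes a fixed-tolerance check on random inputs flaky
print("max relative difference:", (diff / ref.abs()).max().item())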