diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1-foss-2023b.eb b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1-foss-2023b.eb
new file mode 100644
index 00000000000..e1174850df2
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1-foss-2023b.eb
@@ -0,0 +1,152 @@
+name = 'PyTorch'
+version = '2.2.1'
+
+homepage = 'https://pytorch.org/'
+description = """Tensors and Dynamic neural networks in Python with strong GPU acceleration.
+PyTorch is a deep learning framework that puts Python first."""
+
+toolchain = {'name': 'foss', 'version': '2023b'}
+
+source_urls = [GITHUB_RELEASE]
+sources = ['%(namelower)s-v%(version)s.tar.gz']
+patches = [
+    'PyTorch-1.7.0_disable-dev-shm-test.patch',
+    'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
+    'PyTorch-1.12.1_add-hypothesis-suppression.patch',
+    'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
+    'PyTorch-1.12.1_fix-TestTorch.test_to.patch',
+    'PyTorch-1.12.1_skip-test_round_robin.patch',
+    'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch',
+    'PyTorch-1.13.1_fix-protobuf-dependency.patch',
+    'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch',
+    'PyTorch-1.13.1_skip-failing-singular-grad-test.patch',
+    'PyTorch-1.13.1_skip-tests-without-fbgemm.patch',
+    'PyTorch-2.0.1_avoid-test_quantization-failures.patch',
+    'PyTorch-2.0.1_fix-skip-decorators.patch',
+    'PyTorch-2.0.1_fix-vsx-loadu.patch',
+    'PyTorch-2.0.1_skip-failing-gradtest.patch',
+    'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch',
+    'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch',
+    'PyTorch-2.1.0_disable-gcc12-warning.patch',
+    'PyTorch-2.1.0_fix-vsx-vector-shift-functions.patch',
+    'PyTorch-2.1.0_increase-tolerance-functorch-test_vmapvjpvjp.patch',
+    'PyTorch-2.1.0_remove-test-requiring-online-access.patch',
+    'PyTorch-2.1.0_skip-diff-test-on-ppc.patch',
+    'PyTorch-2.1.0_skip-test_jvp_linalg_det_singular.patch',
+    'PyTorch-2.1.2_fix-vsx-vector-abs.patch',
+    'PyTorch-2.1.2_fix-vsx-vector-div.patch',
+    'PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch',
+    'PyTorch-2.2.1_no-cuda-stubs-rpath.patch',
+    'PyTorch-2.2.1_fix-test_extension_backend-without-vectorization.patch',
+]
+checksums = [
+    {'pytorch-v2.2.1.tar.gz': '8069467387b8ab7a7279671b9144d80a5c5342b4fa022eb3c1db629a6fd806c9'},
+    {'PyTorch-1.7.0_disable-dev-shm-test.patch': '622cb1eaeadc06e13128a862d9946bcc1f1edd3d02b259c56a9aecc4d5406b8a'},
+    {'PyTorch-1.11.1_skip-test_init_from_local_shards.patch':
+     '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7'},
+    {'PyTorch-1.12.1_add-hypothesis-suppression.patch':
+     'e71ffb94ebe69f580fa70e0de84017058325fdff944866d6bd03463626edc32c'},
+    {'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch':
+     '1efc9850c431d702e9117d4766277d3f88c5c8b3870997c9974971bce7f2ab83'},
+    {'PyTorch-1.12.1_fix-TestTorch.test_to.patch': '75f27987c3f25c501e719bd2b1c70a029ae0ee28514a97fe447516aee02b1535'},
+    {'PyTorch-1.12.1_skip-test_round_robin.patch': '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349'},
+    {'PyTorch-1.13.1_fix-gcc-12-warning-in-fbgemm.patch':
+     '5c7be91a6096083a0b1315efe0001537499c600f1f569953c6a2c7f4cc1d0910'},
+    {'PyTorch-1.13.1_fix-protobuf-dependency.patch':
+     '8bd755a0cab7233a243bc65ca57c9630dfccdc9bf8c9792f0de4e07a644fcb00'},
+    {'PyTorch-1.13.1_fix-warning-in-test-cpp-api.patch':
+     'bdde0f2105215c95a54de64ec4b1a4520528510663174fef6d5b900eb1db3937'},
+    {'PyTorch-1.13.1_skip-failing-singular-grad-test.patch':
+     '72688a57b2bb617665ad1a1d5e362c5111ae912c10936bb38a089c0204729f48'},
+    {'PyTorch-1.13.1_skip-tests-without-fbgemm.patch':
+     '481e595f673baf8ae58b41697a6792b83048b0264aa79b422f48cd8c22948bb7'},
+    {'PyTorch-2.0.1_avoid-test_quantization-failures.patch':
+     '02e3f47e4ed1d7d6077e26f1ae50073dc2b20426269930b505f4aefe5d2f33cd'},
+    {'PyTorch-2.0.1_fix-skip-decorators.patch': '2039012cef45446065e1a2097839fe20bb29fe3c1dcc926c3695ebf29832e920'},
+    {'PyTorch-2.0.1_fix-vsx-loadu.patch': 'a0ffa61da2d47c6acd09aaf6d4791e527d8919a6f4f1aa7ed38454cdcadb1f72'},
+    {'PyTorch-2.0.1_skip-failing-gradtest.patch': '8030bdec6ba49b057ab232d19a7f1a5e542e47e2ec340653a246ec9ed59f8bc1'},
+    {'PyTorch-2.0.1_skip-test_shuffle_reproducibility.patch':
+     '7047862abc1abaff62954da59700f36d4f39fcf83167a638183b1b7f8fec78ae'},
+    {'PyTorch-2.0.1_skip-tests-skipped-in-subprocess.patch':
+     '166c134573a95230e39b9ea09ece3ad8072f39d370c9a88fb2a1e24f6aaac2b5'},
+    {'PyTorch-2.1.0_disable-gcc12-warning.patch': 'c858b8db0010f41005dc06f9a50768d0d3dc2d2d499ccbdd5faf8a518869a421'},
+    {'PyTorch-2.1.0_fix-vsx-vector-shift-functions.patch':
+     '3793b4b878be1abe7791efcbd534774b87862cfe7dc4774ca8729b6cabb39e7e'},
+    {'PyTorch-2.1.0_increase-tolerance-functorch-test_vmapvjpvjp.patch':
+     'aef38adf1210d0c5455e91d7c7a9d9e5caad3ae568301e0ba9fc204309438e7b'},
+    {'PyTorch-2.1.0_remove-test-requiring-online-access.patch':
+     '35184b8c5a1b10f79e511cc25db3b8a5585a5d58b5d1aa25dd3d250200b14fd7'},
+    {'PyTorch-2.1.0_skip-diff-test-on-ppc.patch': '394157dbe565ffcbc1821cd63d05930957412156cc01e949ef3d3524176a1dda'},
+    {'PyTorch-2.1.0_skip-test_jvp_linalg_det_singular.patch':
+     '5229ca88a71db7667a90ddc0b809b2c817698bd6e9c5aaabd73d3173cf9b99fe'},
+    {'PyTorch-2.1.2_fix-vsx-vector-abs.patch': 'd67d32407faed7dc1dbab4bba0e2f7de36c3db04560ced35c94caf8d84ade886'},
+    {'PyTorch-2.1.2_fix-vsx-vector-div.patch': '11f497a6892eb49b249a15320e4218e0d7ac8ae4ce67de39e4a018a064ca1acc'},
+    {'PyTorch-2.1.2_skip-cpu_repro-test-without-vectorization.patch':
+     '7ace835af60c58d9e0754a34c19d4b9a0c3a531f19e5d0eba8e2e49206eaa7eb'},
+    {'PyTorch-2.2.1_no-cuda-stubs-rpath.patch': '713f98b45f33be955ff581fc14d16cd843d8b48190d3fdffa02afcdfd3583100'},
+    {'PyTorch-2.2.1_fix-test_extension_backend-without-vectorization.patch':
+     '8d8c72d68c8391ddec5133fcabbb8653fef890acb9eece8ff1ddc43f128f2450'},
+]
+
+osdependencies = [OS_PKG_IBVERBS_DEV]
+
+builddependencies = [
+    ('CMake', '3.27.6'),
+    ('hypothesis', '6.90.0'),
+    # For tests
+    ('pytest-flakefinder', '1.1.0'),
+    ('pytest-rerunfailures', '14.0'),
+    ('pytest-shard', '0.1.2'),
+    ('unittest-xml-reporting', '3.1.0'),
+]
+
+dependencies = [
+    ('Ninja', '1.11.1'),  # Required for JIT compilation of C++ extensions
+    ('Python', '3.11.5'),
+    ('Python-bundle-PyPI', '2023.10'),
+    ('protobuf', '25.3'),
+    ('protobuf-python', '4.25.3'),
+    ('pybind11', '2.11.1'),
+    ('SciPy-bundle', '2023.11'),
+    ('PyYAML', '6.0.1'),
+    ('MPFR', '4.2.1'),
+    ('GMP', '6.3.0'),
+    ('numactl', '2.0.16'),
+    ('FFmpeg', '6.0'),
+    ('Pillow', '10.2.0'),
+    ('expecttest', '0.2.1'),
+    ('networkx', '3.2.1'),
+    ('sympy', '1.12'),
+    ('Z3', '4.13.0',),
+]
+
+use_pip = True
+buildcmd = '%(python)s setup.py build'  # Run the (long) build in the build step
+
+excluded_tests = {
+    '': [
+        # This test seems to take too long on NVIDIA Ampere at least.
+        'distributed/test_distributed_spawn',
+        # Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
+        'distributions/test_constraints',
+        # no xdoctest
+        'doctests',
+        # failing on broadwell
+        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
+        'test_native_mha',
+        # intermittent failures on various systems
+        # See https://github.com/easybuilders/easybuild-easyconfigs/issues/17712
+        'distributed/rpc/test_tensorpipe_agent',
+    ]
+}
+
+runtest = 'cd test && PYTHONUNBUFFERED=1 %(python)s run_test.py --continue-through-error --verbose %(excluded_tests)s'
+
+# Especially test_quantization has a few corner cases that are triggered by the random input values,
+# those cannot be easily avoided, see https://github.com/pytorch/pytorch/issues/107030
+# So allow a low number of tests to fail as the tests "usually" succeed
+max_failed_tests = 2
+
+tests = ['PyTorch-check-cpp-extension.py']
+
+moduleclass = 'ai'
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1_fix-test_extension_backend-without-vectorization.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1_fix-test_extension_backend-without-vectorization.patch
new file mode 100644
index 00000000000..3dc4bc2ebdc
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1_fix-test_extension_backend-without-vectorization.patch
@@ -0,0 +1,39 @@
+The test checks for a substring "loadu" in generated code.
+On AVX systems that line is:
+> auto tmp0 = at::vec::Vectorized<float>::loadu(in_ptr0 + static_cast<long>(i0))
+however on non-AVX systems it is
+> auto tmp0 = in_ptr0[static_cast<long>(i0)];
+
+the difference depends on `codecache.valid_vec_isa_list()` being non-empty.
+See torch/_inductor/codegen/cpp.py:2639
+
+Modify the test to account for that.
+
+Author: Alexander Grund (TU Dresden)
+
+diff --git a/test/inductor/test_extension_backend.py b/test/inductor/test_extension_backend.py
+index 7d6f35d7b74..decc61d62d7 100644
+--- a/test/inductor/test_extension_backend.py
++++ b/test/inductor/test_extension_backend.py
+@@ -20,7 +20,7 @@ except ImportError:
+     )
+
+ from torch._C import FileCheck
+-from torch._inductor import metrics
++from torch._inductor import codecache, metrics
+ from torch._inductor.codegen.common import (
+     get_scheduling_for_device,
+     get_wrapper_codegen_for_device,
+@@ -130,7 +130,11 @@ class ExtensionBackendTests(TestCase):
+         metrics.reset()
+         opt_fn = torch.compile()(fn)
+         _, code = run_and_get_cpp_code(opt_fn, x, y, z)
+-        FileCheck().check("void kernel").check("loadu").check("extension_device").run(
++        if codecache.valid_vec_isa_list():
++            load_expr = 'loadu'
++        else:
++            load_expr = ' = in_ptr0[static_cast<long>(i0)];'
++        FileCheck().check("void kernel").check(load_expr).check("extension_device").run(
+             code
+         )
+         opt_fn(x, y, z)
diff --git a/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1_no-cuda-stubs-rpath.patch b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1_no-cuda-stubs-rpath.patch
new file mode 100644
index 00000000000..a1267eb1c77
--- /dev/null
+++ b/easybuild/easyconfigs/p/PyTorch/PyTorch-2.2.1_no-cuda-stubs-rpath.patch
@@ -0,0 +1,145 @@
+# PyTorch's CMAKE configuration by default sets RUNPATH on libraries if they link other libraries
+# that are outside the build tree, which is done because of the CMAKE config on
+# https://github.com/pytorch/pytorch/blob/v1.10.0/cmake/Dependencies.cmake#L10.
+# This provides problems, since the cuda stubs library path then also gets added to the RUNPATH.
+# As a result, at runtime, the stub version of things like libcuda.so.1 gets picked up, instead of the real drivers
+# See https://github.com/easybuilders/easybuild-easyconfigs/issues/14359
+# This line https://github.com/pytorch/pytorch/blob/v1.10.0/cmake/Dependencies.cmake#L16
+# Makes sure that any path that is linked, is also added to the RUNPATH.
+# This has been reported upstream in https://github.com/pytorch/pytorch/issues/35418
+# and a fix was attempted in https://github.com/pytorch/pytorch/pull/37737 but it was reverted
+#
+# This EasyBuild patch changes behavior for the libraries that were failing, i.e. the ones in this list:
+# https://github.com/easybuilders/easybuild-easyconfigs/issues/14359#issuecomment-970479904
+# This is done by setting INSTALL_RPATH_USE_LINK_PATH to false, and instead, specifying the RPATH
+# explicitely by defining INSTALL_RPATH, but only adding directories that do not match to the "stubs" regex
+#
+# Original patch: Caspar van Leeuwen
+# Updated: Alexander Grund (TU Dresden)
+#
+# See https://github.com/pytorch/pytorch/pull/87593
+diff --git a/binaries/CMakeLists.txt b/binaries/CMakeLists.txt
+index 15f47bf52ae..edf1ab26149 100644
+--- a/binaries/CMakeLists.txt
++++ b/binaries/CMakeLists.txt
+@@ -56,7 +56,8 @@ endif()
+
+ if(USE_CUDA)
+   caffe2_binary_target("inspect_gpu.cc")
+-  target_link_libraries(inspect_gpu ${CUDA_LIBRARIES})
++  include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake)
++  link_cuda_libraries(inspect_gpu ${CUDA_LIBRARIES})
+   caffe2_binary_target("print_core_object_sizes_gpu.cc")
+
+   if(BUILD_TEST)
+diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
+index 748363725bc..a0b75597b34 100644
+--- a/caffe2/CMakeLists.txt
++++ b/caffe2/CMakeLists.txt
+@@ -624,14 +624,13 @@ endif()
+ if(USE_CUDA)
+   list(APPEND Caffe2_GPU_CU_SRCS ${Caffe2_GPU_HIP_JIT_FUSERS_SRCS})
+   add_library(caffe2_nvrtc SHARED ${ATen_NVRTC_STUB_SRCS})
++  include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake)
++  link_cuda_libraries(caffe2_nvrtc ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB})
+   if(MSVC)
+     # Delay load nvcuda.dll so we can import torch compiled with cuda on a CPU-only machine
+-    set(DELAY_LOAD_FLAGS "-DELAYLOAD:nvcuda.dll;delayimp.lib")
+-  else()
+-    set(DELAY_LOAD_FLAGS "")
++    target_link_libraries(caffe2_nvrtc "-DELAYLOAD:nvcuda.dll;delayimp.lib")
+   endif()
+
+-  target_link_libraries(caffe2_nvrtc ${CUDA_CUDA_LIB} ${CUDA_NVRTC_LIB} ${DELAY_LOAD_FLAGS})
+   install(TARGETS caffe2_nvrtc DESTINATION "${TORCH_INSTALL_LIB_DIR}")
+   if(USE_NCCL)
+     list(APPEND Caffe2_GPU_SRCS
+@@ -1541,6 +1540,7 @@ endif()
+
+ # ---[ CUDA library.
+ if(USE_CUDA)
++  include(${Torch_SOURCE_DIR}/cmake/LinkCudaLibraries.cmake)
+   # FIXME: If kineto is linked with CUPTI it pollutes torch_cpu with CUDA dependencies
+   # Even worse, it never declares that it depends on cudart, but calls the API, see
+   # https://github.com/pytorch/kineto/blob/aef2f5c0f15e3be52406ac0b885e8689de6bc9f6/libkineto/src/CudaDeviceProperties.cpp#L24
+@@ -1554,13 +1554,13 @@ if(USE_CUDA)
+       torch_cuda INTERFACE $)
+   target_include_directories(
+       torch_cuda PRIVATE ${Caffe2_GPU_INCLUDE})
+-  target_link_libraries(
++  link_cuda_libraries(
+       torch_cuda PRIVATE ${Caffe2_CUDA_DEPENDENCY_LIBS})
+
+   # These public dependencies must go after the previous dependencies, as the
+   # order of the libraries in the linker call matters here when statically
+   # linking; libculibos and cublas must be last.
+-  target_link_libraries(torch_cuda PUBLIC torch_cpu_library ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
++  link_cuda_libraries(torch_cuda PUBLIC torch_cpu_library ${Caffe2_PUBLIC_CUDA_DEPENDENCY_LIBS})
+ endif()
+
+ # ---[ Metal(OSX) modification
+diff --git a/cmake/LinkCudaLibraries.cmake b/cmake/LinkCudaLibraries.cmake
+new file mode 100644
+index 00000000000..e09d1186f6d
+--- /dev/null
++++ b/cmake/LinkCudaLibraries.cmake
+@@ -0,0 +1,33 @@
++# Link CUDA libraries to the given target, i.e.: `target_link_libraries(target )`
++#
++# Additionally makes sure CUDA stub libs don't end up being in RPath
++#
++# Example: link_cuda_libraries(mytarget PRIVATE ${CUDA_LIBRARIES})
++function(link_cuda_libraries target)
++  set(libs ${ARGN})
++  set(install_rpath "$ORIGIN")
++  set(filtered FALSE)
++  foreach(lib IN LISTS libs)
++    # CUDA stub libs are in form /prefix/lib/stubs/libcuda.so
++    # So extract the name of the parent folder, to check against "stubs"
++    # And the parent path which we need to add to the INSTALL_RPATH for non-stubs
++    get_filename_component(parent_path "${lib}" DIRECTORY)
++    get_filename_component(parent_name "${parent_path}" NAME)
++    if(parent_name STREQUAL "stubs")
++      message(STATUS "Filtering ${lib} from being set in ${target}'s RPATH, "
++                     "because it appears to point to the CUDA stubs directory.")
++      set(filtered TRUE)
++    elseif(parent_path)
++      list(APPEND install_rpath ${parent_path})
++    endif()
++  endforeach()
++
++  # Regular link command
++  target_link_libraries(${target} ${libs})
++  # Manually set INSTALL_RPATH when there were any stub libs
++  if(filtered)
++    list(REMOVE_DUPLICATES install_rpath)
++    set_target_properties(${target} PROPERTIES INSTALL_RPATH_USE_LINK_PATH FALSE)
++    set_target_properties(${target} PROPERTIES INSTALL_RPATH "${install_rpath}")
++  endif()
++endfunction()
+diff --git a/test/test_torch.py b/test/test_torch.py
+index efc3a1edba5..865416a817e 100644
+--- a/test/test_torch.py
++++ b/test/test_torch.py
+@@ -9767,6 +9767,21 @@ def add_neg_dim_tests():
+         assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name
+         setattr(TestTorch, test_name, make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim))
+
++class TestRPATH(TestCase):
++    @unittest.skipIf(not sys.platform.startswith('linux'), "linux-only test")
++    def test_rpath(self):
++        """
++        Make sure RPATH (or RUNPATH) in nvrtc does not contain a cuda stubs directory
++        issue gh-35418
++        """
++        libdir = os.path.join(os.path.dirname(torch._C.__file__), 'lib')
++        caffe2_nvrtc = os.path.join(libdir, 'libcaffe2_nvrtc.so')
++        if os.path.exists(caffe2_nvrtc):
++            output = subprocess.check_output(['objdump', '-x', caffe2_nvrtc])
++            for line in output.split(b'\n'):
++                if b'RPATH' in line or b'RUNPATH' in line:
++                    self.assertFalse(b'stubs' in line)
++
+ # TODO: these empy classes are temporarily instantiated for XLA compatibility
+ # once XLA updates their test suite it should be removed
+ class TestViewOps(TestCase):
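Not part of the diff above: a minimal standalone sketch of the same RPATH check that the no-cuda-stubs-rpath patch adds to test/test_torch.py, assuming a Linux system with objdump on the PATH and an importable torch installation. It can be used to spot-check an existing build outside the test suite (libcaffe2_nvrtc.so is only present in CUDA builds, so the check is skipped otherwise, just as in the test).

import os
import subprocess

import torch

# The bundled shared libraries live next to torch._C, e.g. .../site-packages/torch/lib
libdir = os.path.join(os.path.dirname(torch._C.__file__), 'lib')
caffe2_nvrtc = os.path.join(libdir, 'libcaffe2_nvrtc.so')

if os.path.exists(caffe2_nvrtc):
    # 'objdump -x' dumps the dynamic section, including any RPATH/RUNPATH entries
    output = subprocess.check_output(['objdump', '-x', caffe2_nvrtc])
    for line in output.split(b'\n'):
        if b'RPATH' in line or b'RUNPATH' in line:
            # A correctly patched build must not point at a CUDA stubs directory
            assert b'stubs' not in line, line.decode()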