From b9bdaf4ba85205b5bb84716babc9894178a995a7 Mon Sep 17 00:00:00 2001
From: Andrei Vagin
Date: Sat, 14 Sep 2024 08:50:54 -0700
Subject: [PATCH] plugin/cuda: disable CUDA plugin if /dev/nvidiactl isn't
 present

The presence of /dev/nvidiactl indicates that the system has a
compatible NVIDIA GPU driver installed and that the GPU is accessible to
the operating system.

Signed-off-by: Andrei Vagin
---
 criu/include/fault-injection.h |  1 +
 plugins/cuda/cuda_plugin.c     | 10 +++++++++-
 scripts/ci/run-ci-tests.sh     |  2 +-
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/criu/include/fault-injection.h b/criu/include/fault-injection.h
index 82c3a1f7fc..59adf05b9e 100644
--- a/criu/include/fault-injection.h
+++ b/criu/include/fault-injection.h
@@ -22,6 +22,7 @@ enum faults {
 	FI_DONT_USE_PAGEMAP_SCAN = 135,
 	FI_DUMP_CRASH = 136,
 	FI_DISABLE_FREEZE_CGROUP = 137,
+	FI_PLUGIN_CUDA_FORCE_ENABLE = 138,
 	FI_MAX,
 };
 
diff --git a/plugins/cuda/cuda_plugin.c b/plugins/cuda/cuda_plugin.c
index 04d70b114f..db2a0abafb 100644
--- a/plugins/cuda/cuda_plugin.c
+++ b/plugins/cuda/cuda_plugin.c
@@ -5,6 +5,7 @@
 #include "pid.h"
 #include "proc_parse.h"
 #include "seize.h"
+#include "fault-injection.h"
 
 #include
 #include
@@ -460,8 +461,15 @@ CR_PLUGIN_REGISTER_HOOK(CR_PLUGIN_HOOK__RESUME_DEVICES_LATE, cuda_plugin_resume_
 
 int cuda_plugin_init(int stage)
 {
-	int ret = cuda_checkpoint_supports_flag("--action");
+	int ret;
 
+	if (!fault_injected(FI_PLUGIN_CUDA_FORCE_ENABLE) && access("/dev/nvidiactl", F_OK)) {
+		pr_info("/dev/nvidiactl doesn't exist. The CUDA plugin is disabled.\n");
+		plugin_disabled = true;
+		return 0;
+	}
+
+	ret = cuda_checkpoint_supports_flag("--action");
 	if (ret == -1) {
 		pr_warn("check that %s is present in $PATH\n", CUDA_CHECKPOINT);
 		plugin_disabled = true;
diff --git a/scripts/ci/run-ci-tests.sh b/scripts/ci/run-ci-tests.sh
index 26ea00c537..38b7b5097f 100755
--- a/scripts/ci/run-ci-tests.sh
+++ b/scripts/ci/run-ci-tests.sh
@@ -363,4 +363,4 @@ make -C plugins/amdgpu/ test_topology_remap
 
 ./test/zdtm.py run -t zdtm/static/maps00 -t zdtm/static/maps02 --criu-plugin amdgpu
 ./test/zdtm.py run -t zdtm/static/maps00 -t zdtm/static/maps02 --criu-plugin amdgpu cuda
-./test/zdtm.py run -t zdtm/static/sigpending -t zdtm/static/pthread00 --mocked-cuda-checkpoint
+./test/zdtm.py run -t zdtm/static/sigpending -t zdtm/static/pthread00 --mocked-cuda-checkpoint --fault 138