fix(cpp): skip gpu device if n_gpu_layers <= 0
jhen0409 committed Nov 2, 2024
1 parent 1211095 commit 055df7f
Showing 2 changed files with 36 additions and 3 deletions.
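
For callers, the practical effect is that a non-positive n_gpu_layers now keeps the Metal GPU device out of model->devices entirely, forcing CPU-only inference. A minimal sketch against the llama.cpp C API of this vintage (the model path is a placeholder, and llama_load_model_from_file is assumed to still be the loading entry point):

#include "llama.h"

int main(void) {
    llama_backend_init();

    // Start from the defaults; with this commit, the Metal-only
    // override to 999 layers is applied only when n_gpu_layers is
    // already positive.
    struct llama_model_params params = llama_model_default_params();

    // Non-positive value: the GPU device is skipped during device
    // registration, so the model runs on the CPU backend.
    params.n_gpu_layers = 0;

    // "model.gguf" is a placeholder path.
    struct llama_model * model = llama_load_model_from_file("model.gguf", params);
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}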
10 changes: 9 additions & 1 deletion cpp/llama.cpp
@@ -19097,7 +19097,9 @@ struct llama_model_params llama_model_default_params() {
 
 #ifdef LM_GGML_USE_METAL
     // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
-    result.n_gpu_layers = 999;
+    if (result.n_gpu_layers > 0) {
+        result.n_gpu_layers = 999;
+    }
 #endif
 
     return result;
@@ -19300,7 +19302,13 @@ struct llama_model * llama_load_model_from_file(
                 break;
 
             case LM_GGML_BACKEND_DEVICE_TYPE_GPU:
+#ifdef LM_GGML_USE_METAL
+                if (params.n_gpu_layers > 0) {
+                    model->devices.push_back(dev);
+                }
+#else
                 model->devices.push_back(dev);
+#endif
                 break;
         }
     }
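
The second file records the same change in the repository's patch script, so it is preserved the next time llama.cpp is re-vendored. Assuming the patch was generated with diff -u against the copied sources (as its llama.cpp.orig/llama.cpp headers suggest), it could be re-applied from the repository root with something like:

patch cpp/llama.cpp < scripts/llama.cpp.patch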
29 changes: 27 additions & 2 deletions scripts/llama.cpp.patch
@@ -1,5 +1,5 @@
---- llama.cpp.orig	2024-11-02 10:33:10
-+++ llama.cpp	2024-11-02 10:33:11
+--- llama.cpp.orig	2024-11-02 11:13:58
++++ llama.cpp	2024-11-02 11:19:21
 @@ -80,6 +80,17 @@
  #define LLAMA_MAX_LAYERS 512
  #define LLAMA_MAX_EXPERTS 160 // DeepSeekV2
@@ -39,3 +39,28 @@
           strerror(errno));
      }
  }
+@@ -19086,7 +19097,9 @@
+ 
+ #ifdef LM_GGML_USE_METAL
+     // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
+-    result.n_gpu_layers = 999;
++    if (result.n_gpu_layers > 0) {
++        result.n_gpu_layers = 999;
++    }
+ #endif
+ 
+     return result;
+@@ -19289,7 +19302,13 @@
+                 break;
+ 
+             case LM_GGML_BACKEND_DEVICE_TYPE_GPU:
++#ifdef LM_GGML_USE_METAL
++                if (params.n_gpu_layers > 0) {
++                    model->devices.push_back(dev);
++                }
++#else
+                 model->devices.push_back(dev);
++#endif
+                 break;
+         }
+     }
