fix(cpp): skip gpu device if n_gpu_layers <= 0
jhen0409 committed Nov 2, 2024
1 parent 1211095 commit 055df7f
Showing 2 changed files with 36 additions and 3 deletions.
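
For callers, the practical effect is that a non-positive n_gpu_layers now keeps the Metal GPU device out of model->devices entirely, forcing CPU-only inference. A minimal sketch against the llama.cpp C API of this vintage (the model path is a placeholder, and llama_load_model_from_file is assumed to still be the loading entry point):

#include "llama.h"

int main(void) {
    llama_backend_init();

    // Start from the defaults; with this commit, the Metal-only
    // override to 999 layers is applied only when n_gpu_layers is
    // already positive.
    struct llama_model_params params = llama_model_default_params();

    // Non-positive value: the GPU device is skipped during device
    // registration, so the model runs on the CPU backend.
    params.n_gpu_layers = 0;

    // "model.gguf" is a placeholder path.
    struct llama_model * model = llama_load_model_from_file("model.gguf", params);
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}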
10 changes: 9 additions & 1 deletion cpp/llama.cpp
@@ -19097,7 +19097,9 @@ struct llama_model_params llama_model_default_params() {
 
 #ifdef LM_GGML_USE_METAL
     // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
-    result.n_gpu_layers = 999;
+    if (result.n_gpu_layers > 0) {
+        result.n_gpu_layers = 999;
+    }
 #endif
 
     return result;
@@ -19300,7 +19302,13 @@ struct llama_model * llama_load_model_from_file(
                 break;
 
             case LM_GGML_BACKEND_DEVICE_TYPE_GPU:
+#ifdef LM_GGML_USE_METAL
+                if (params.n_gpu_layers > 0) {
+                    model->devices.push_back(dev);
+                }
+#else
                 model->devices.push_back(dev);
+#endif
                 break;
         }
     }
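
The second file records the same change in the repository's patch script, so it is preserved the next time llama.cpp is re-vendored. Assuming the patch was generated with diff -u against the copied sources (as its llama.cpp.orig/llama.cpp headers suggest), it could be re-applied from the repository root with something like:

patch cpp/llama.cpp < scripts/llama.cpp.patch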
29 changes: 27 additions & 2 deletions scripts/llama.cpp.patch
@@ -1,5 +1,5 @@
---- llama.cpp.orig	2024-11-02 10:33:10
-+++ llama.cpp	2024-11-02 10:33:11
+--- llama.cpp.orig	2024-11-02 11:13:58
++++ llama.cpp	2024-11-02 11:19:21
 @@ -80,6 +80,17 @@
  #define LLAMA_MAX_LAYERS 512
  #define LLAMA_MAX_EXPERTS 160 // DeepSeekV2
@@ -39,3 +39,28 @@
           strerror(errno));
      }
  }
+@@ -19086,7 +19097,9 @@
+ 
+ #ifdef LM_GGML_USE_METAL
+     // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
+-    result.n_gpu_layers = 999;
++    if (result.n_gpu_layers > 0) {
++        result.n_gpu_layers = 999;
++    }
+ #endif
+ 
+     return result;
+@@ -19289,7 +19302,13 @@
+                 break;
+ 
+             case LM_GGML_BACKEND_DEVICE_TYPE_GPU:
++#ifdef LM_GGML_USE_METAL
++                if (params.n_gpu_layers > 0) {
++                    model->devices.push_back(dev);
++                }
++#else
+                 model->devices.push_back(dev);
++#endif
+                 break;
+         }
+     }
