From 192c9ae24824652efb171d5d6a97e331c487d640 Mon Sep 17 00:00:00 2001
From: Jhen-Jie Hong
Date: Sat, 2 Nov 2024 18:43:48 +0800
Subject: [PATCH] fix(cpp): skip register metal backend on os simulator

---
 cpp/common.cpp                 |  2 +-
 cpp/ggml-backend.cpp           |  3 +++
 cpp/llama.cpp                  | 10 +---------
 example/ios/Podfile.lock       |  4 ++--
 scripts/bootstrap.sh           |  1 +
 scripts/ggml-backend.cpp.patch | 14 ++++++++++++++
 scripts/llama.cpp.patch        | 25 -------------------------
 7 files changed, 22 insertions(+), 37 deletions(-)
 create mode 100644 scripts/ggml-backend.cpp.patch

diff --git a/cpp/common.cpp b/cpp/common.cpp
index 347dd0f..a7e4a46 100644
--- a/cpp/common.cpp
+++ b/cpp/common.cpp
@@ -985,7 +985,7 @@ struct llama_model_params common_model_params_to_llama(const common_params & par
     if (params.n_gpu_layers != -1) {
         mparams.n_gpu_layers = params.n_gpu_layers;
     }
-
+
     mparams.vocab_only = params.vocab_only;
     mparams.rpc_servers = params.rpc_servers.c_str();
     mparams.main_gpu = params.main_gpu;
diff --git a/cpp/ggml-backend.cpp b/cpp/ggml-backend.cpp
index 7451de3..d8cb76d 100644
--- a/cpp/ggml-backend.cpp
+++ b/cpp/ggml-backend.cpp
@@ -575,8 +575,11 @@ struct lm_ggml_backend_registry {
         register_backend(lm_ggml_backend_cuda_reg());
 #endif
 #ifdef LM_GGML_USE_METAL
+#include <TargetConditionals.h>
+#if !TARGET_OS_SIMULATOR
         register_backend(lm_ggml_backend_metal_reg());
 #endif
+#endif
 #ifdef LM_GGML_USE_SYCL
         register_backend(lm_ggml_backend_sycl_reg());
 #endif
diff --git a/cpp/llama.cpp b/cpp/llama.cpp
index d0c4f5d..6296abe 100644
--- a/cpp/llama.cpp
+++ b/cpp/llama.cpp
@@ -19097,9 +19097,7 @@ struct llama_model_params llama_model_default_params() {
 
 #ifdef LM_GGML_USE_METAL
     // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
-    if (result.n_gpu_layers > 0) {
-        result.n_gpu_layers = 999;
-    }
+    result.n_gpu_layers = 999;
 #endif
 
     return result;
@@ -19302,13 +19300,7 @@ struct llama_model * llama_load_model_from_file(
                 break;
 
             case LM_GGML_BACKEND_DEVICE_TYPE_GPU:
-#ifdef LM_GGML_USE_METAL
-                if (params.n_gpu_layers > 0) {
-                    model->devices.push_back(dev);
-                }
-#else
                 model->devices.push_back(dev);
-#endif
                 break;
         }
     }
diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock
index f9cf151..1827ea6 100644
--- a/example/ios/Podfile.lock
+++ b/example/ios/Podfile.lock
@@ -8,7 +8,7 @@ PODS:
     - hermes-engine/Pre-built (= 0.72.3)
   - hermes-engine/Pre-built (0.72.3)
   - libevent (2.1.12)
-  - llama-rn (0.3.10):
+  - llama-rn (0.3.11):
     - RCT-Folly
     - RCTRequired
     - RCTTypeSafety
@@ -1261,7 +1261,7 @@ SPEC CHECKSUMS:
   glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
   hermes-engine: 10fbd3f62405c41ea07e71973ea61e1878d07322
   libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
-  llama-rn: bb4447eb237c895e526b1adbb2b88700093005ac
+  llama-rn: 07a172eaad49cc56323f326e4c3ce3ec734b6a9e
   RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1
   RCTRequired: a2faf4bad4e438ca37b2040cb8f7799baa065c18
   RCTTypeSafety: cb09f3e4747b6d18331a15eb05271de7441ca0b3
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index 92521e9..b84fe66 100755
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -119,6 +119,7 @@
 patch -p0 -d ./cpp < ./scripts/common.cpp.patch
 patch -p0 -d ./cpp < ./scripts/log.cpp.patch
 patch -p0 -d ./cpp < ./scripts/llama.cpp.patch
 patch -p0 -d ./cpp < ./scripts/ggml-metal.m.patch
+patch -p0 -d ./cpp < ./scripts/ggml-backend.cpp.patch
 patch -p0 -d ./cpp < ./scripts/ggml.c.patch
 
diff --git a/scripts/ggml-backend.cpp.patch b/scripts/ggml-backend.cpp.patch
new file mode 100644
index 0000000..48757f4
--- /dev/null
+++ b/scripts/ggml-backend.cpp.patch
@@ -0,0 +1,14 @@
+--- ggml-backend.cpp.orig	2024-11-02 18:37:57
++++ ggml-backend.cpp	2024-11-02 18:39:36
+@@ -575,8 +575,11 @@
+         register_backend(lm_ggml_backend_cuda_reg());
+ #endif
+ #ifdef LM_GGML_USE_METAL
++#include <TargetConditionals.h>
++#if !TARGET_OS_SIMULATOR
+         register_backend(lm_ggml_backend_metal_reg());
+ #endif
++#endif
+ #ifdef LM_GGML_USE_SYCL
+         register_backend(lm_ggml_backend_sycl_reg());
+ #endif
diff --git a/scripts/llama.cpp.patch b/scripts/llama.cpp.patch
index a09f90e..efed972 100644
--- a/scripts/llama.cpp.patch
+++ b/scripts/llama.cpp.patch
@@ -39,28 +39,3 @@
                  strerror(errno));
          }
      }
-@@ -19086,7 +19097,9 @@
- 
- #ifdef LM_GGML_USE_METAL
-     // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
--    result.n_gpu_layers = 999;
-+    if (result.n_gpu_layers > 0) {
-+        result.n_gpu_layers = 999;
-+    }
- #endif
- 
-     return result;
-@@ -19289,7 +19302,13 @@
-                 break;
- 
-             case LM_GGML_BACKEND_DEVICE_TYPE_GPU:
-+#ifdef LM_GGML_USE_METAL
-+                if (params.n_gpu_layers > 0) {
-+                    model->devices.push_back(dev);
-+                }
-+#else
-                 model->devices.push_back(dev);
-+#endif
-                 break;
-         }
-     }
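
Note (not part of the patch above): on Apple toolchains, TargetConditionals.h defines TARGET_OS_SIMULATOR as 1 when compiling for a simulator target and 0 otherwise, so wrapping the registration call in #if !TARGET_OS_SIMULATOR compiles the Metal backend out of simulator builds and leaves the CPU backend in place. The sketch below shows the same guard in isolation; register_metal_backend() and the non-Apple fallback macro are hypothetical stand-ins, not code from this repository.

// Standalone sketch of the TARGET_OS_SIMULATOR guard (illustration only).
#include <cstdio>

#if defined(__APPLE__)
#include <TargetConditionals.h>   // defines TARGET_OS_SIMULATOR as 0 or 1
#else
#define TARGET_OS_SIMULATOR 0     // hypothetical fallback so the sketch also builds off Apple platforms
#endif

// Hypothetical stand-in for the real registration call (lm_ggml_backend_metal_reg() in the patch).
static void register_metal_backend() {
    std::printf("Metal backend registered\n");
}

int main() {
#if !TARGET_OS_SIMULATOR
    // Device or non-Apple build: the Metal registration is compiled in.
    register_metal_backend();
#else
    // Simulator build: the call is removed at compile time, leaving only the CPU backend.
    std::printf("simulator build: skipping Metal backend registration\n");
#endif
    return 0;
}

Because the check happens in the preprocessor, a simulator binary contains no reference to the Metal registration at all; building for a device target re-enables it without any runtime flag.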