From 989858b01e2cd8c8af197c389b8bd0a9881d625a Mon Sep 17 00:00:00 2001 From: Jhen-Jie Hong Date: Mon, 4 Sep 2023 10:42:10 +0800 Subject: [PATCH] feat: sync llama.cpp (#16) * feat: sync llama.cpp * fix: add log.h * chore: cleanup * fix(ios): update patch --- example/ios/Podfile.lock | 4 +- .../contents.xcworkspacedata | 3 - llama.cpp | 2 +- scripts/bootstrap.sh | 1 + scripts/ggml-metal.m.patch | 64 ++++++++++++------- 5 files changed, 46 insertions(+), 28 deletions(-) diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock index ef9659a..bf7d134 100644 --- a/example/ios/Podfile.lock +++ b/example/ios/Podfile.lock @@ -8,7 +8,7 @@ PODS: - hermes-engine/Pre-built (= 0.72.3) - hermes-engine/Pre-built (0.72.3) - libevent (2.1.12) - - llama-rn (0.1.5): + - llama-rn (0.2.0-rc.2): - RCT-Folly - RCTRequired - RCTTypeSafety @@ -1242,7 +1242,7 @@ SPEC CHECKSUMS: glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b hermes-engine: 10fbd3f62405c41ea07e71973ea61e1878d07322 libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913 - llama-rn: 2fc75a540ad1b89e773cb00f4b02c764e2b1b87a + llama-rn: eda3c9288703cf662d48ade3efee3b14a80b8c21 RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1 RCTRequired: a2faf4bad4e438ca37b2040cb8f7799baa065c18 RCTTypeSafety: cb09f3e4747b6d18331a15eb05271de7441ca0b3 diff --git a/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata b/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata index 39b12ac..e7451e0 100644 --- a/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata +++ b/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata @@ -7,7 +7,4 @@ - - diff --git a/llama.cpp b/llama.cpp index 44c117f..cf9b084 160000 --- a/llama.cpp +++ b/llama.cpp @@ -1 +1 @@ -Subproject commit 44c117f41ee01c5ac8fb86bba041f08d8b87b46d +Subproject commit cf9b08485c4c2d4d945c6e74fe20f273a38b6104 diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh index a62e6b9..a0e33ac 100755 --- a/scripts/bootstrap.sh +++ b/scripts/bootstrap.sh @@ -19,6 +19,7 @@ cp ./llama.cpp/llama.h ./cpp/llama.h cp ./llama.cpp/llama.cpp ./cpp/llama.cpp cp ./llama.cpp/k_quants.h ./cpp/k_quants.h cp ./llama.cpp/k_quants.c ./cpp/k_quants.c +cp ./llama.cpp/common/log.h ./cpp/log.h cp ./llama.cpp/common/common.h ./cpp/common.h cp ./llama.cpp/common/common.cpp ./cpp/common.cpp cp ./llama.cpp/common/grammar-parser.h ./cpp/grammar-parser.h diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch index 57afad6..b878a89 100644 --- a/scripts/ggml-metal.m.patch +++ b/scripts/ggml-metal.m.patch @@ -1,46 +1,66 @@ ---- ggml-metal-orig.m 2023-08-29 10:32:31 -+++ ggml-metal.m 2023-08-29 10:32:32 -@@ -230,13 +230,13 @@ +--- ggml-metal-orig.m 2023-09-04 09:16:25 ++++ ggml-metal.m 2023-09-04 10:09:46 +@@ -118,13 +118,13 @@ + metal_printf("%s: allocating\n", __func__); + + // Show all the Metal device instances in the system +- NSArray * devices = MTLCopyAllDevices(); ++ // NSArray * devices = MTLCopyAllDevices(); + id device; + NSString * s; +- for (device in devices) { +- s = [device name]; +- metal_printf("%s: found device: %s\n", __func__, [s UTF8String]); +- } ++ // for (device in devices) { ++ // s = [device name]; ++ // metal_printf("%s: found device: %s\n", __func__, [s UTF8String]); ++ // } + + // Pick and show default Metal device + device = MTLCreateSystemDefaultDevice(); +@@ -247,13 +247,13 @@ #undef LM_GGML_METAL_ADD_KERNEL } - -- fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); -+ // fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); - fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false"); + +- metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); +- metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false"); - if (ctx->device.maxTransferRate != 0) { -- fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0); +- metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0); - } else { -- fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__); +- metal_printf("%s: maxTransferRate = built-in GPU\n", __func__); - } ++ // metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); ++ // metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false"); + // if (ctx->device.maxTransferRate != 0) { -+ // fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0); ++ // metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0); + // } else { -+ // fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__); ++ // metal_printf("%s: maxTransferRate = built-in GPU\n", __func__); + // } - + return ctx; } -@@ -436,15 +436,15 @@ +@@ -454,15 +454,15 @@ } } - -- fprintf(stderr, ", (%8.2f / %8.2f)", + +- metal_printf(", (%8.2f / %8.2f)", - ctx->device.currentAllocatedSize / 1024.0 / 1024.0, - ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); -+ // fprintf(stderr, ", (%8.2f / %8.2f)", ++ // metal_printf(", (%8.2f / %8.2f)", + // ctx->device.currentAllocatedSize / 1024.0 / 1024.0, + // ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0); - + - if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) { -- fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n"); +- metal_printf(", warning: current allocated size is greater than the recommended max working set size\n"); - } else { -- fprintf(stderr, "\n"); +- metal_printf("\n"); - } + // if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) { -+ // fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n"); ++ // metal_printf(", warning: current allocated size is greater than the recommended max working set size\n"); + // } else { -+ // fprintf(stderr, "\n"); ++ // metal_printf("\n"); + // } } - + return true;