From 989858b01e2cd8c8af197c389b8bd0a9881d625a Mon Sep 17 00:00:00 2001
From: Jhen-Jie Hong <developer@jhen.me>
Date: Mon, 4 Sep 2023 10:42:10 +0800
Subject: [PATCH] feat: sync llama.cpp (#16)

* feat: sync llama.cpp

* fix: add log.h

* chore: cleanup

* fix(ios): update patch
---
 example/ios/Podfile.lock                      |  4 +-
 .../contents.xcworkspacedata                  |  3 -
 llama.cpp                                     |  2 +-
 scripts/bootstrap.sh                          |  1 +
 scripts/ggml-metal.m.patch                    | 64 ++++++++++++-------
 5 files changed, 46 insertions(+), 28 deletions(-)
diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock
index ef9659a..bf7d134 100644
--- a/example/ios/Podfile.lock
+++ b/example/ios/Podfile.lock
@@ -8,7 +8,7 @@ PODS:
     - hermes-engine/Pre-built (= 0.72.3)
   - hermes-engine/Pre-built (0.72.3)
   - libevent (2.1.12)
-  - llama-rn (0.1.5):
+  - llama-rn (0.2.0-rc.2):
     - RCT-Folly
     - RCTRequired
     - RCTTypeSafety
@@ -1242,7 +1242,7 @@ SPEC CHECKSUMS:
   glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
   hermes-engine: 10fbd3f62405c41ea07e71973ea61e1878d07322
   libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
-  llama-rn: 2fc75a540ad1b89e773cb00f4b02c764e2b1b87a
+  llama-rn: eda3c9288703cf662d48ade3efee3b14a80b8c21
   RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1
   RCTRequired: a2faf4bad4e438ca37b2040cb8f7799baa065c18
   RCTTypeSafety: cb09f3e4747b6d18331a15eb05271de7441ca0b3
diff --git a/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata b/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata
index 39b12ac..e7451e0 100644
--- a/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata
+++ b/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata
@@ -7,7 +7,4 @@
    <FileRef
       location = "group:Pods/Pods.xcodeproj">
    </FileRef>
-   <FileRef
-      location = "group:LlamaExample.xcodeproj">
-   </FileRef>
 </Workspace>
diff --git a/llama.cpp b/llama.cpp
index 44c117f..cf9b084 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 44c117f41ee01c5ac8fb86bba041f08d8b87b46d
+Subproject commit cf9b08485c4c2d4d945c6e74fe20f273a38b6104
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index a62e6b9..a0e33ac 100755
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -19,6 +19,7 @@ cp ./llama.cpp/llama.h ./cpp/llama.h
 cp ./llama.cpp/llama.cpp ./cpp/llama.cpp
 cp ./llama.cpp/k_quants.h ./cpp/k_quants.h
 cp ./llama.cpp/k_quants.c ./cpp/k_quants.c
+cp ./llama.cpp/common/log.h ./cpp/log.h
 cp ./llama.cpp/common/common.h ./cpp/common.h
 cp ./llama.cpp/common/common.cpp ./cpp/common.cpp
 cp ./llama.cpp/common/grammar-parser.h ./cpp/grammar-parser.h
diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch
index 57afad6..b878a89 100644
--- a/scripts/ggml-metal.m.patch
+++ b/scripts/ggml-metal.m.patch
@@ -1,46 +1,66 @@
---- ggml-metal-orig.m	2023-08-29 10:32:31
-+++ ggml-metal.m	2023-08-29 10:32:32
-@@ -230,13 +230,13 @@
+--- ggml-metal-orig.m	2023-09-04 09:16:25
++++ ggml-metal.m	2023-09-04 10:09:46
+@@ -118,13 +118,13 @@
+     metal_printf("%s: allocating\n", __func__);
+ 
+     // Show all the Metal device instances in the system
+-    NSArray * devices = MTLCopyAllDevices();
++    // NSArray * devices = MTLCopyAllDevices();
+     id <MTLDevice> device;
+     NSString * s;
+-    for (device in devices) {
+-        s = [device name];
+-        metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
+-    }
++    // for (device in devices) {
++    //     s = [device name];
++    //     metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
++    // }
+ 
+     // Pick and show default Metal device
+     device = MTLCreateSystemDefaultDevice();
+@@ -247,13 +247,13 @@
  #undef LM_GGML_METAL_ADD_KERNEL
      }
-
--    fprintf(stderr, "%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-+    // fprintf(stderr, "%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-     fprintf(stderr, "%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+ 
+-    metal_printf("%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+-    metal_printf("%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
 -    if (ctx->device.maxTransferRate != 0) {
--        fprintf(stderr, "%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+-        metal_printf("%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
 -    } else {
--        fprintf(stderr, "%s: maxTransferRate               = built-in GPU\n", __func__);
+-        metal_printf("%s: maxTransferRate               = built-in GPU\n", __func__);
 -    }
++    // metal_printf("%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
++    // metal_printf("%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
 +    // if (ctx->device.maxTransferRate != 0) {
-+    //     fprintf(stderr, "%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
++    //     metal_printf("%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
 +    // } else {
-+    //     fprintf(stderr, "%s: maxTransferRate               = built-in GPU\n", __func__);
++    //     metal_printf("%s: maxTransferRate               = built-in GPU\n", __func__);
 +    // }
-
+ 
      return ctx;
  }
-@@ -436,15 +436,15 @@
+@@ -454,15 +454,15 @@
              }
          }
-
--        fprintf(stderr, ", (%8.2f / %8.2f)",
+ 
+-        metal_printf(", (%8.2f / %8.2f)",
 -                ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
 -                ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-+        // fprintf(stderr, ", (%8.2f / %8.2f)",
++        // metal_printf(", (%8.2f / %8.2f)",
 +        //         ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
 +        //         ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-
+ 
 -        if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
--            fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
+-            metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
 -        } else {
--            fprintf(stderr, "\n");
+-            metal_printf("\n");
 -        }
 +        // if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
-+        //     fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
++        //     metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
 +        // } else {
-+        //     fprintf(stderr, "\n");
++        //     metal_printf("\n");
 +        // }
      }
-
+ 
      return true;