diff --git a/example/ios/Podfile.lock b/example/ios/Podfile.lock
index ef9659a..bf7d134 100644
--- a/example/ios/Podfile.lock
+++ b/example/ios/Podfile.lock
@@ -8,7 +8,7 @@ PODS:
- hermes-engine/Pre-built (= 0.72.3)
- hermes-engine/Pre-built (0.72.3)
- libevent (2.1.12)
- - llama-rn (0.1.5):
+ - llama-rn (0.2.0-rc.2):
- RCT-Folly
- RCTRequired
- RCTTypeSafety
@@ -1242,7 +1242,7 @@ SPEC CHECKSUMS:
glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
hermes-engine: 10fbd3f62405c41ea07e71973ea61e1878d07322
libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
- llama-rn: 2fc75a540ad1b89e773cb00f4b02c764e2b1b87a
+ llama-rn: eda3c9288703cf662d48ade3efee3b14a80b8c21
RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1
RCTRequired: a2faf4bad4e438ca37b2040cb8f7799baa065c18
RCTTypeSafety: cb09f3e4747b6d18331a15eb05271de7441ca0b3
diff --git a/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata b/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata
index 39b12ac..e7451e0 100644
--- a/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata
+++ b/example/ios/RNLlamaExample.xcworkspace/contents.xcworkspacedata
@@ -7,7 +7,4 @@
-
-
diff --git a/llama.cpp b/llama.cpp
index 44c117f..cf9b084 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 44c117f41ee01c5ac8fb86bba041f08d8b87b46d
+Subproject commit cf9b08485c4c2d4d945c6e74fe20f273a38b6104
diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index a62e6b9..a0e33ac 100755
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -19,6 +19,7 @@ cp ./llama.cpp/llama.h ./cpp/llama.h
cp ./llama.cpp/llama.cpp ./cpp/llama.cpp
cp ./llama.cpp/k_quants.h ./cpp/k_quants.h
cp ./llama.cpp/k_quants.c ./cpp/k_quants.c
+cp ./llama.cpp/common/log.h ./cpp/log.h
cp ./llama.cpp/common/common.h ./cpp/common.h
cp ./llama.cpp/common/common.cpp ./cpp/common.cpp
cp ./llama.cpp/common/grammar-parser.h ./cpp/grammar-parser.h
diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch
index 57afad6..b878a89 100644
--- a/scripts/ggml-metal.m.patch
+++ b/scripts/ggml-metal.m.patch
@@ -1,46 +1,66 @@
---- ggml-metal-orig.m 2023-08-29 10:32:31
-+++ ggml-metal.m 2023-08-29 10:32:32
-@@ -230,13 +230,13 @@
+--- ggml-metal-orig.m 2023-09-04 09:16:25
++++ ggml-metal.m 2023-09-04 10:09:46
+@@ -118,13 +118,13 @@
+ metal_printf("%s: allocating\n", __func__);
+
+ // Show all the Metal device instances in the system
+- NSArray * devices = MTLCopyAllDevices();
++ // NSArray * devices = MTLCopyAllDevices();
+ id device;
+ NSString * s;
+- for (device in devices) {
+- s = [device name];
+- metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
+- }
++ // for (device in devices) {
++ // s = [device name];
++ // metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
++ // }
+
+ // Pick and show default Metal device
+ device = MTLCreateSystemDefaultDevice();
+@@ -247,13 +247,13 @@
#undef LM_GGML_METAL_ADD_KERNEL
}
-
-- fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-+ // fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
- fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+
+- metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+- metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
- if (ctx->device.maxTransferRate != 0) {
-- fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+- metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
- } else {
-- fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
+- metal_printf("%s: maxTransferRate = built-in GPU\n", __func__);
- }
++ // metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
++ // metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+ // if (ctx->device.maxTransferRate != 0) {
-+ // fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
++ // metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+ // } else {
-+ // fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
++ // metal_printf("%s: maxTransferRate = built-in GPU\n", __func__);
+ // }
-
+
return ctx;
}
-@@ -436,15 +436,15 @@
+@@ -454,15 +454,15 @@
}
}
-
-- fprintf(stderr, ", (%8.2f / %8.2f)",
+
+- metal_printf(", (%8.2f / %8.2f)",
- ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
- ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-+ // fprintf(stderr, ", (%8.2f / %8.2f)",
++ // metal_printf(", (%8.2f / %8.2f)",
+ // ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
+ // ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-
+
- if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
-- fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
+- metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
- } else {
-- fprintf(stderr, "\n");
+- metal_printf("\n");
- }
+ // if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
-+ // fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
++ // metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
+ // } else {
-+ // fprintf(stderr, "\n");
++ // metal_printf("\n");
+ // }
}
-
+
return true;