diff --git a/llama.cpp b/llama.cpp
index 226255b..c1ac54b 160000
--- a/llama.cpp
+++ b/llama.cpp
@@ -1 +1 @@
-Subproject commit 226255b44ef2c2794bfac48d101d35a9c2dbb965
+Subproject commit c1ac54b77aaba10d029084d152be786102010eb2
diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch
index 9a9b583..87bf951 100644
--- a/scripts/ggml-metal.m.patch
+++ b/scripts/ggml-metal.m.patch
@@ -1,26 +1,26 @@
---- ggml-metal-orig.m	2023-08-22 12:22:42
-+++ ggml-metal.m	2023-08-22 12:22:43
-@@ -218,13 +218,13 @@
+--- ggml-metal-orig.m	2023-08-27 09:44:37
++++ ggml-metal.m	2023-08-27 09:46:07
+@@ -226,13 +226,13 @@
  #undef LM_GGML_METAL_ADD_KERNEL
      }
 
--    fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-+    // fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
-     fprintf(stderr, "%s: hasUnifiedMemory             = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+-    fprintf(stderr, "%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
++    // fprintf(stderr, "%s: recommendedMaxWorkingSetSize  = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+     fprintf(stderr, "%s: hasUnifiedMemory              = %s\n",       __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
 -    if (ctx->device.maxTransferRate != 0) {
--        fprintf(stderr, "%s: maxTransferRate              = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+-        fprintf(stderr, "%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
 -    } else {
--        fprintf(stderr, "%s: maxTransferRate              = built-in GPU\n", __func__);
+-        fprintf(stderr, "%s: maxTransferRate               = built-in GPU\n", __func__);
 -    }
 +    // if (ctx->device.maxTransferRate != 0) {
-+    //     fprintf(stderr, "%s: maxTransferRate              = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
++    //     fprintf(stderr, "%s: maxTransferRate               = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
 +    // } else {
-+    //     fprintf(stderr, "%s: maxTransferRate              = built-in GPU\n", __func__);
++    //     fprintf(stderr, "%s: maxTransferRate               = built-in GPU\n", __func__);
 +    // }
 
      return ctx;
  }
-@@ -366,15 +366,15 @@
+@@ -374,15 +374,15 @@
              }
          }