diff --git a/scripts/bootstrap.sh b/scripts/bootstrap.sh
index c91a518..033004d 100755
--- a/scripts/bootstrap.sh
+++ b/scripts/bootstrap.sh
@@ -51,6 +51,7 @@ echo "Replacement completed successfully!"
 yarn example
 
 # Apply patch
+patch -p0 -d ./cpp < ./scripts/llama.cpp.patch
 patch -p0 -d ./cpp < ./scripts/ggml-metal.m.patch
 rm cpp/ggml-metal.m.orig || true
 
diff --git a/scripts/ggml-metal.m.patch b/scripts/ggml-metal.m.patch
index 0d0cd9c..2a0ed23 100644
--- a/scripts/ggml-metal.m.patch
+++ b/scripts/ggml-metal.m.patch
@@ -1,6 +1,50 @@
---- ggml-metal-orig.m	2023-07-25 08:38:51
-+++ ggml-metal.m	2023-07-25 08:34:40
-@@ -194,13 +194,13 @@
+--- ggml-metal-orig.m	2023-08-11 07:13:35
++++ ggml-metal.m	2023-08-11 07:10:19
+@@ -126,7 +126,7 @@
+         ctx->library = [ctx->device newLibraryWithSource:msl_library_source options:nil error:&error];
+         if (error) {
+             fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
+-            exit(1);
++            return NULL;
+         }
+     }
+ #else
+@@ -144,7 +144,7 @@
+         NSString * src = [NSString stringWithContentsOfFile:path encoding:NSUTF8StringEncoding error:&error];
+         if (error) {
+             fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
+-            exit(1);
++            return NULL;
+         }
+ 
+ #ifdef LM_GGML_QKK_64
+@@ -156,17 +156,22 @@
+ #endif
+         if (error) {
+             fprintf(stderr, "%s: error: %s\n", __func__, [[error description] UTF8String]);
+-            exit(1);
++            return NULL;
+         }
+     }
+ #endif
+ 
+     // load kernels
+     {
++        NSError * error = nil;
+ #define LM_GGML_METAL_ADD_KERNEL(name) \
+         ctx->function_##name = [ctx->library newFunctionWithName:@"kernel_"#name]; \
+-        ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:nil]; \
+-        fprintf(stderr, "%s: loaded %-32s %16p\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name);
++        ctx->pipeline_##name = [ctx->device newComputePipelineStateWithFunction:ctx->function_##name error:&error]; \
++        fprintf(stderr, "%s: loaded %-32s %16p\n", __func__, "kernel_"#name, (void *) ctx->pipeline_##name); \
++        if (error) { \
++            fprintf(stderr, "%s: load pipeline error: %s\n", __func__, [[error description] UTF8String]); \
++            return NULL; \
++        }
+ 
+         LM_GGML_METAL_ADD_KERNEL(add);
+         LM_GGML_METAL_ADD_KERNEL(add_row);
+@@ -205,13 +210,13 @@
  
  #undef LM_GGML_METAL_ADD_KERNEL
      }
@@ -20,7 +64,7 @@
  
      return ctx;
  }
-@@ -319,15 +319,15 @@
+@@ -337,15 +342,15 @@
          }
      }
  
diff --git a/scripts/llama.cpp.patch b/scripts/llama.cpp.patch
new file mode 100644
index 0000000..8cd7127
--- /dev/null
+++ b/scripts/llama.cpp.patch
@@ -0,0 +1,15 @@
+--- llama-orig.cpp	2023-08-11 07:15:38
++++ llama.cpp	2023-08-11 07:08:07
+@@ -3336,6 +3336,12 @@
+     if (params.n_gpu_layers > 0) {
+         // this allocates all Metal resources and memory buffers
+         ctx->ctx_metal = lm_ggml_metal_init(1);
++
++        if (!ctx->ctx_metal) {
++            LLAMA_LOG_ERROR("%s: lm_ggml_metal_init() failed\n", __func__);
++            llama_free(ctx);
++            return NULL;
++        }
+ 
+         void * data_ptr  = NULL;
+         size_t data_size = 0;
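
Note on the pattern (illustrative, not part of the diff above): both patches replace hard process exits with error propagation. lm_ggml_metal_init() now returns NULL on any Metal failure instead of calling exit(1), and the new llama.cpp.patch makes the caller check that result, log the failure, free the partially built context, and return NULL itself. A minimal standalone C sketch of the same pattern follows; all names in it (fake_context, fake_metal_init) are hypothetical.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-in for the Metal context built by the init function. */
struct fake_context {
    void * device; /* would hold the MTLDevice / MTLLibrary handles */
};

/* Init reports failure by returning NULL instead of calling exit(1),
 * so the host application (e.g. a React Native app) is not killed outright. */
static struct fake_context * fake_metal_init(void) {
    struct fake_context * ctx = calloc(1, sizeof(*ctx));
    if (!ctx) {
        return NULL;
    }

    ctx->device = NULL; /* pretend device/library creation failed */
    if (!ctx->device) {
        fprintf(stderr, "%s: error: failed to acquire device\n", __func__);
        free(ctx);      /* release partial state before bailing out */
        return NULL;    /* was exit(1) in the unpatched code */
    }

    return ctx;
}

int main(void) {
    /* Caller side, mirroring the llama.cpp.patch hunk: check, log, clean up, propagate. */
    struct fake_context * ctx = fake_metal_init();
    if (!ctx) {
        fprintf(stderr, "%s: fake_metal_init() failed\n", __func__);
        return 1;
    }

    free(ctx);
    return 0;
}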