Skip to content

Commit

Permalink
feat: sync llama.cpp (#16)
Browse files (browse the repository at this point in the history)
* feat: sync llama.cpp

* fix: add log.h

* chore: cleanup

* fix(ios): update patch
  • Loading branch information
jhen0409 authored Sep 4, 2023
1 parent 22a85bd commit 989858b
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 28 deletions.
4 changes: 2 additions & 2 deletions example/ios/Podfile.lock
Original file line number | Diff line number | Diff line change
Expand Up @@ -8,7 +8,7 @@ PODS:
- hermes-engine/Pre-built (= 0.72.3)
- hermes-engine/Pre-built (0.72.3)
- libevent (2.1.12)
- llama-rn (0.1.5):
- llama-rn (0.2.0-rc.2):
- RCT-Folly
- RCTRequired
- RCTTypeSafety
Expand Down Expand Up @@ -1242,7 +1242,7 @@ SPEC CHECKSUMS:
glog: 04b94705f318337d7ead9e6d17c019bd9b1f6b1b
hermes-engine: 10fbd3f62405c41ea07e71973ea61e1878d07322
libevent: 4049cae6c81cdb3654a443be001fb9bdceff7913
llama-rn: 2fc75a540ad1b89e773cb00f4b02c764e2b1b87a
llama-rn: eda3c9288703cf662d48ade3efee3b14a80b8c21
RCT-Folly: 424b8c9a7a0b9ab2886ffe9c3b041ef628fd4fb1
RCTRequired: a2faf4bad4e438ca37b2040cb8f7799baa065c18
RCTTypeSafety: cb09f3e4747b6d18331a15eb05271de7441ca0b3
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 58 files
+4 −7 .devops/tools.sh
+12 −1 .github/workflows/build.yml
+36 −0 .github/workflows/code-coverage.yml
+43 −0 .github/workflows/gguf-publish.yml
+11 −0 .gitignore
+7 −1 CMakeLists.txt
+169 −101 Makefile
+11 −2 Package.swift
+39 −8 README.md
+14 −0 codecov.yml
+174 −2 common/common.cpp
+39 −0 common/common.h
+1 −0 common/console.cpp
+643 −0 common/log.h
+89 −101 convert-falcon-hf-to-gguf.py
+81 −109 convert-gptneox-hf-to-gguf.py
+0 −308 convert-llama-7b-pth-to-gguf.py
+27 −19 convert-llama-ggmlv3-to-gguf.py
+0 −328 convert-llama-hf-to-gguf.py
+6 −4 convert-lora-to-ggml.py
+189 −136 convert.py
+2 −1 examples/CMakeLists.txt
+0 −5 examples/baby-llama/baby-llama.cpp
+2 −2 examples/beam-search/CMakeLists.txt
+5 −3 examples/beam-search/beam-search.cpp
+1 −1 examples/chat.sh
+38 −35 examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+7 −6 examples/gptneox-wip/gptneox-main.cpp
+2 −1 examples/llm.vim
+2 −2 examples/main/README.md
+166 −190 examples/main/main.cpp
+15 −0 examples/perplexity/perplexity.cpp
+19 −5 examples/quantize/quantize.cpp
+17 −5 examples/server/server.cpp
+8 −0 examples/speculative/CMakeLists.txt
+234 −0 examples/speculative/speculative.cpp
+4 −1 examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
+105 −68 ggml-alloc.c
+17 −0 ggml-cuda.cu
+82 −51 ggml-metal.m
+160 −74 ggml-metal.metal
+1 −1 ggml-opencl.cpp
+322 −168 ggml.c
+4 −1 ggml.h
+20 −3 gguf-py/README.md
+345 −234 gguf-py/gguf/gguf.py
+0 −0 gguf-py/gguf/py.typed
+2 −1 gguf-py/pyproject.toml
+42 −0 grammars/c.gbnf
+37 −11 k_quants.c
+98 −37 llama.cpp
+2 −1 llama.h
+5 −0 mypy.ini
+1 −0 scripts/qnt-all.sh
+1 −0 scripts/run-all-perf.sh
+1 −0 scripts/run-all-ppl.sh
+5 −0 tests/CMakeLists.txt
+3 −0 tests/test-c.c
1 change: 1 addition & 0 deletions scripts/bootstrap.sh
Original file line number | Diff line number | Diff line change
Expand Up @@ -19,6 +19,7 @@ cp ./llama.cpp/llama.h ./cpp/llama.h
cp ./llama.cpp/llama.cpp ./cpp/llama.cpp
cp ./llama.cpp/k_quants.h ./cpp/k_quants.h
cp ./llama.cpp/k_quants.c ./cpp/k_quants.c
cp ./llama.cpp/common/log.h ./cpp/log.h
cp ./llama.cpp/common/common.h ./cpp/common.h
cp ./llama.cpp/common/common.cpp ./cpp/common.cpp
cp ./llama.cpp/common/grammar-parser.h ./cpp/grammar-parser.h
Expand Down
64 changes: 42 additions & 22 deletions scripts/ggml-metal.m.patch
Original file line number | Diff line number | Diff line change
@@ -1,46 +1,66 @@
--- ggml-metal-orig.m 2023-08-29 10:32:31
+++ ggml-metal.m 2023-08-29 10:32:32
@@ -230,13 +230,13 @@
--- ggml-metal-orig.m 2023-09-04 09:16:25
+++ ggml-metal.m 2023-09-04 10:09:46
@@ -118,13 +118,13 @@
metal_printf("%s: allocating\n", __func__);

// Show all the Metal device instances in the system
- NSArray * devices = MTLCopyAllDevices();
+ // NSArray * devices = MTLCopyAllDevices();
id <MTLDevice> device;
NSString * s;
- for (device in devices) {
- s = [device name];
- metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
- }
+ // for (device in devices) {
+ // s = [device name];
+ // metal_printf("%s: found device: %s\n", __func__, [s UTF8String]);
+ // }

// Pick and show default Metal device
device = MTLCreateSystemDefaultDevice();
@@ -247,13 +247,13 @@
#undef LM_GGML_METAL_ADD_KERNEL
}

- fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+ // fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");

- metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
- metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
- if (ctx->device.maxTransferRate != 0) {
- fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
- metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
- } else {
- fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
- metal_printf("%s: maxTransferRate = built-in GPU\n", __func__);
- }
+ // metal_printf("%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+ // metal_printf("%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
+ // if (ctx->device.maxTransferRate != 0) {
+ // fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+ // metal_printf("%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+ // } else {
+ // fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
+ // metal_printf("%s: maxTransferRate = built-in GPU\n", __func__);
+ // }

return ctx;
}
@@ -436,15 +436,15 @@
@@ -454,15 +454,15 @@
}
}

- fprintf(stderr, ", (%8.2f / %8.2f)",
- metal_printf(", (%8.2f / %8.2f)",
- ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
- ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+ // fprintf(stderr, ", (%8.2f / %8.2f)",
+ // metal_printf(", (%8.2f / %8.2f)",
+ // ctx->device.currentAllocatedSize / 1024.0 / 1024.0,
+ // ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);

- if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
- fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
- metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
- } else {
- fprintf(stderr, "\n");
- metal_printf("\n");
- }
+ // if (ctx->device.currentAllocatedSize > ctx->device.recommendedMaxWorkingSetSize) {
+ // fprintf(stderr, ", warning: current allocated size is greater than the recommended max working set size\n");
+ // metal_printf(", warning: current allocated size is greater than the recommended max working set size\n");
+ // } else {
+ // fprintf(stderr, "\n");
+ // metal_printf("\n");
+ // }
}

return true;

0 comments on commit 989858b

Please sign in to comment.