Skip to content

Commit

Permalink
feat: sync llama.cpp
Browse files: browse the repository at this point in the history
  • Loading branch information
jhen0409 committed Aug 27, 2023
1 parent ac7c554 commit 8eee8ea
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 12 deletions.
2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 81 files
+44 −0 .devops/full-rocm.Dockerfile
+58 −0 .devops/lamma-cpp-clblast.srpm.spec
+59 −0 .devops/lamma-cpp-cublas.srpm.spec
+58 −0 .devops/llama-cpp.srpm.spec
+44 −0 .devops/main-rocm.Dockerfile
+1 −8 .dockerignore
+46 −17 .github/workflows/build.yml
+4 −16 .gitignore
+38 −0 CMakeLists.txt
+24 −0 Makefile
+123 −84 README.md
+1 −0 ci/run.sh
+22 −41 common/common.cpp
+6 −10 common/common.h
+27 −30 convert-falcon-hf-to-gguf.py
+1 −0 convert-gptneox-hf-to-gguf.py
+1 −0 convert-llama-7b-pth-to-gguf.py
+24 −12 convert-llama-ggmlv3-to-gguf.py
+1 −0 convert-llama-hf-to-gguf.py
+18 −17 convert-lora-to-ggml.py
+234 −116 convert.py
+1 −0 examples/CMakeLists.txt
+8 −0 examples/beam_search/CMakeLists.txt
+188 −0 examples/beam_search/beam_search.cpp
+9 −5 examples/convert-llama2c-to-ggml/README.md
+135 −99 examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+1 −0 examples/embd-input/embd_input.py
+1 −0 examples/embd-input/llava.py
+1 −0 examples/embd-input/minigpt4.py
+1 −0 examples/embd-input/panda_gpt.py
+19 −12 examples/embedding/embedding.cpp
+1 −0 examples/jeopardy/graph.py
+0 −0 examples/jeopardy/jeopardy.sh
+1 −0 examples/json-schema-to-grammar.py
+58 −16 examples/llama-bench/llama-bench.cpp
+4 −0 examples/main/README.md
+28 −8 examples/main/main.cpp
+1 −0 examples/make-ggml.py
+213 −19 examples/perplexity/perplexity.cpp
+14 −14 examples/quantize/quantize.cpp
+0 −1 examples/reason-act.sh
+0 −0 examples/server-llama2-13B.sh
+17 −14 examples/server/README.md
+1 −0 examples/server/api_like_OAI.py
+0 −0 examples/server/chat-llama2.sh
+0 −0 examples/server/chat.sh
+1,675 −1,101 examples/server/index.html.hpp
+231 −14 examples/server/public/index.html
+234 −56 examples/server/server.cpp
+2 −2 examples/train-text-from-scratch/train-text-from-scratch.cpp
+6 −6 flake.lock
+34 −20 flake.nix
+79 −65 ggml-alloc.c
+1 −1 ggml-alloc.h
+330 −69 ggml-cuda.cu
+13 −0 ggml-cuda.h
+87 −66 ggml-metal.m
+114 −11 ggml-metal.metal
+919 −74 ggml.c
+116 −11 ggml.h
+21 −0 gguf-py/LICENSE
+55 −0 gguf-py/README.md
+1 −0 gguf-py/gguf/__init__.py
+23 −14 gguf-py/gguf/gguf.py
+28 −0 gguf-py/pyproject.toml
+7 −0 gguf-py/tests/test_gguf.py
+91 −0 grammars/README.md
+110 −54 k_quants.c
+1,732 −676 llama.cpp
+47 −15 llama.h
+1 −0 requirements.txt
+0 −0 scripts/get-wikitext-2.sh
+0 −93 scripts/perf-run-all.sh
+0 −39 scripts/ppl-run-all.sh
+27 −0 scripts/qnt-all.sh
+31 −0 scripts/run-all-perf.sh
+27 −0 scripts/run-all-ppl.sh
+11 −9 scripts/sync-ggml.sh
+2 −1 tests/CMakeLists.txt
+12 −2 tests/test-tokenizer-0.cpp
+7 −22 tests/test-tokenizer-1.cpp
22 changes: 11 additions & 11 deletions scripts/ggml-metal.m.patch
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
--- ggml-metal-orig.m 2023-08-22 12:22:42
+++ ggml-metal.m 2023-08-22 12:22:43
@@ -218,13 +218,13 @@
--- ggml-metal-orig.m 2023-08-27 09:44:37
+++ ggml-metal.m 2023-08-27 09:46:07
@@ -226,13 +226,13 @@
#undef LM_GGML_METAL_ADD_KERNEL
}

- fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+ // fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
- fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
+ // fprintf(stderr, "%s: recommendedMaxWorkingSetSize = %8.2f MB\n", __func__, ctx->device.recommendedMaxWorkingSetSize / 1024.0 / 1024.0);
fprintf(stderr, "%s: hasUnifiedMemory = %s\n", __func__, ctx->device.hasUnifiedMemory ? "true" : "false");
- if (ctx->device.maxTransferRate != 0) {
- fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
- fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
- } else {
- fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
- fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
- }
+ // if (ctx->device.maxTransferRate != 0) {
+ // fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+ // fprintf(stderr, "%s: maxTransferRate = %8.2f MB/s\n", __func__, ctx->device.maxTransferRate / 1024.0 / 1024.0);
+ // } else {
+ // fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
+ // fprintf(stderr, "%s: maxTransferRate = built-in GPU\n", __func__);
+ // }

return ctx;
}
@@ -366,15 +366,15 @@
@@ -374,15 +374,15 @@
}
}

Expand Down

0 comments on commit 8eee8ea

Please sign in to comment.