feat: sync llama.cpp (#13)
* feat: sync llama.cpp

* chore: regen patch

* fix: copy build-info.h

* fix: llama_token_to_str -> llama_token_to_piece

* fix: build-info.h gen
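
Of these, the llama_token_to_str -> llama_token_to_piece rename is the only source-level API change; upstream llama.cpp removed the old name in this sync window. A minimal before/after sketch of a call site, assuming the std::string-returning helper that arrives with the synced sources (detokenize_one is a hypothetical wrapper, not code from this commit):

#include <string>
#include "rn-llama.hpp" // assumed include; the helper comes with the synced llama.cpp sources

// Convert one sampled token back into text. Only the helper's name
// changed in this sync; behavior is the same.
static std::string detokenize_one(llama_context * ctx, llama_token tok) {
    // was: return llama_token_to_str(ctx, tok);
    return llama_token_to_piece(ctx, tok);
}

Every call site in the diffs below follows this same one-line substitution.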
jhen0409 authored Aug 29, 2023
1 parent 13b0a15 commit a7827f7
Showing 6 changed files with 17 additions and 12 deletions.
2 changes: 1 addition & 1 deletion android/src/main/jni.cpp
@@ -320,7 +320,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
if (token_with_probs.tok == -1 || llama->multibyte_pending > 0) {
continue;
}
- const std::string token_text = llama_token_to_str(llama->ctx, token_with_probs.tok);
+ const std::string token_text = llama_token_to_piece(llama->ctx, token_with_probs.tok);

size_t pos = std::min(sent_count, llama->generated_text.size());

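The multibyte_pending guard above is why the rename is safe for streaming: a token may decode to only part of a UTF-8 character, and emission to the JS side is skipped until every expected continuation byte has arrived. A sketch of the bookkeeping, mirroring the pattern in llama.cpp's server example that this code is derived from (the function name is illustrative):

#include <string>

// Given freshly decoded token text, estimate how many UTF-8 continuation
// bytes are still outstanding. Lead bytes 110xxxxx, 1110xxxx and 11110xxx
// announce 1, 2 or 3 more bytes respectively.
static int utf8_continuation_bytes_expected(const std::string & text) {
    if (text.size() != 1) return 0;    // longer output is already complete
    const unsigned char c = text[0];
    if ((c & 0xE0) == 0xC0) return 1;  // 2-byte sequence
    if ((c & 0xF0) == 0xE0) return 2;  // 3-byte sequence
    if ((c & 0xF8) == 0xF0) return 3;  // 4-byte sequence
    return 0;                          // ASCII, or a continuation byte
}

While that counter is non-zero, the completion loop continues without emitting, as the guard above shows.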
10 changes: 5 additions & 5 deletions cpp/rn-llama.hpp
@@ -101,7 +101,7 @@ static size_t find_partial_stop_string(const std::string &stop,
// format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{
- std::string out = token == -1 ? "" : llama_token_to_str(ctx, token);
+ std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
// if the size is 1 and first bit is 1, meaning it's a partial character
// (size > 1 meaning it's already a known token)
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
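
The masked check reads: a single returned byte with its high bit set (a 10xxxxxx continuation byte or an 11xxxxxx lead byte) can never be a complete UTF-8 character on its own, so it is escaped rather than emitted as invalid text. A sketch of that fallback formatting, following the byte: \x.. convention of the llama.cpp server code this file tracks (format_partial_byte is illustrative, not part of the diff):

#include <cstdio>
#include <string>

// Render a lone high-bit byte as "byte: \x<hex>" so callers never
// receive an invalid UTF-8 fragment.
static std::string format_partial_byte(const std::string & out) {
    if (out.size() == 1 && (out[0] & 0x80) == 0x80) {
        char buf[16];
        std::snprintf(buf, sizeof(buf), "byte: \\x%02x", out[0] & 0xff);
        return std::string(buf);
    }
    return out;
}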
@@ -120,7 +120,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
std::string ret;
for (; begin != end; ++begin)
{
- ret += llama_token_to_str(ctx, *begin);
+ ret += llama_token_to_piece(ctx, *begin);
}
return ret;
}
@@ -456,7 +456,7 @@ struct llama_rn_context

if (!embd.empty() && embd.back() == llama_token_eos(ctx))
{
- // stopping_word = llama_token_to_str(ctx, embd.back());
+ // stopping_word = llama_token_to_piece(ctx, embd.back());
has_next_token = false;
stopped_eos = true;
LOG_VERBOSE("eos token found", "");
@@ -503,7 +503,7 @@ struct llama_rn_context
{
const completion_token_output token_with_probs = nextToken();

- const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(ctx, token_with_probs.tok);
+ const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(ctx, token_with_probs.tok);
generated_text += token_text;

if (params.n_probs > 0)
@@ -551,7 +551,7 @@
}

LOG_VERBOSE("next token, token: %s, token_text: %s, has_next_token: %d, n_remain: %d, num_tokens_predicted: %d, stopped_eos: %d, stopped_word: %d, stopped_limit: %d, stopping_word: %s",
- llama_token_to_str(ctx, token_with_probs.tok),
+ llama_token_to_piece(ctx, token_with_probs.tok),
tokens_to_output_formatted_string(ctx, token_with_probs.tok).c_str(),
has_next_token,
n_remain,
2 changes: 1 addition & 1 deletion ios/RNLlamaContext.mm
@@ -207,7 +207,7 @@ - (NSDictionary *)completion:(NSDictionary *)params
if (token_with_probs.tok == -1 || llama->multibyte_pending > 0) {
continue;
}
- const std::string token_text = llama_token_to_str(llama->ctx, token_with_probs.tok);
+ const std::string token_text = llama_token_to_piece(llama->ctx, token_with_probs.tok);

size_t pos = std::min(sent_count, llama->generated_text.size());

2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 49 files
+35 −9 .devops/llama-cpp-clblast.srpm.spec
+25 −1 .devops/llama-cpp-cublas.srpm.spec
+36 −9 .devops/llama-cpp.srpm.spec
+4 −1 .gitignore
+10 −4 Makefile
+4 −0 README.md
+118 −22 ci/run.sh
+351 −12 common/common.cpp
+39 −1 common/common.h
+3 −3 examples/beam_search/beam_search.cpp
+2 −6 examples/convert-llama2c-to-ggml/README.md
+218 −123 examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+1 −1 examples/embd-input/embd-input-lib.cpp
+1 −4 examples/embedding/embedding.cpp
+5 −0 examples/gguf/CMakeLists.txt
+3 −0 examples/gguf/gguf.cpp
+6 −1 examples/llama-bench/llama-bench.cpp
+82 −14 examples/main/main.cpp
+121 −28 examples/perplexity/perplexity.cpp
+2 −2 examples/quantize/quantize.cpp
+2 −2 examples/save-load-state/save-load-state.cpp
+8 −10 examples/server/server.cpp
+2 −2 examples/simple/simple.cpp
+7 −7 examples/train-text-from-scratch/README.md
+492 −0 examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
+1,152 −2,250 examples/train-text-from-scratch/train-text-from-scratch.cpp
+7 −3 ggml-alloc.c
+23 −11 ggml-cuda.cu
+1 −0 ggml-metal.h
+81 −19 ggml-metal.m
+315 −206 ggml.c
+41 −18 ggml.h
+29 −7 gguf-py/gguf/gguf.py
+2 −2 k_quants.c
+120 −48 llama.cpp
+10 −5 llama.h
+140 −0 run_with_preset.py
+26 −0 scripts/convert-gg.sh
+2 −0 scripts/qnt-all.sh
+2 −0 scripts/run-all-perf.sh
+2 −0 scripts/run-all-ppl.sh
+4 −2 tests/CMakeLists.txt
+37 −17 tests/test-grad0.cpp
+178 −0 tests/test-tokenizer-0-falcon.cpp
+83 −0 tests/test-tokenizer-0-falcon.py
+182 −0 tests/test-tokenizer-0-llama.cpp
+95 −0 tests/test-tokenizer-0-llama.py
+0 −141 tests/test-tokenizer-0.cpp
+3 −11 tests/test-tokenizer-1.cpp
5 changes: 5 additions & 0 deletions scripts/bootstrap.sh
@@ -3,6 +3,11 @@
git submodule init
git submodule update --recursive

+ cd llama.cpp
+ ./scripts/build-info.sh > build-info.h
+ cd -
+
+ cp ./llama.cpp/build-info.h ./cpp/build-info.h
cp ./llama.cpp/ggml.h ./cpp/ggml.h
cp ./llama.cpp/ggml.c ./cpp/ggml.c
cp ./llama.cpp/ggml-metal.h ./cpp/ggml-metal.h
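These new steps exist because llama.cpp generates build-info.h at build time rather than checking it in, so the bootstrap script must produce it and copy it next to the other mirrored sources (the "copy build-info.h" and "build-info.h gen" fixes in the commit message). The generated header is just a few macros; a hedged sketch of its shape, with macro names as in upstream llama.cpp at the time and purely illustrative values:

// cpp/build-info.h -- generated by llama.cpp/scripts/build-info.sh
#ifndef BUILD_INFO_H
#define BUILD_INFO_H

#define BUILD_NUMBER 1100        // illustrative: commit count on the branch
#define BUILD_COMMIT "a7827f7"   // illustrative: short commit hash

#endif // BUILD_INFO_H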
8 changes: 4 additions & 4 deletions scripts/ggml-metal.m.patch
@@ -1,6 +1,6 @@
- --- ggml-metal-orig.m 2023-08-27 09:44:37
- +++ ggml-metal.m 2023-08-27 09:46:07
- @@ -226,13 +226,13 @@
+ --- ggml-metal-orig.m 2023-08-29 10:32:31
+ +++ ggml-metal.m 2023-08-29 10:32:32
+ @@ -230,13 +230,13 @@
#undef LM_GGML_METAL_ADD_KERNEL
}

@@ -20,7 +20,7 @@

return ctx;
}
- @@ -374,15 +374,15 @@
+ @@ -436,15 +436,15 @@
}
}

