feat: sync llama.cpp #13

Merged 6 commits on Aug 29, 2023
2 changes: 1 addition & 1 deletion android/src/main/jni.cpp
@@ -320,7 +320,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
if (token_with_probs.tok == -1 || llama->multibyte_pending > 0) {
continue;
}
-const std::string token_text = llama_token_to_str(llama->ctx, token_with_probs.tok);
+const std::string token_text = llama_token_to_piece(llama->ctx, token_with_probs.tok);

size_t pos = std::min(sent_count, llama->generated_text.size());

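Note: the rename tracks upstream llama.cpp, where llama_token_to_str was superseded by llama_token_to_piece. A minimal sketch of the decode-loop shape these bindings share (the while condition is an illustrative assumption; nextToken, multibyte_pending, and has_next_token come from rn-llama.hpp below):

// Sketch only: the shared shape of the completion loops in jni.cpp and
// RNLlamaContext.mm. `llama` is the llama_rn_context.
while (llama->has_next_token) {
    const completion_token_output token_with_probs = llama->nextToken();
    // -1 is a placeholder token; multibyte_pending > 0 means the last piece
    // ended mid UTF-8 sequence, so hold off until the character is complete.
    if (token_with_probs.tok == -1 || llama->multibyte_pending > 0) {
        continue;
    }
    // The renamed helper returns the raw text piece for a single token.
    const std::string token_text = llama_token_to_piece(llama->ctx, token_with_probs.tok);
    // ... append to llama->generated_text and emit the new text to JS ...
}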
10 changes: 5 additions & 5 deletions cpp/rn-llama.hpp
@@ -101,7 +101,7 @@ static size_t find_partial_stop_string(const std::string &stop,
// format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{
-std::string out = token == -1 ? "" : llama_token_to_str(ctx, token);
+std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
// if the size is 1 and first bit is 1, meaning it's a partial character
// (size > 1 meaning it's already a known token)
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
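The check above relies on a UTF-8 invariant: any byte with the high bit set (0x80) belongs to a multibyte sequence, so a lone such byte cannot be a complete character. A hedged sketch of how such a byte can be rendered safely for output (the hex-escape format is an assumption; the real body of tokens_to_output_formatted_string is collapsed above):

#include <sstream>
#include <string>

// Sketch: format a lone partial UTF-8 byte as an escaped hex literal so it
// can be emitted without producing invalid text. Uses the same
// (out[0] & 0x80) == 0x80 test as above.
static std::string format_partial_utf8(std::string out) {
    if (out.size() == 1 && (out[0] & 0x80) == 0x80) {
        std::stringstream ss;
        ss << std::hex << (out[0] & 0xff); // e.g. 0xe2, the lead byte of "…"
        out = "byte: \\x" + ss.str();
    }
    return out;
}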
@@ -120,7 +120,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
std::string ret;
for (; begin != end; ++begin)
{
-ret += llama_token_to_str(ctx, *begin);
+ret += llama_token_to_piece(ctx, *begin);
}
return ret;
}
@@ -456,7 +456,7 @@ struct llama_rn_context

if (!embd.empty() && embd.back() == llama_token_eos(ctx))
{
-// stopping_word = llama_token_to_str(ctx, embd.back());
+// stopping_word = llama_token_to_piece(ctx, embd.back());
has_next_token = false;
stopped_eos = true;
LOG_VERBOSE("eos token found", "");
@@ -503,7 +503,7 @@
{
const completion_token_output token_with_probs = nextToken();

-const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(ctx, token_with_probs.tok);
+const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(ctx, token_with_probs.tok);
generated_text += token_text;

if (params.n_probs > 0)
@@ -551,7 +551,7 @@
}

LOG_VERBOSE("next token, token: %s, token_text: %s, has_next_token: %d, n_remain: %d, num_tokens_predicted: %d, stopped_eos: %d, stopped_word: %d, stopped_limit: %d, stopping_word: %s",
-llama_token_to_str(ctx, token_with_probs.tok),
+llama_token_to_piece(ctx, token_with_probs.tok),
tokens_to_output_formatted_string(ctx, token_with_probs.tok).c_str(),
has_next_token,
n_remain,
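Related context: the sent_count / std::min bookkeeping in the platform files exists because streaming must hold back generated text that might be the start of a stop word. A sketch of the idea behind find_partial_stop_string, declared near the top of this header (the body below is an illustrative reimplementation under that assumption, not the PR's code):

#include <cstdint>
#include <string>

// Illustrative sketch: return the index in `text` where a prefix of `stop`
// begins at the tail of `text`, or std::string::npos if the tail cannot be
// the start of the stop word.
static size_t find_partial_stop(const std::string &stop, const std::string &text) {
    if (!text.empty() && !stop.empty()) {
        const char text_last_char = text.back();
        for (int64_t i = stop.size() - 1; i >= 0; i--) {
            if (stop[i] == text_last_char) {
                const std::string partial = stop.substr(0, i + 1);
                if (text.size() >= partial.size() &&
                    text.compare(text.size() - partial.size(), partial.size(), partial) == 0) {
                    return text.size() - partial.size();
                }
            }
        }
    }
    return std::string::npos;
}

On a match, the caller trims the streamed output at the returned position and resumes once further tokens show whether the stop word actually completes.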
2 changes: 1 addition & 1 deletion ios/RNLlamaContext.mm
@@ -207,7 +207,7 @@ - (NSDictionary *)completion:(NSDictionary *)params
if (token_with_probs.tok == -1 || llama->multibyte_pending > 0) {
continue;
}
-const std::string token_text = llama_token_to_str(llama->ctx, token_with_probs.tok);
+const std::string token_text = llama_token_to_piece(llama->ctx, token_with_probs.tok);

size_t pos = std::min(sent_count, llama->generated_text.size());

2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 49 files
+35 −9 .devops/llama-cpp-clblast.srpm.spec
+25 −1 .devops/llama-cpp-cublas.srpm.spec
+36 −9 .devops/llama-cpp.srpm.spec
+4 −1 .gitignore
+10 −4 Makefile
+4 −0 README.md
+118 −22 ci/run.sh
+351 −12 common/common.cpp
+39 −1 common/common.h
+3 −3 examples/beam_search/beam_search.cpp
+2 −6 examples/convert-llama2c-to-ggml/README.md
+218 −123 examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+1 −1 examples/embd-input/embd-input-lib.cpp
+1 −4 examples/embedding/embedding.cpp
+5 −0 examples/gguf/CMakeLists.txt
+3 −0 examples/gguf/gguf.cpp
+6 −1 examples/llama-bench/llama-bench.cpp
+82 −14 examples/main/main.cpp
+121 −28 examples/perplexity/perplexity.cpp
+2 −2 examples/quantize/quantize.cpp
+2 −2 examples/save-load-state/save-load-state.cpp
+8 −10 examples/server/server.cpp
+2 −2 examples/simple/simple.cpp
+7 −7 examples/train-text-from-scratch/README.md
+492 −0 examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
+1,152 −2,250 examples/train-text-from-scratch/train-text-from-scratch.cpp
+7 −3 ggml-alloc.c
+23 −11 ggml-cuda.cu
+1 −0 ggml-metal.h
+81 −19 ggml-metal.m
+315 −206 ggml.c
+41 −18 ggml.h
+29 −7 gguf-py/gguf/gguf.py
+2 −2 k_quants.c
+120 −48 llama.cpp
+10 −5 llama.h
+140 −0 run_with_preset.py
+26 −0 scripts/convert-gg.sh
+2 −0 scripts/qnt-all.sh
+2 −0 scripts/run-all-perf.sh
+2 −0 scripts/run-all-ppl.sh
+4 −2 tests/CMakeLists.txt
+37 −17 tests/test-grad0.cpp
+178 −0 tests/test-tokenizer-0-falcon.cpp
+83 −0 tests/test-tokenizer-0-falcon.py
+182 −0 tests/test-tokenizer-0-llama.cpp
+95 −0 tests/test-tokenizer-0-llama.py
+0 −141 tests/test-tokenizer-0.cpp
+3 −11 tests/test-tokenizer-1.cpp
5 changes: 5 additions & 0 deletions scripts/bootstrap.sh
@@ -3,6 +3,11 @@
git submodule init
git submodule update --recursive

+cd llama.cpp
+./scripts/build-info.sh > build-info.h
+cd -
+
+cp ./llama.cpp/build-info.h ./cpp/build-info.h
cp ./llama.cpp/ggml.h ./cpp/ggml.h
cp ./llama.cpp/ggml.c ./cpp/ggml.c
cp ./llama.cpp/ggml-metal.h ./cpp/ggml-metal.h
8 changes: 4 additions & 4 deletions scripts/ggml-metal.m.patch
@@ -1,6 +1,6 @@
---- ggml-metal-orig.m 2023-08-27 09:44:37
-+++ ggml-metal.m 2023-08-27 09:46:07
-@@ -226,13 +226,13 @@
+--- ggml-metal-orig.m 2023-08-29 10:32:31
++++ ggml-metal.m 2023-08-29 10:32:32
+@@ -230,13 +230,13 @@
#undef LM_GGML_METAL_ADD_KERNEL
}

@@ -20,7 +20,7 @@

return ctx;
}
-@@ -374,15 +374,15 @@
+@@ -436,15 +436,15 @@
}
}
