feat: sync llama.cpp (#13)
* feat: sync llama.cpp

* chore: regen patch

* fix: copy build-info.h

* fix: llama_token_to_str -> llama_token_to_piece

* fix: build-info.h gen
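
Of these, the llama_token_to_str -> llama_token_to_piece rename is the only source-level API change; upstream llama.cpp removed the old name in this sync window. A minimal before/after sketch of a call site, assuming the std::string-returning helper that arrives with the synced sources (detokenize_one is a hypothetical wrapper, not code from this commit):

#include <string>
#include "rn-llama.hpp" // assumed include; the helper comes with the synced llama.cpp sources

// Convert one sampled token back into text. Only the helper's name
// changed in this sync; behavior is the same.
static std::string detokenize_one(llama_context * ctx, llama_token tok) {
    // was: return llama_token_to_str(ctx, tok);
    return llama_token_to_piece(ctx, tok);
}

Every call site in the diffs below follows this same one-line substitution.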
jhen0409 authored Aug 29, 2023
1 parent 13b0a15 commit a7827f7
Showing 6 changed files with 17 additions and 12 deletions.
2 changes: 1 addition & 1 deletion android/src/main/jni.cpp
@@ -320,7 +320,7 @@ Java_com_rnllama_LlamaContext_doCompletion(
if (token_with_probs.tok == -1 || llama->multibyte_pending > 0) {
continue;
}
- const std::string token_text = llama_token_to_str(llama->ctx, token_with_probs.tok);
+ const std::string token_text = llama_token_to_piece(llama->ctx, token_with_probs.tok);

size_t pos = std::min(sent_count, llama->generated_text.size());

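The multibyte_pending guard above is why the rename is safe for streaming: a token may decode to only part of a UTF-8 character, and emission to the JS side is skipped until every expected continuation byte has arrived. A sketch of the bookkeeping, mirroring the pattern in llama.cpp's server example that this code is derived from (the function name is illustrative):

#include <string>

// Given freshly decoded token text, estimate how many UTF-8 continuation
// bytes are still outstanding. Lead bytes 110xxxxx, 1110xxxx and 11110xxx
// announce 1, 2 or 3 more bytes respectively.
static int utf8_continuation_bytes_expected(const std::string & text) {
    if (text.size() != 1) return 0;    // longer output is already complete
    const unsigned char c = text[0];
    if ((c & 0xE0) == 0xC0) return 1;  // 2-byte sequence
    if ((c & 0xF0) == 0xE0) return 2;  // 3-byte sequence
    if ((c & 0xF8) == 0xF0) return 3;  // 4-byte sequence
    return 0;                          // ASCII, or a continuation byte
}

While that counter is non-zero, the completion loop continues without emitting, as the guard above shows.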
10 changes: 5 additions & 5 deletions cpp/rn-llama.hpp
@@ -101,7 +101,7 @@ static size_t find_partial_stop_string(const std::string &stop,
// format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{
- std::string out = token == -1 ? "" : llama_token_to_str(ctx, token);
+ std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
// if the size is 1 and first bit is 1, meaning it's a partial character
// (size > 1 meaning it's already a known token)
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
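
The masked check reads: a single returned byte with its high bit set (a 10xxxxxx continuation byte or an 11xxxxxx lead byte) can never be a complete UTF-8 character on its own, so it is escaped rather than emitted as invalid text. A sketch of that fallback formatting, following the byte: \x.. convention of the llama.cpp server code this file tracks (format_partial_byte is illustrative, not part of the diff):

#include <cstdio>
#include <string>

// Render a lone high-bit byte as "byte: \x<hex>" so callers never
// receive an invalid UTF-8 fragment.
static std::string format_partial_byte(const std::string & out) {
    if (out.size() == 1 && (out[0] & 0x80) == 0x80) {
        char buf[16];
        std::snprintf(buf, sizeof(buf), "byte: \\x%02x", out[0] & 0xff);
        return std::string(buf);
    }
    return out;
}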
@@ -120,7 +120,7 @@ static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
std::string ret;
for (; begin != end; ++begin)
{
- ret += llama_token_to_str(ctx, *begin);
+ ret += llama_token_to_piece(ctx, *begin);
}
return ret;
}
@@ -456,7 +456,7 @@ struct llama_rn_context

if (!embd.empty() && embd.back() == llama_token_eos(ctx))
{
- // stopping_word = llama_token_to_str(ctx, embd.back());
+ // stopping_word = llama_token_to_piece(ctx, embd.back());
has_next_token = false;
stopped_eos = true;
LOG_VERBOSE("eos token found", "");
@@ -503,7 +503,7 @@ struct llama_rn_context
{
const completion_token_output token_with_probs = nextToken();

- const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_str(ctx, token_with_probs.tok);
+ const std::string token_text = token_with_probs.tok == -1 ? "" : llama_token_to_piece(ctx, token_with_probs.tok);
generated_text += token_text;

if (params.n_probs > 0)
@@ -551,7 +551,7 @@
}

LOG_VERBOSE("next token, token: %s, token_text: %s, has_next_token: %d, n_remain: %d, num_tokens_predicted: %d, stopped_eos: %d, stopped_word: %d, stopped_limit: %d, stopping_word: %s",
- llama_token_to_str(ctx, token_with_probs.tok),
+ llama_token_to_piece(ctx, token_with_probs.tok),
tokens_to_output_formatted_string(ctx, token_with_probs.tok).c_str(),
has_next_token,
n_remain,
2 changes: 1 addition & 1 deletion ios/RNLlamaContext.mm
@@ -207,7 +207,7 @@ - (NSDictionary *)completion:(NSDictionary *)params
if (token_with_probs.tok == -1 || llama->multibyte_pending > 0) {
continue;
}
- const std::string token_text = llama_token_to_str(llama->ctx, token_with_probs.tok);
+ const std::string token_text = llama_token_to_piece(llama->ctx, token_with_probs.tok);

size_t pos = std::min(sent_count, llama->generated_text.size());

2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 49 files
+35 −9 .devops/llama-cpp-clblast.srpm.spec
+25 −1 .devops/llama-cpp-cublas.srpm.spec
+36 −9 .devops/llama-cpp.srpm.spec
+4 −1 .gitignore
+10 −4 Makefile
+4 −0 README.md
+118 −22 ci/run.sh
+351 −12 common/common.cpp
+39 −1 common/common.h
+3 −3 examples/beam_search/beam_search.cpp
+2 −6 examples/convert-llama2c-to-ggml/README.md
+218 −123 examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp
+1 −1 examples/embd-input/embd-input-lib.cpp
+1 −4 examples/embedding/embedding.cpp
+5 −0 examples/gguf/CMakeLists.txt
+3 −0 examples/gguf/gguf.cpp
+6 −1 examples/llama-bench/llama-bench.cpp
+82 −14 examples/main/main.cpp
+121 −28 examples/perplexity/perplexity.cpp
+2 −2 examples/quantize/quantize.cpp
+2 −2 examples/save-load-state/save-load-state.cpp
+8 −10 examples/server/server.cpp
+2 −2 examples/simple/simple.cpp
+7 −7 examples/train-text-from-scratch/README.md
+492 −0 examples/train-text-from-scratch/convert-train-checkpoint-to-gguf.py
+1,152 −2,250 examples/train-text-from-scratch/train-text-from-scratch.cpp
+7 −3 ggml-alloc.c
+23 −11 ggml-cuda.cu
+1 −0 ggml-metal.h
+81 −19 ggml-metal.m
+315 −206 ggml.c
+41 −18 ggml.h
+29 −7 gguf-py/gguf/gguf.py
+2 −2 k_quants.c
+120 −48 llama.cpp
+10 −5 llama.h
+140 −0 run_with_preset.py
+26 −0 scripts/convert-gg.sh
+2 −0 scripts/qnt-all.sh
+2 −0 scripts/run-all-perf.sh
+2 −0 scripts/run-all-ppl.sh
+4 −2 tests/CMakeLists.txt
+37 −17 tests/test-grad0.cpp
+178 −0 tests/test-tokenizer-0-falcon.cpp
+83 −0 tests/test-tokenizer-0-falcon.py
+182 −0 tests/test-tokenizer-0-llama.cpp
+95 −0 tests/test-tokenizer-0-llama.py
+0 −141 tests/test-tokenizer-0.cpp
+3 −11 tests/test-tokenizer-1.cpp
5 changes: 5 additions & 0 deletions scripts/bootstrap.sh
@@ -3,6 +3,11 @@
git submodule init
git submodule update --recursive

+ cd llama.cpp
+ ./scripts/build-info.sh > build-info.h
+ cd -
+
+ cp ./llama.cpp/build-info.h ./cpp/build-info.h
cp ./llama.cpp/ggml.h ./cpp/ggml.h
cp ./llama.cpp/ggml.c ./cpp/ggml.c
cp ./llama.cpp/ggml-metal.h ./cpp/ggml-metal.h
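These new steps exist because llama.cpp generates build-info.h at build time rather than checking it in, so the bootstrap script must produce it and copy it next to the other mirrored sources (the "copy build-info.h" and "build-info.h gen" fixes in the commit message). The generated header is just a few macros; a hedged sketch of its shape, with macro names as in upstream llama.cpp at the time and purely illustrative values:

// cpp/build-info.h -- generated by llama.cpp/scripts/build-info.sh
#ifndef BUILD_INFO_H
#define BUILD_INFO_H

#define BUILD_NUMBER 1100        // illustrative: commit count on the branch
#define BUILD_COMMIT "a7827f7"   // illustrative: short commit hash

#endif // BUILD_INFO_H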
8 changes: 4 additions & 4 deletions scripts/ggml-metal.m.patch
@@ -1,6 +1,6 @@
- --- ggml-metal-orig.m 2023-08-27 09:44:37
- +++ ggml-metal.m 2023-08-27 09:46:07
- @@ -226,13 +226,13 @@
+ --- ggml-metal-orig.m 2023-08-29 10:32:31
+ +++ ggml-metal.m 2023-08-29 10:32:32
+ @@ -230,13 +230,13 @@
#undef LM_GGML_METAL_ADD_KERNEL
}

@@ -20,7 +20,7 @@

return ctx;
}
- @@ -374,15 +374,15 @@
+ @@ -436,15 +436,15 @@
}
}

