[auto] Sync version 2405231813.0.0+llamacpp-release.b2985
== Relevant log messages from source repo:

commit 1debe72737ea131cb52975da3d53ed3a835df3a6
Author: Georgi Gerganov <[email protected]>
Date:   Thu May 23 17:17:43 2024 +0300

    ggml : silence UB sanitizer error during iq2_xxs quantization (#0)

commit 007489e895bad02e4e54758bf0bdf2d6a4cdb7c1
Author: Tristan Druyen <[email protected]>
Date:   Thu May 23 16:15:15 2024 +0200

    Fix phi3 chat template confusion with zephyr (#7449)

    * Fix phi3 template matching vs zephyr
    * Add regression test for new phi3 chat template
    * Implement review suggestions
    * Fix phi3 jinja test templates & match by <|end|>
    * Apply suggestion

      Co-authored-by: Xuan Son Nguyen <[email protected]>

    * Add all phi3 template variants in tests
    * Remove unneeded message trimming

      Co-authored-by: Xuan Son Nguyen <[email protected]>

    * Fix tests to not expect trimmed messages

    ---------

    Co-authored-by: Xuan Son Nguyen <[email protected]>

commit 3015851c5ac7334fb544a23a70a284c117b87044
Author: Daniel Bevenius <[email protected]>
Date:   Thu May 23 14:29:26 2024 +0200

    llama : add getters for n_threads/n_threads_batch (#7464)

    * llama : add getters for n_threads/n_threads_batch

      This commit adds two new functions to the llama API. The functions can
      be used to get the number of threads used for generating a single
      token and the number of threads used for prompt and batch processing
      (multiple tokens).

      The motivation for this is that we want to be able to get the number
      of threads that a context is using. The main use case is
      testing/verification that the number of threads is set correctly.

      Signed-off-by: Daniel Bevenius <[email protected]>

    * squash! llama : add getters for n_threads/n_threads_batch

      Rename the getters to llama_n_threads and llama_n_threads_batch.

      Signed-off-by: Daniel Bevenius <[email protected]>

    ---------

    Signed-off-by: Daniel Bevenius <[email protected]>

commit 55ac3b7aeaf52f19786ed96e885d89521fc0f6c8
Author: Georgi Gerganov <[email protected]>
Date:   Thu May 23 15:28:14 2024 +0300

    ci : use Pythia models instead of OpenLlama (#7470)

    * ci : start using Pythia models over OpenLlama

      ggml-ci

    * ci : disable q2_k ppl tests
    * ci : use convert-hf-to-gguf.py
    * ci : update gg_get_model
    * ci : fix convert outfile name

      ggml-ci

    * llama : gptneox arch use F32 attn prec

      ggml-ci
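As a rough illustration of what the template fix in #7449 touches, the sketch below renders a conversation through llama_chat_apply_template. It assumes the loaded GGUF carries a phi-3 chat template, which after this commit is matched by its <|end|> marker rather than falling through to the zephyr format; the model path and message contents are placeholders.

```cpp
// Minimal sketch, not a supported example: apply the model's own chat template.
#include <algorithm>
#include <cstdio>
#include <vector>
#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) { fprintf(stderr, "usage: %s model.gguf\n", argv[0]); return 1; }

    llama_backend_init();
    llama_model * model = llama_load_model_from_file(argv[1], llama_model_default_params());

    // Placeholder conversation
    llama_chat_message chat[] = {
        { "user",      "Hello, who are you?" },
        { "assistant", "I am an assistant."  },
        { "user",      "What is 2+2?"        },
    };
    const size_t n_msg = sizeof(chat) / sizeof(chat[0]);

    std::vector<char> buf(4096);
    // tmpl == nullptr -> use the chat template stored in the model metadata
    const int32_t n = llama_chat_apply_template(model, nullptr, chat, n_msg, /*add_ass=*/true,
                                                buf.data(), (int32_t) buf.size());
    if (n >= 0) {
        // For a phi-3 template the output uses <|user|> ... <|end|> ... <|assistant|> markers
        printf("%.*s\n", (int) std::min<int32_t>(n, (int32_t) buf.size()), buf.data());
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```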
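A minimal sketch of how the llama_n_threads / llama_n_threads_batch getters added in #7464 might be used to verify that a context picked up the requested thread counts; the model path and thread values below are placeholders.

```cpp
// Minimal sketch: create a context with explicit thread counts and read them back.
#include <cstdio>
#include "llama.h"

int main(int argc, char ** argv) {
    if (argc < 2) { fprintf(stderr, "usage: %s model.gguf\n", argv[0]); return 1; }

    llama_backend_init();
    llama_model * model = llama_load_model_from_file(argv[1], llama_model_default_params());

    llama_context_params cparams = llama_context_default_params();
    cparams.n_threads       = 4;  // threads for single-token generation (placeholder value)
    cparams.n_threads_batch = 8;  // threads for prompt/batch processing (placeholder value)
    llama_context * ctx = llama_new_context_with_model(model, cparams);

    // The getters from #7464 expose what the context is actually using.
    printf("n_threads       = %u\n", llama_n_threads(ctx));
    printf("n_threads_batch = %u\n", llama_n_threads_batch(ctx));

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```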