[auto] Sync version 2405231219.0.0+llamacpp-release.b2979

== Relevant log messages from source repo:

commit 9b82476ee9e73065a759f8bcc4cf27ec7ab2ed8c
Author: fairydreaming <[email protected]>
Date:   Thu May 23 11:49:53 2024 +0200

    Add missing inference support for GPTNeoXForCausalLM (Pythia and GPT-NeoX base models) (#7461)

    * convert-hf : add conversion of bloom-style qkv tensor to gpt-style qkv (code borrowed from BloomModel)
    * llama : add inference support for LLM_ARCH_GPTNEOX
    * llama : add model types for every Pythia variant and GPT-NeoX

    Co-authored-by: Stanisław Szymczyk <[email protected]>

commit a61a94e543e3c6877c087e80fca27a0313ce5fd5
Author: Georgi Gerganov <[email protected]>
Date:   Thu May 23 12:38:18 2024 +0300

    llama : rename n_ctx -> cache.size, less confusing (#0)

commit d48c88cbd563b6cf0ce972e2f56796896e240736
Author: Georgi Gerganov <[email protected]>
Date:   Thu May 23 10:00:44 2024 +0300

    ggml : remove ggml_flash_attn and ggml_flash_ff (#7463)

    ggml-ci

commit e84b71c2c6da6e69c8f815168ea836f9716a325e
Author: Georgi Gerganov <[email protected]>
Date:   Thu May 23 10:00:21 2024 +0300

    ggml : drop support for QK_K=64 (#7473)

    * ggml : drop support for QK_K=64

    ggml-ci

    * opencl : restore QK_K=256 define

commit 1b1e27cb49158123ef4902aa41eb368c9e76e6a1
Author: 0cc4m <[email protected]>
Date:   Thu May 23 08:59:59 2024 +0200

    Update vulkan rope implementation to support frequency factors (#7475)
KerfuffleV2 · May 23, 2024 · d7a3bbd · d7a3bbd
1 parent 6ab87f5
commit d7a3bbd
Show file tree

Hide file tree

Showing 5 changed files with 24 additions and 41 deletions.
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "ggml-sys-bleedingedge"
-version = "2405221814.0.0+llamacpp-release.b2968"
+version = "2405231219.0.0+llamacpp-release.b2979"
 description = "Bleeding edge low-level bindings to GGML. "
 repository = "https://github.com/KerfuffleV2/ggml-sys-bleedingedge"
 keywords = ["deep-learning", "machine-learning", "tensors", "ggml", "ml"]

diff --git a/VERSION.txt b/VERSION.txt
@@ -1 +1 @@
-2405221814.0.0+llamacpp-release.b2968
+2405231219.0.0+llamacpp-release.b2979
diff --git a/ggml-tag-current.txt b/ggml-tag-current.txt
@@ -1 +1 @@
-b2968
+b2979
diff --git a/ggml-tag-previous.txt b/ggml-tag-previous.txt
@@ -1 +1 @@
-b2965
+b2968
diff --git a/src/lib.rs b/src/lib.rs
@@ -728,28 +728,26 @@ pub const ggml_op_GGML_OP_ARANGE: ggml_op = 51;
 pub const ggml_op_GGML_OP_TIMESTEP_EMBEDDING: ggml_op = 52;
 pub const ggml_op_GGML_OP_ARGSORT: ggml_op = 53;
 pub const ggml_op_GGML_OP_LEAKY_RELU: ggml_op = 54;
-pub const ggml_op_GGML_OP_FLASH_ATTN: ggml_op = 55;
-pub const ggml_op_GGML_OP_FLASH_ATTN_EXT: ggml_op = 56;
-pub const ggml_op_GGML_OP_FLASH_FF: ggml_op = 57;
-pub const ggml_op_GGML_OP_FLASH_ATTN_BACK: ggml_op = 58;
-pub const ggml_op_GGML_OP_SSM_CONV: ggml_op = 59;
-pub const ggml_op_GGML_OP_SSM_SCAN: ggml_op = 60;
-pub const ggml_op_GGML_OP_WIN_PART: ggml_op = 61;
-pub const ggml_op_GGML_OP_WIN_UNPART: ggml_op = 62;
-pub const ggml_op_GGML_OP_GET_REL_POS: ggml_op = 63;
-pub const ggml_op_GGML_OP_ADD_REL_POS: ggml_op = 64;
-pub const ggml_op_GGML_OP_UNARY: ggml_op = 65;
-pub const ggml_op_GGML_OP_MAP_UNARY: ggml_op = 66;
-pub const ggml_op_GGML_OP_MAP_BINARY: ggml_op = 67;
-pub const ggml_op_GGML_OP_MAP_CUSTOM1_F32: ggml_op = 68;
-pub const ggml_op_GGML_OP_MAP_CUSTOM2_F32: ggml_op = 69;
-pub const ggml_op_GGML_OP_MAP_CUSTOM3_F32: ggml_op = 70;
-pub const ggml_op_GGML_OP_MAP_CUSTOM1: ggml_op = 71;
-pub const ggml_op_GGML_OP_MAP_CUSTOM2: ggml_op = 72;
-pub const ggml_op_GGML_OP_MAP_CUSTOM3: ggml_op = 73;
-pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS: ggml_op = 74;
-pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK: ggml_op = 75;
-pub const ggml_op_GGML_OP_COUNT: ggml_op = 76;
+pub const ggml_op_GGML_OP_FLASH_ATTN_EXT: ggml_op = 55;
+pub const ggml_op_GGML_OP_FLASH_ATTN_BACK: ggml_op = 56;
+pub const ggml_op_GGML_OP_SSM_CONV: ggml_op = 57;
+pub const ggml_op_GGML_OP_SSM_SCAN: ggml_op = 58;
+pub const ggml_op_GGML_OP_WIN_PART: ggml_op = 59;
+pub const ggml_op_GGML_OP_WIN_UNPART: ggml_op = 60;
+pub const ggml_op_GGML_OP_GET_REL_POS: ggml_op = 61;
+pub const ggml_op_GGML_OP_ADD_REL_POS: ggml_op = 62;
+pub const ggml_op_GGML_OP_UNARY: ggml_op = 63;
+pub const ggml_op_GGML_OP_MAP_UNARY: ggml_op = 64;
+pub const ggml_op_GGML_OP_MAP_BINARY: ggml_op = 65;
+pub const ggml_op_GGML_OP_MAP_CUSTOM1_F32: ggml_op = 66;
+pub const ggml_op_GGML_OP_MAP_CUSTOM2_F32: ggml_op = 67;
+pub const ggml_op_GGML_OP_MAP_CUSTOM3_F32: ggml_op = 68;
+pub const ggml_op_GGML_OP_MAP_CUSTOM1: ggml_op = 69;
+pub const ggml_op_GGML_OP_MAP_CUSTOM2: ggml_op = 70;
+pub const ggml_op_GGML_OP_MAP_CUSTOM3: ggml_op = 71;
+pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS: ggml_op = 72;
+pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK: ggml_op = 73;
+pub const ggml_op_GGML_OP_COUNT: ggml_op = 74;
 pub const ggml_unary_op_GGML_UNARY_OP_ABS: ggml_unary_op = 0;
 pub const ggml_unary_op_GGML_UNARY_OP_SGN: ggml_unary_op = 1;
 pub const ggml_unary_op_GGML_UNARY_OP_NEG: ggml_unary_op = 2;
@@ -4852,13 +4850,6 @@ extern "C" {
         a: *mut ggml_tensor,
         k: ::std::os::raw::c_int,
     ) -> *mut ggml_tensor;
-    pub fn ggml_flash_attn(
-        ctx: *mut ggml_context,
-        q: *mut ggml_tensor,
-        k: *mut ggml_tensor,
-        v: *mut ggml_tensor,
-        masked: bool,
-    ) -> *mut ggml_tensor;
     pub fn ggml_flash_attn_ext(
         ctx: *mut ggml_context,
         q: *mut ggml_tensor,
@@ -4877,14 +4868,6 @@ extern "C" {
         d: *mut ggml_tensor,
         masked: bool,
     ) -> *mut ggml_tensor;
-    pub fn ggml_flash_ff(
-        ctx: *mut ggml_context,
-        a: *mut ggml_tensor,
-        b0: *mut ggml_tensor,
-        b1: *mut ggml_tensor,
-        c0: *mut ggml_tensor,
-        c1: *mut ggml_tensor,
-    ) -> *mut ggml_tensor;
     pub fn ggml_ssm_conv(
         ctx: *mut ggml_context,
         s: *mut ggml_tensor,
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		-2405221814.0.0+llamacpp-release.b2968
		+2405231219.0.0+llamacpp-release.b2979