From d7a3bbd5b548b5c878f9b973166ed2dd92e15517 Mon Sep 17 00:00:00 2001
From: github-actions <github-actions@github.com>
Date: Thu, 23 May 2024 12:20:31 +0000
Subject: [PATCH] [auto] Sync version 2405231219.0.0+llamacpp-release.b2979
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

== Relevant log messages from source repo:

commit 9b82476ee9e73065a759f8bcc4cf27ec7ab2ed8c
Author: fairydreaming <166155368+fairydreaming@users.noreply.github.com>
Date:   Thu May 23 11:49:53 2024 +0200

    Add missing inference support for GPTNeoXForCausalLM (Pythia and GPT-NeoX base models) (#7461)

    * convert-hf : add conversion of bloom-style qkv tensor to gpt-style qkv (code borrowed from BloomModel)

    * llama : add inference support for LLM_ARCH_GPTNEOX

    * llama : add model types for every Pythia variant and GPT-NeoX

    Co-authored-by: Stanisław Szymczyk <sszymczy@gmail.com>

commit a61a94e543e3c6877c087e80fca27a0313ce5fd5
Author: Georgi Gerganov <ggerganov@gmail.com>
Date:   Thu May 23 12:38:18 2024 +0300

    llama : rename n_ctx -> cache.size, less confusing (#0)

commit d48c88cbd563b6cf0ce972e2f56796896e240736
Author: Georgi Gerganov <ggerganov@gmail.com>
Date:   Thu May 23 10:00:44 2024 +0300

    ggml : remove ggml_flash_attn and ggml_flash_ff (#7463)

    ggml-ci

commit e84b71c2c6da6e69c8f815168ea836f9716a325e
Author: Georgi Gerganov <ggerganov@gmail.com>
Date:   Thu May 23 10:00:21 2024 +0300

    ggml : drop support for QK_K=64 (#7473)

    * ggml : drop support for QK_K=64

    ggml-ci

    * opencl : restore QK_K=256 define

commit 1b1e27cb49158123ef4902aa41eb368c9e76e6a1
Author: 0cc4m <picard12@live.de>
Date:   Thu May 23 08:59:59 2024 +0200

    Update vulkan rope implementation to support frequency factors (#7475)
---
 Cargo.toml            |  2 +-
 VERSION.txt           |  2 +-
 ggml-tag-current.txt  |  2 +-
 ggml-tag-previous.txt |  2 +-
 src/lib.rs            | 57 +++++++++++++++----------------------------
 5 files changed, 24 insertions(+), 41 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 342e54e..86b094f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "ggml-sys-bleedingedge"
-version = "2405221814.0.0+llamacpp-release.b2968"
+version = "2405231219.0.0+llamacpp-release.b2979"
 description = "Bleeding edge low-level bindings to GGML. "
 repository = "https://github.com/KerfuffleV2/ggml-sys-bleedingedge"
 keywords = ["deep-learning", "machine-learning", "tensors", "ggml", "ml"]
diff --git a/VERSION.txt b/VERSION.txt
index 4ca95e9..ab9cc17 100644
--- a/VERSION.txt
+++ b/VERSION.txt
@@ -1 +1 @@
-2405221814.0.0+llamacpp-release.b2968
+2405231219.0.0+llamacpp-release.b2979
diff --git a/ggml-tag-current.txt b/ggml-tag-current.txt
index cafd479..ab62181 100644
--- a/ggml-tag-current.txt
+++ b/ggml-tag-current.txt
@@ -1 +1 @@
-b2968
+b2979
diff --git a/ggml-tag-previous.txt b/ggml-tag-previous.txt
index 4bd6edd..cafd479 100644
--- a/ggml-tag-previous.txt
+++ b/ggml-tag-previous.txt
@@ -1 +1 @@
-b2965
+b2968
diff --git a/src/lib.rs b/src/lib.rs
index dee3e19..a16f5d2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -728,28 +728,26 @@ pub const ggml_op_GGML_OP_ARANGE: ggml_op = 51;
 pub const ggml_op_GGML_OP_TIMESTEP_EMBEDDING: ggml_op = 52;
 pub const ggml_op_GGML_OP_ARGSORT: ggml_op = 53;
 pub const ggml_op_GGML_OP_LEAKY_RELU: ggml_op = 54;
-pub const ggml_op_GGML_OP_FLASH_ATTN: ggml_op = 55;
-pub const ggml_op_GGML_OP_FLASH_ATTN_EXT: ggml_op = 56;
-pub const ggml_op_GGML_OP_FLASH_FF: ggml_op = 57;
-pub const ggml_op_GGML_OP_FLASH_ATTN_BACK: ggml_op = 58;
-pub const ggml_op_GGML_OP_SSM_CONV: ggml_op = 59;
-pub const ggml_op_GGML_OP_SSM_SCAN: ggml_op = 60;
-pub const ggml_op_GGML_OP_WIN_PART: ggml_op = 61;
-pub const ggml_op_GGML_OP_WIN_UNPART: ggml_op = 62;
-pub const ggml_op_GGML_OP_GET_REL_POS: ggml_op = 63;
-pub const ggml_op_GGML_OP_ADD_REL_POS: ggml_op = 64;
-pub const ggml_op_GGML_OP_UNARY: ggml_op = 65;
-pub const ggml_op_GGML_OP_MAP_UNARY: ggml_op = 66;
-pub const ggml_op_GGML_OP_MAP_BINARY: ggml_op = 67;
-pub const ggml_op_GGML_OP_MAP_CUSTOM1_F32: ggml_op = 68;
-pub const ggml_op_GGML_OP_MAP_CUSTOM2_F32: ggml_op = 69;
-pub const ggml_op_GGML_OP_MAP_CUSTOM3_F32: ggml_op = 70;
-pub const ggml_op_GGML_OP_MAP_CUSTOM1: ggml_op = 71;
-pub const ggml_op_GGML_OP_MAP_CUSTOM2: ggml_op = 72;
-pub const ggml_op_GGML_OP_MAP_CUSTOM3: ggml_op = 73;
-pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS: ggml_op = 74;
-pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK: ggml_op = 75;
-pub const ggml_op_GGML_OP_COUNT: ggml_op = 76;
+pub const ggml_op_GGML_OP_FLASH_ATTN_EXT: ggml_op = 55;
+pub const ggml_op_GGML_OP_FLASH_ATTN_BACK: ggml_op = 56;
+pub const ggml_op_GGML_OP_SSM_CONV: ggml_op = 57;
+pub const ggml_op_GGML_OP_SSM_SCAN: ggml_op = 58;
+pub const ggml_op_GGML_OP_WIN_PART: ggml_op = 59;
+pub const ggml_op_GGML_OP_WIN_UNPART: ggml_op = 60;
+pub const ggml_op_GGML_OP_GET_REL_POS: ggml_op = 61;
+pub const ggml_op_GGML_OP_ADD_REL_POS: ggml_op = 62;
+pub const ggml_op_GGML_OP_UNARY: ggml_op = 63;
+pub const ggml_op_GGML_OP_MAP_UNARY: ggml_op = 64;
+pub const ggml_op_GGML_OP_MAP_BINARY: ggml_op = 65;
+pub const ggml_op_GGML_OP_MAP_CUSTOM1_F32: ggml_op = 66;
+pub const ggml_op_GGML_OP_MAP_CUSTOM2_F32: ggml_op = 67;
+pub const ggml_op_GGML_OP_MAP_CUSTOM3_F32: ggml_op = 68;
+pub const ggml_op_GGML_OP_MAP_CUSTOM1: ggml_op = 69;
+pub const ggml_op_GGML_OP_MAP_CUSTOM2: ggml_op = 70;
+pub const ggml_op_GGML_OP_MAP_CUSTOM3: ggml_op = 71;
+pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS: ggml_op = 72;
+pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK: ggml_op = 73;
+pub const ggml_op_GGML_OP_COUNT: ggml_op = 74;
 pub const ggml_unary_op_GGML_UNARY_OP_ABS: ggml_unary_op = 0;
 pub const ggml_unary_op_GGML_UNARY_OP_SGN: ggml_unary_op = 1;
 pub const ggml_unary_op_GGML_UNARY_OP_NEG: ggml_unary_op = 2;
@@ -4852,13 +4850,6 @@ extern "C" {
         a: *mut ggml_tensor,
         k: ::std::os::raw::c_int,
     ) -> *mut ggml_tensor;
-    pub fn ggml_flash_attn(
-        ctx: *mut ggml_context,
-        q: *mut ggml_tensor,
-        k: *mut ggml_tensor,
-        v: *mut ggml_tensor,
-        masked: bool,
-    ) -> *mut ggml_tensor;
     pub fn ggml_flash_attn_ext(
         ctx: *mut ggml_context,
         q: *mut ggml_tensor,
@@ -4877,14 +4868,6 @@ extern "C" {
         d: *mut ggml_tensor,
         masked: bool,
     ) -> *mut ggml_tensor;
-    pub fn ggml_flash_ff(
-        ctx: *mut ggml_context,
-        a: *mut ggml_tensor,
-        b0: *mut ggml_tensor,
-        b1: *mut ggml_tensor,
-        c0: *mut ggml_tensor,
-        c1: *mut ggml_tensor,
-    ) -> *mut ggml_tensor;
     pub fn ggml_ssm_conv(
         ctx: *mut ggml_context,
         s: *mut ggml_tensor,