From d7a3bbd5b548b5c878f9b973166ed2dd92e15517 Mon Sep 17 00:00:00 2001 From: github-actions Date: Thu, 23 May 2024 12:20:31 +0000 Subject: [PATCH] [auto] Sync version 2405231219.0.0+llamacpp-release.b2979 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit == Relevant log messages from source repo: commit 9b82476ee9e73065a759f8bcc4cf27ec7ab2ed8c Author: fairydreaming <166155368+fairydreaming@users.noreply.github.com> Date: Thu May 23 11:49:53 2024 +0200 Add missing inference support for GPTNeoXForCausalLM (Pythia and GPT-NeoX base models) (#7461) * convert-hf : add conversion of bloom-style qkv tensor to gpt-style qkv (code borrowed from BloomModel) * llama : add inference support for LLM_ARCH_GPTNEOX * llama : add model types for every Pythia variant and GPT-NeoX Co-authored-by: Stanisław Szymczyk commit a61a94e543e3c6877c087e80fca27a0313ce5fd5 Author: Georgi Gerganov Date: Thu May 23 12:38:18 2024 +0300 llama : rename n_ctx -> cache.size, less confusing (#0) commit d48c88cbd563b6cf0ce972e2f56796896e240736 Author: Georgi Gerganov Date: Thu May 23 10:00:44 2024 +0300 ggml : remove ggml_flash_attn and ggml_flash_ff (#7463) ggml-ci commit e84b71c2c6da6e69c8f815168ea836f9716a325e Author: Georgi Gerganov Date: Thu May 23 10:00:21 2024 +0300 ggml : drop support for QK_K=64 (#7473) * ggml : drop support for QK_K=64 ggml-ci * opencl : restore QK_K=256 define commit 1b1e27cb49158123ef4902aa41eb368c9e76e6a1 Author: 0cc4m Date: Thu May 23 08:59:59 2024 +0200 Update vulkan rope implementation to support frequency factors (#7475) --- Cargo.toml | 2 +- VERSION.txt | 2 +- ggml-tag-current.txt | 2 +- ggml-tag-previous.txt | 2 +- src/lib.rs | 57 +++++++++++++++---------------------------- 5 files changed, 24 insertions(+), 41 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 342e54e..86b094f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ggml-sys-bleedingedge" -version = "2405221814.0.0+llamacpp-release.b2968" +version = "2405231219.0.0+llamacpp-release.b2979" description = "Bleeding edge low-level bindings to GGML. " repository = "https://github.com/KerfuffleV2/ggml-sys-bleedingedge" keywords = ["deep-learning", "machine-learning", "tensors", "ggml", "ml"] diff --git a/VERSION.txt b/VERSION.txt index 4ca95e9..ab9cc17 100644 --- a/VERSION.txt +++ b/VERSION.txt @@ -1 +1 @@ -2405221814.0.0+llamacpp-release.b2968 +2405231219.0.0+llamacpp-release.b2979 diff --git a/ggml-tag-current.txt b/ggml-tag-current.txt index cafd479..ab62181 100644 --- a/ggml-tag-current.txt +++ b/ggml-tag-current.txt @@ -1 +1 @@ -b2968 +b2979 diff --git a/ggml-tag-previous.txt b/ggml-tag-previous.txt index 4bd6edd..cafd479 100644 --- a/ggml-tag-previous.txt +++ b/ggml-tag-previous.txt @@ -1 +1 @@ -b2965 +b2968 diff --git a/src/lib.rs b/src/lib.rs index dee3e19..a16f5d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -728,28 +728,26 @@ pub const ggml_op_GGML_OP_ARANGE: ggml_op = 51; pub const ggml_op_GGML_OP_TIMESTEP_EMBEDDING: ggml_op = 52; pub const ggml_op_GGML_OP_ARGSORT: ggml_op = 53; pub const ggml_op_GGML_OP_LEAKY_RELU: ggml_op = 54; -pub const ggml_op_GGML_OP_FLASH_ATTN: ggml_op = 55; -pub const ggml_op_GGML_OP_FLASH_ATTN_EXT: ggml_op = 56; -pub const ggml_op_GGML_OP_FLASH_FF: ggml_op = 57; -pub const ggml_op_GGML_OP_FLASH_ATTN_BACK: ggml_op = 58; -pub const ggml_op_GGML_OP_SSM_CONV: ggml_op = 59; -pub const ggml_op_GGML_OP_SSM_SCAN: ggml_op = 60; -pub const ggml_op_GGML_OP_WIN_PART: ggml_op = 61; -pub const ggml_op_GGML_OP_WIN_UNPART: ggml_op = 62; -pub const ggml_op_GGML_OP_GET_REL_POS: ggml_op = 63; -pub const ggml_op_GGML_OP_ADD_REL_POS: ggml_op = 64; -pub const ggml_op_GGML_OP_UNARY: ggml_op = 65; -pub const ggml_op_GGML_OP_MAP_UNARY: ggml_op = 66; -pub const ggml_op_GGML_OP_MAP_BINARY: ggml_op = 67; -pub const ggml_op_GGML_OP_MAP_CUSTOM1_F32: ggml_op = 68; -pub const ggml_op_GGML_OP_MAP_CUSTOM2_F32: ggml_op = 69; -pub const ggml_op_GGML_OP_MAP_CUSTOM3_F32: ggml_op = 70; -pub const ggml_op_GGML_OP_MAP_CUSTOM1: ggml_op = 71; -pub const ggml_op_GGML_OP_MAP_CUSTOM2: ggml_op = 72; -pub const ggml_op_GGML_OP_MAP_CUSTOM3: ggml_op = 73; -pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS: ggml_op = 74; -pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK: ggml_op = 75; -pub const ggml_op_GGML_OP_COUNT: ggml_op = 76; +pub const ggml_op_GGML_OP_FLASH_ATTN_EXT: ggml_op = 55; +pub const ggml_op_GGML_OP_FLASH_ATTN_BACK: ggml_op = 56; +pub const ggml_op_GGML_OP_SSM_CONV: ggml_op = 57; +pub const ggml_op_GGML_OP_SSM_SCAN: ggml_op = 58; +pub const ggml_op_GGML_OP_WIN_PART: ggml_op = 59; +pub const ggml_op_GGML_OP_WIN_UNPART: ggml_op = 60; +pub const ggml_op_GGML_OP_GET_REL_POS: ggml_op = 61; +pub const ggml_op_GGML_OP_ADD_REL_POS: ggml_op = 62; +pub const ggml_op_GGML_OP_UNARY: ggml_op = 63; +pub const ggml_op_GGML_OP_MAP_UNARY: ggml_op = 64; +pub const ggml_op_GGML_OP_MAP_BINARY: ggml_op = 65; +pub const ggml_op_GGML_OP_MAP_CUSTOM1_F32: ggml_op = 66; +pub const ggml_op_GGML_OP_MAP_CUSTOM2_F32: ggml_op = 67; +pub const ggml_op_GGML_OP_MAP_CUSTOM3_F32: ggml_op = 68; +pub const ggml_op_GGML_OP_MAP_CUSTOM1: ggml_op = 69; +pub const ggml_op_GGML_OP_MAP_CUSTOM2: ggml_op = 70; +pub const ggml_op_GGML_OP_MAP_CUSTOM3: ggml_op = 71; +pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS: ggml_op = 72; +pub const ggml_op_GGML_OP_CROSS_ENTROPY_LOSS_BACK: ggml_op = 73; +pub const ggml_op_GGML_OP_COUNT: ggml_op = 74; pub const ggml_unary_op_GGML_UNARY_OP_ABS: ggml_unary_op = 0; pub const ggml_unary_op_GGML_UNARY_OP_SGN: ggml_unary_op = 1; pub const ggml_unary_op_GGML_UNARY_OP_NEG: ggml_unary_op = 2; @@ -4852,13 +4850,6 @@ extern "C" { a: *mut ggml_tensor, k: ::std::os::raw::c_int, ) -> *mut ggml_tensor; - pub fn ggml_flash_attn( - ctx: *mut ggml_context, - q: *mut ggml_tensor, - k: *mut ggml_tensor, - v: *mut ggml_tensor, - masked: bool, - ) -> *mut ggml_tensor; pub fn ggml_flash_attn_ext( ctx: *mut ggml_context, q: *mut ggml_tensor, @@ -4877,14 +4868,6 @@ extern "C" { d: *mut ggml_tensor, masked: bool, ) -> *mut ggml_tensor; - pub fn ggml_flash_ff( - ctx: *mut ggml_context, - a: *mut ggml_tensor, - b0: *mut ggml_tensor, - b1: *mut ggml_tensor, - c0: *mut ggml_tensor, - c1: *mut ggml_tensor, - ) -> *mut ggml_tensor; pub fn ggml_ssm_conv( ctx: *mut ggml_context, s: *mut ggml_tensor,