Commit 7044072

ggml-backend : add load_tensor() to backend API

Add a new backend API that allows loading a tensor's data using a precomputed hash stored in the model KV. ref: #12954

1 parent 7c727fb
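The new call is meant to be paired with a fallback, as in the src/llama-model-loader.cpp hunk below. A minimal sketch of the call pattern, reusing the loader's variable names:

// hash_val is the precomputed FNV-1a hash read from the model KV;
// if the backend does not implement load_tensor(), or has no data
// matching the hash, fall back to uploading the bytes directly
if (!ggml_backend_tensor_load(cur, /*offset =*/ 0, hash_val)) {
    ggml_backend_tensor_set(cur, data, 0, n_size);
}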

File tree: 6 files changed (+82, -3 lines)

examples/gguf-hash/gguf-hash.cpp

Lines changed: 37 additions & 2 deletions
@@ -55,6 +55,7 @@ typedef enum {
 
 struct hash_params {
     std::string input;
+    bool fnv = false;
     bool xxh64 = false;
     bool sha1 = false;
     bool sha256 = false;
@@ -103,6 +104,7 @@ static void hash_print_usage(const char * executable) {
     printf("\n");
     printf("options:\n");
     printf("  -h, --help            show this help message and exit\n");
+    printf("      --fnv             use FNV-1a hash\n");
     printf("      --xxh64           use xxh64 hash\n");
     printf("      --sha1            use sha1 hash\n");
     printf("      --sha256          use sha256 hash\n");
@@ -131,6 +133,11 @@ static void hash_params_parse_ex(int argc, const char ** argv, hash_params & params) {
         exit(0);
     }
 
+    if (arg == "--fnv") {
+        arg_found = true;
+        params.fnv = true;
+    }
+
     if (arg == "--xxh64") {
         arg_found = true;
         params.xxh64 = true;
@@ -283,6 +290,18 @@ static void generate_uuidv5(const unsigned char sha1_digest[20], unsigned char uuid[16]) {
     uuid[ 8] |= (0x8 << 4);
 }
 
+// Computes FNV-1a hash of the data
+static uint64_t fnv_hash(const uint8_t * data, size_t len) {
+    const uint64_t fnv_prime = 0x100000001b3ULL;
+    uint64_t hash = 0xcbf29ce484222325ULL;
+
+    for (size_t i = 0; i < len; ++i) {
+        hash ^= data[i];
+        hash *= fnv_prime;
+    }
+    return hash;
+}
+
 static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
     const std::string & fname = hash_params.input;
     struct ggml_context * ctx_data = NULL;
@@ -326,7 +345,11 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
         SHA1Update( &sha1_for_uuid_ctx, (unsigned char const *)uuidv5_namespace, sizeof(uuidv5_namespace));
     }
 
+    struct gguf_context * ctx_out = gguf_init_empty();
     struct gguf_context * ctx = gguf_init_from_file(fname.c_str(), params);
+
+    gguf_set_kv(ctx_out, ctx);
+
     const int n_tensors = gguf_get_n_tensors(ctx);
     bool tensor_layer_in_manifest = false;
     bool model_in_manifest = false;
@@ -335,10 +358,19 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
     for (int i = 0; i < n_tensors; ++i) {
         const char * name = gguf_get_tensor_name(ctx, i);
         struct ggml_tensor * cur = ggml_get_tensor(ctx_data, name);
+        gguf_add_tensor(ctx_out, cur);
         auto n_bytes = ggml_nbytes(cur);
         auto *raw_data = cur->data;
         const std::string tensor_layer_name = fname + ":" + name;
 
+        if (hash_params.fnv) {
+            uint64_t hash = fnv_hash((const uint8_t *)raw_data, n_bytes);
+            printf("%016lx %s\n", hash, tensor_layer_name.c_str());
+            char hash_key[128];
+            snprintf(hash_key, sizeof(hash_key), "%s_hash", name);
+            gguf_set_val_u64(ctx_out, hash_key, hash);
+        }
+
         if (hash_params.xxh64) {
 
             if (!hash_params.no_layer) {
@@ -580,6 +612,9 @@ static hash_exit_code_t gguf_hash(const hash_params & hash_params) {
         }
     }
 
+    auto fname_out = fname + ".rpc";
+    gguf_write_to_file(ctx_out, fname_out.c_str(), false);
+    gguf_free(ctx_out);
 
     ggml_free(ctx_data);
     gguf_free(ctx);
@@ -663,7 +698,7 @@ int main(int argc, const char ** argv) {
 
     // Autoselect the highest security hash if manifest is provided but
    // the user has not specifically defined the hash they care about
-    if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
+    if (!params.fnv && !params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
        // User has not selected a specific value, pick most secure hash
        if (manifest_check.sha256) {
            params.sha256 = true;
@@ -680,7 +715,7 @@ int main(int argc, const char ** argv) {
     }
 
     // By default if no switch argument provided, assume xxh64
-    if (!params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
+    if (!params.fnv && !params.xxh64 && !params.sha1 && !params.uuid && !params.sha256) {
         params.xxh64 = true;
     }

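As a quick sanity check of the FNV-1a constants used above, the routine can be exercised against the standard 64-bit test vector for "a" (af63dc4c8601ec8c). This self-contained check program is not part of the commit:

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>

// same constants as fnv_hash() in the diff above
static uint64_t fnv1a64(const uint8_t * data, size_t len) {
    uint64_t hash = 0xcbf29ce484222325ULL;   // FNV offset basis
    for (size_t i = 0; i < len; ++i) {
        hash ^= data[i];
        hash *= 0x100000001b3ULL;            // FNV prime
    }
    return hash;
}

int main() {
    const char * s = "a";
    // prints af63dc4c8601ec8c, the well-known FNV-1a 64-bit value for "a"
    printf("%016" PRIx64 "\n", fnv1a64((const uint8_t *) s, strlen(s)));
    return 0;
}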
ggml/include/ggml-backend.h

Lines changed: 1 addition & 0 deletions
@@ -88,6 +88,7 @@ extern "C" {
 
     // "offset" refers to the offset in tensor->data for setting/getting data
     GGML_API void ggml_backend_tensor_set(   struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
+    GGML_API bool ggml_backend_tensor_load(  struct ggml_tensor * tensor, size_t offset, uint64_t hash);
     GGML_API void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
     GGML_API void ggml_backend_tensor_memset(struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);

ggml/src/ggml-backend-impl.h

Lines changed: 2 additions & 0 deletions
@@ -49,6 +49,8 @@ extern "C" {
     void (*memset_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, uint8_t value, size_t offset, size_t size);
     void (*set_tensor)   (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
     void (*get_tensor)   (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
+    // (optional) load tensor with data which has the specified hash
+    bool (*load_tensor)  (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, size_t offset, uint64_t hash);
     // (optional) tensor copy: dst is in the buffer, src may be in any buffer, including buffers from a different backend (return false if not supported)
     bool (*cpy_tensor)   (ggml_backend_buffer_t buffer, const const struct ggml_tensor * src, struct ggml_tensor * dst);
     // clear the entire buffer
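In this commit only the RPC backend implements the new callback; the CPU and multi-buffer interfaces leave it NULL. Purely as an illustration of the contract (not code from this commit; every name below is made up, and the usual ggml-backend-impl.h / <cstring> includes are assumed), a backend-local implementation could look roughly like this:

// hypothetical store of previously seen blobs, keyed by their hash
struct cache_entry {
    uint64_t     hash;
    const void * data;
    size_t       size;
};

static cache_entry local_cache[64];
static int         local_cache_n = 0;

static const cache_entry * local_cache_find(uint64_t hash) {
    for (int i = 0; i < local_cache_n; ++i) {
        if (local_cache[i].hash == hash) {
            return &local_cache[i];
        }
    }
    return nullptr;
}

// possible shape of the optional callback: copy the cached bytes into the
// tensor on a hit, or return false so the caller falls back to set_tensor()
static bool example_buffer_load_tensor(ggml_backend_buffer_t buffer,
        struct ggml_tensor * tensor, size_t offset, uint64_t hash) {
    GGML_UNUSED(buffer);
    const cache_entry * e = local_cache_find(hash);
    if (e == nullptr) {
        return false;
    }
    memcpy((char *) tensor->data + offset, e->data, e->size);
    return true;
}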

ggml/src/ggml-backend.cpp

Lines changed: 15 additions & 0 deletions
@@ -268,6 +268,18 @@ void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
     buf->iface.set_tensor(buf, tensor, data, offset, size);
 }
 
+bool ggml_backend_tensor_load(struct ggml_tensor * tensor, size_t offset, uint64_t hash) {
+    GGML_ASSERT(tensor);
+    ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
+    GGML_ASSERT(buf != NULL && "tensor buffer not set");
+    GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
+
+    if (buf->iface.load_tensor == NULL) {
+        return false;
+    }
+    return buf->iface.load_tensor(buf, tensor, offset, hash);
+}
+
 void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     GGML_ASSERT(tensor);
     ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
@@ -560,6 +572,7 @@ static const struct ggml_backend_buffer_i ggml_backend_multi_buffer_i = {
     /* .memset_tensor = */ NULL,
     /* .set_tensor    = */ NULL,
     /* .get_tensor    = */ NULL,
+    /* .load_tensor   = */ NULL,
     /* .cpy_tensor    = */ NULL,
     /* .clear         = */ ggml_backend_multi_buffer_clear,
     /* .reset         = */ NULL,
@@ -1908,6 +1921,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_i = {
     /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
     /* .set_tensor    = */ ggml_backend_cpu_buffer_set_tensor,
     /* .get_tensor    = */ ggml_backend_cpu_buffer_get_tensor,
+    /* .load_tensor   = */ NULL,
     /* .cpy_tensor    = */ ggml_backend_cpu_buffer_cpy_tensor,
     /* .clear         = */ ggml_backend_cpu_buffer_clear,
     /* .reset         = */ NULL,
@@ -1920,6 +1934,7 @@ static const struct ggml_backend_buffer_i ggml_backend_cpu_buffer_from_ptr_i = {
     /* .memset_tensor = */ ggml_backend_cpu_buffer_memset_tensor,
     /* .set_tensor    = */ ggml_backend_cpu_buffer_set_tensor,
     /* .get_tensor    = */ ggml_backend_cpu_buffer_get_tensor,
+    /* .load_tensor   = */ NULL,
     /* .cpy_tensor    = */ ggml_backend_cpu_buffer_cpy_tensor,
     /* .clear         = */ ggml_backend_cpu_buffer_clear,
     /* .reset         = */ NULL,

ggml/src/ggml-rpc/ggml-rpc.cpp

Lines changed: 16 additions & 0 deletions
@@ -569,6 +569,21 @@ static void ggml_backend_rpc_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
     GGML_ASSERT(status);
 }
 
+static bool ggml_backend_rpc_buffer_load_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, size_t offset, uint64_t hash) {
+    ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
+    rpc_tensor rpc_tensor = serialize_tensor(tensor);
+    // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
+    size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + sizeof(uint64_t);
+    std::vector<uint8_t> input(input_size, 0);
+    memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
+    memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
+    memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash));
+    rpc_msg_set_tensor_hash_rsp response;
+    bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), input.size(), &response, sizeof(response));
+    GGML_ASSERT(status);
+    return response.result;
+}
+
 static bool ggml_backend_rpc_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
     // check if src and dst are on the same server
     ggml_backend_buffer_t src_buffer = src->buffer;
@@ -602,6 +617,7 @@ static ggml_backend_buffer_i ggml_backend_rpc_buffer_interface = {
     /* .memset_tensor = */ NULL,
     /* .set_tensor    = */ ggml_backend_rpc_buffer_set_tensor,
     /* .get_tensor    = */ ggml_backend_rpc_buffer_get_tensor,
+    /* .load_tensor   = */ ggml_backend_rpc_buffer_load_tensor,
     /* .cpy_tensor    = */ ggml_backend_rpc_buffer_cpy_tensor,
     /* .clear         = */ ggml_backend_rpc_buffer_clear,
     /* .reset         = */ NULL,
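The matching server-side handler for RPC_CMD_SET_TENSOR_HASH is not shown in this diff. A sketch of how the fixed-size payload built above would be parsed on the receiving end (the function name is an assumption; the layout mirrors the client serialization):

// wire layout, as serialized by the client above:
// | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
static bool deserialize_set_tensor_hash(const std::vector<uint8_t> & input,
        rpc_tensor & tensor, uint64_t & offset, uint64_t & hash) {
    if (input.size() != sizeof(rpc_tensor) + 2 * sizeof(uint64_t)) {
        return false; // malformed request
    }
    memcpy(&tensor, input.data(), sizeof(rpc_tensor));
    memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
    memcpy(&hash,   input.data() + sizeof(rpc_tensor) + sizeof(offset), sizeof(hash));
    return true;
}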

src/llama-model-loader.cpp

Lines changed: 11 additions & 1 deletion
@@ -376,6 +376,7 @@ namespace GGUFMeta {
 template bool llama_model_loader::get_key<bool>       (enum llm_kv kid, bool & result, bool required);
 template bool llama_model_loader::get_key<float>      (enum llm_kv kid, float & result, bool required);
 template bool llama_model_loader::get_key<uint32_t>   (enum llm_kv kid, uint32_t & result, bool required);
+template bool llama_model_loader::get_key<uint64_t>   (enum llm_kv kid, uint64_t & result, bool required);
 template bool llama_model_loader::get_key<std::string>(enum llm_kv kid, std::string & result, bool required);
 
 template<>
@@ -1022,7 +1023,16 @@ bool llama_model_loader::load_all_data(
                 mmap_used.first  = std::min(mmap_used.first,  weight->offs);
                 mmap_used.second = std::max(mmap_used.second, weight->offs + n_size);
             } else {
-                ggml_backend_tensor_set(cur, data, 0, n_size);
+                char hash_key[128];
+                snprintf(hash_key, sizeof(hash_key), "%s_hash", ggml_get_name(cur));
+                uint64_t hash_val = 0;
+                if (get_key(hash_key, hash_val, false)) {
+                    if (!ggml_backend_tensor_load(cur, 0, hash_val)) {
+                        ggml_backend_tensor_set(cur, data, 0, n_size);
+                    }
+                } else {
+                    ggml_backend_tensor_set(cur, data, 0, n_size);
+                }
             }
         } else {
             const auto & file = files.at(weight->idx);
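Taken together: running gguf-hash with the new --fnv switch writes a <model>.rpc sidecar whose KV store carries one u64 entry per tensor under the key "<tensor name>_hash". At load time, llama_model_loader looks up that same key and, when present, first offers the hash to the backend via ggml_backend_tensor_load(), uploading the raw bytes with ggml_backend_tensor_set() only when the backend reports a miss.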
