From 349439f239c4f5e27d414719ba8ac7340270ca6d Mon Sep 17 00:00:00 2001 From: leejet Date: Mon, 29 Jan 2024 23:05:18 +0800 Subject: [PATCH] style: format code --- .clang-format | 1 - clip.hpp | 57 +++++++++--------- common.hpp | 18 +++--- control.hpp | 134 +++++++++++++++++++++--------------------- esrgan.hpp | 2 +- examples/cli/main.cpp | 22 +++---- ggml_extend.hpp | 2 +- model.cpp | 2 +- preprocessing.hpp | 104 ++++++++++++++++---------------- stable-diffusion.cpp | 20 +++---- stable-diffusion.h | 12 ++-- unet.hpp | 24 ++++---- util.cpp | 2 +- vae.hpp | 2 +- 14 files changed, 200 insertions(+), 202 deletions(-) diff --git a/.clang-format b/.clang-format index 4fe720b8..37881bfc 100644 --- a/.clang-format +++ b/.clang-format @@ -3,7 +3,6 @@ UseTab: Never IndentWidth: 4 TabWidth: 4 AllowShortIfStatementsOnASingleLine: false -IndentCaseLabels: false ColumnLimit: 0 AccessModifierOffset: -4 NamespaceIndentation: All diff --git a/clip.hpp b/clip.hpp index e0451099..546e944b 100644 --- a/clip.hpp +++ b/clip.hpp @@ -241,7 +241,7 @@ class CLIPTokenizer { std::vector tokenize(std::string text, on_new_token_cb_t on_new_token_cb, size_t max_length = 0, - bool padding = false) { + bool padding = false) { std::vector tokens = encode(text, on_new_token_cb); tokens.insert(tokens.begin(), BOS_TOKEN_ID); if (max_length > 0) { @@ -486,7 +486,6 @@ struct ResidualAttentionBlock { ln2_w = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size); ln2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size); - } void map_by_name(std::map& tensors, const std::string prefix) { @@ -661,8 +660,8 @@ struct CLIPTextModel { mem_size += ggml_row_size(GGML_TYPE_I32, hidden_size * max_position_embeddings); // position_ids mem_size += ggml_row_size(wtype, hidden_size * vocab_size); // token_embed_weight mem_size += ggml_row_size(wtype, hidden_size * max_position_embeddings); // position_embed_weight - if(version == OPENAI_CLIP_VIT_L_14) { - mem_size += ggml_row_size(wtype, hidden_size * max_position_embeddings); // token_embed_custom + if (version == OPENAI_CLIP_VIT_L_14) { + mem_size += ggml_row_size(wtype, hidden_size * max_position_embeddings); // token_embed_custom } for (int i = 0; i < num_hidden_layers; i++) { mem_size += resblocks[i].calculate_mem_size(wtype); @@ -688,32 +687,32 @@ struct CLIPTextModel { } } - bool load_embedding(std::string embd_name, std::string embd_path, std::vector &bpe_tokens) { + bool load_embedding(std::string embd_name, std::string embd_path, std::vector& bpe_tokens) { // the order matters ModelLoader model_loader; - if(!model_loader.init_from_file(embd_path)) { + if (!model_loader.init_from_file(embd_path)) { LOG_ERROR("embedding '%s' failed", embd_name.c_str()); return false; } struct ggml_init_params params; - params.mem_size = 32 * 1024; // max for custom embeddings 32 KB - params.mem_buffer = NULL; - params.no_alloc = false; + params.mem_size = 32 * 1024; // max for custom embeddings 32 KB + params.mem_buffer = NULL; + params.no_alloc = false; struct ggml_context* embd_ctx = ggml_init(params); - struct ggml_tensor* embd = NULL; - auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) { - if(tensor_storage.ne[0] != hidden_size) { + struct ggml_tensor* embd = NULL; + auto on_load = [&](const TensorStorage& tensor_storage, ggml_tensor** dst_tensor) { + if (tensor_storage.ne[0] != hidden_size) { LOG_DEBUG("embedding wrong hidden size, got %i, expected %i", tensor_storage.ne[0], hidden_size); return false; } - embd = ggml_new_tensor_2d(embd_ctx, token_embed_weight->type, hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1); + embd = ggml_new_tensor_2d(embd_ctx, token_embed_weight->type, hidden_size, tensor_storage.n_dims > 1 ? tensor_storage.ne[1] : 1); *dst_tensor = embd; return true; }; model_loader.load_tensors(on_load, NULL); ggml_backend_tensor_set(token_embed_custom, embd->data, num_custom_embeddings * hidden_size * ggml_type_size(token_embed_custom->type), ggml_nbytes(embd)); readed_embeddings.push_back(embd_name); - for(int i = 0; i < embd->ne[1]; i++) { + for (int i = 0; i < embd->ne[1]; i++) { bpe_tokens.push_back(vocab_size + num_custom_embeddings); // LOG_DEBUG("new custom token: %i", vocab_size + num_custom_embeddings); num_custom_embeddings++; @@ -775,7 +774,7 @@ struct CLIPTextModel { final_ln_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_size); - if(version == OPENAI_CLIP_VIT_L_14) { + if (version == OPENAI_CLIP_VIT_L_14) { token_embed_custom = ggml_new_tensor_2d(ctx, wtype, hidden_size, max_position_embeddings); } @@ -878,11 +877,11 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule { auto hidden_states2 = text_model2.forward(ctx0, input_ids2, NULL); // [N, n_token, hidden_size2] hidden_states2 = ggml_reshape_4d(ctx0, - hidden_states2, - hidden_states2->ne[0], - hidden_states2->ne[1], - hidden_states2->ne[2], - hidden_states2->ne[3]); + hidden_states2, + hidden_states2->ne[0], + hidden_states2->ne[1], + hidden_states2->ne[2], + hidden_states2->ne[3]); hidden_states2 = ggml_cont(ctx0, ggml_permute(ctx0, hidden_states2, 2, 0, 1, 3)); hidden_states = ggml_concat(ctx0, hidden_states, hidden_states2); // [N, n_token, hidden_size + hidden_size2] @@ -913,20 +912,20 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule { LOG_DEBUG("parse '%s' to %s", text.c_str(), ss.str().c_str()); } - auto on_new_token_cb = [&] (std::string& str, std::vector &bpe_tokens) -> bool { - size_t word_end = str.find(","); + auto on_new_token_cb = [&](std::string& str, std::vector& bpe_tokens) -> bool { + size_t word_end = str.find(","); std::string embd_name = word_end == std::string::npos ? str : str.substr(0, word_end); - embd_name = trim(embd_name); + embd_name = trim(embd_name); std::string embd_path = get_full_path(text_model.embd_dir, embd_name + ".pt"); - if(embd_path.size() == 0) { + if (embd_path.size() == 0) { embd_path = get_full_path(text_model.embd_dir, embd_name + ".ckpt"); } - if(embd_path.size() == 0) { + if (embd_path.size() == 0) { embd_path = get_full_path(text_model.embd_dir, embd_name + ".safetensors"); } - if(embd_path.size() > 0) { - if(text_model.load_embedding(embd_name, embd_path, bpe_tokens)) { - if(word_end != std::string::npos) { + if (embd_path.size() > 0) { + if (text_model.load_embedding(embd_name, embd_path, bpe_tokens)) { + if (word_end != std::string::npos) { str = str.substr(word_end); } else { str = ""; @@ -1033,7 +1032,7 @@ struct FrozenCLIPEmbedderWithCustomWords : public GGMLModule { struct ggml_tensor* embeddings = NULL; - if(text_model.num_custom_embeddings > 0 && version != VERSION_XL) { + if (text_model.num_custom_embeddings > 0 && version != VERSION_XL) { embeddings = ggml_new_tensor_2d(ctx0, wtype, text_model.hidden_size, text_model.vocab_size + text_model.num_custom_embeddings /* custom placeholder */); ggml_allocr_alloc(allocr, embeddings); if (!ggml_allocr_is_measure(allocr)) { diff --git a/common.hpp b/common.hpp index a71e4d37..4a423d5a 100644 --- a/common.hpp +++ b/common.hpp @@ -281,7 +281,7 @@ struct SpatialTransformer { mem_size += 6 * ggml_row_size(GGML_TYPE_F32, in_channels); // norm1-3_w/b mem_size += 6 * ggml_row_size(wtype, in_channels * in_channels); // attn1_q/k/v/out_w attn2_q/out_w mem_size += 2 * ggml_row_size(wtype, in_channels * context_dim); // attn2_k/v_w - mem_size += ggml_row_size(wtype, in_channels * 4 * 2 * in_channels ); // ff_0_proj_w + mem_size += ggml_row_size(wtype, in_channels * 4 * 2 * in_channels); // ff_0_proj_w mem_size += ggml_row_size(GGML_TYPE_F32, in_channels * 4 * 2); // ff_0_proj_b mem_size += ggml_row_size(wtype, in_channels * 4 * in_channels); // ff_2_w mem_size += ggml_row_size(GGML_TYPE_F32, in_channels); // ff_2_b @@ -493,15 +493,15 @@ struct SpatialTransformer { { // GEGLU auto x_w = ggml_view_2d(ctx, - transformer.ff_0_proj_w, - transformer.ff_0_proj_w->ne[0], - transformer.ff_0_proj_w->ne[1] / 2, - transformer.ff_0_proj_w->nb[1], - 0); // [in_channels * 4, in_channels] + transformer.ff_0_proj_w, + transformer.ff_0_proj_w->ne[0], + transformer.ff_0_proj_w->ne[1] / 2, + transformer.ff_0_proj_w->nb[1], + 0); // [in_channels * 4, in_channels] auto x_b = ggml_view_1d(ctx, - transformer.ff_0_proj_b, - transformer.ff_0_proj_b->ne[0] / 2, - 0); // [in_channels * 4, in_channels] + transformer.ff_0_proj_b, + transformer.ff_0_proj_b->ne[0] / 2, + 0); // [in_channels * 4, in_channels] auto gate_w = ggml_view_2d(ctx, transformer.ff_0_proj_w, transformer.ff_0_proj_w->ne[0], diff --git a/control.hpp b/control.hpp index 543998f4..4b12ba72 100644 --- a/control.hpp +++ b/control.hpp @@ -1,8 +1,8 @@ #ifndef __CONTROL_HPP__ #define __CONTROL_HPP__ -#include "ggml_extend.hpp" #include "common.hpp" +#include "ggml_extend.hpp" #include "model.h" #define CONTROL_NET_GRAPH_SIZE 1536 @@ -14,33 +14,33 @@ */ struct CNHintBlock { - int hint_channels = 3; - int model_channels = 320; // SD 1.5 - int feat_channels[4] = { 16, 32, 96, 256 }; - int num_blocks = 3; + int hint_channels = 3; + int model_channels = 320; // SD 1.5 + int feat_channels[4] = {16, 32, 96, 256}; + int num_blocks = 3; ggml_tensor* conv_first_w; // [feat_channels[0], hint_channels, 3, 3] - ggml_tensor* conv_first_b; // [feat_channels[0]] + ggml_tensor* conv_first_b; // [feat_channels[0]] struct hint_block { - ggml_tensor* conv_0_w; // [feat_channels[idx], feat_channels[idx], 3, 3] - ggml_tensor* conv_0_b; // [feat_channels[idx]] + ggml_tensor* conv_0_w; // [feat_channels[idx], feat_channels[idx], 3, 3] + ggml_tensor* conv_0_b; // [feat_channels[idx]] - ggml_tensor* conv_1_w; // [feat_channels[idx + 1], feat_channels[idx], 3, 3] - ggml_tensor* conv_1_b; // [feat_channels[idx + 1]] + ggml_tensor* conv_1_w; // [feat_channels[idx + 1], feat_channels[idx], 3, 3] + ggml_tensor* conv_1_b; // [feat_channels[idx + 1]] }; hint_block blocks[3]; - ggml_tensor* conv_final_w; // [model_channels, feat_channels[3], 3, 3] - ggml_tensor* conv_final_b; // [model_channels] + ggml_tensor* conv_final_w; // [model_channels, feat_channels[3], 3, 3] + ggml_tensor* conv_final_b; // [model_channels] size_t calculate_mem_size() { size_t mem_size = feat_channels[0] * hint_channels * 3 * 3 * ggml_type_size(GGML_TYPE_F16); // conv_first_w - mem_size += feat_channels[0] * ggml_type_size(GGML_TYPE_F32); // conv_first_b + mem_size += feat_channels[0] * ggml_type_size(GGML_TYPE_F32); // conv_first_b for (int i = 0; i < num_blocks; i++) { - mem_size += feat_channels[i] * feat_channels[i] * 3 * 3 * ggml_type_size(GGML_TYPE_F16); // conv_0_w - mem_size += feat_channels[i] * ggml_type_size(GGML_TYPE_F32); // conv_0_b + mem_size += feat_channels[i] * feat_channels[i] * 3 * 3 * ggml_type_size(GGML_TYPE_F16); // conv_0_w + mem_size += feat_channels[i] * ggml_type_size(GGML_TYPE_F32); // conv_0_b mem_size += feat_channels[i + 1] * feat_channels[i] * 3 * 3 * ggml_type_size(GGML_TYPE_F16); // conv_1_w - mem_size += feat_channels[i + 1] * ggml_type_size(GGML_TYPE_F32); // conv_1_b + mem_size += feat_channels[i + 1] * ggml_type_size(GGML_TYPE_F32); // conv_1_b } mem_size += model_channels * feat_channels[3] * 3 * 3 * ggml_type_size(GGML_TYPE_F16); // conv_final_w mem_size += model_channels * ggml_type_size(GGML_TYPE_F32); // conv_final_b @@ -65,25 +65,25 @@ struct CNHintBlock { void map_by_name(std::map& tensors, const std::string prefix) { tensors[prefix + "input_hint_block.0.weight"] = conv_first_w; tensors[prefix + "input_hint_block.0.bias"] = conv_first_b; - int index = 2; + int index = 2; for (int i = 0; i < num_blocks; i++) { - tensors[prefix + "input_hint_block." + std::to_string(index) +".weight"] = blocks[i].conv_0_w; - tensors[prefix + "input_hint_block." + std::to_string(index) +".bias"] = blocks[i].conv_0_b; + tensors[prefix + "input_hint_block." + std::to_string(index) + ".weight"] = blocks[i].conv_0_w; + tensors[prefix + "input_hint_block." + std::to_string(index) + ".bias"] = blocks[i].conv_0_b; index += 2; - tensors[prefix + "input_hint_block." + std::to_string(index) +".weight"] = blocks[i].conv_1_w; - tensors[prefix + "input_hint_block." + std::to_string(index) +".bias"] = blocks[i].conv_1_b; + tensors[prefix + "input_hint_block." + std::to_string(index) + ".weight"] = blocks[i].conv_1_w; + tensors[prefix + "input_hint_block." + std::to_string(index) + ".bias"] = blocks[i].conv_1_b; index += 2; } tensors[prefix + "input_hint_block.14.weight"] = conv_final_w; tensors[prefix + "input_hint_block.14.bias"] = conv_final_b; } - struct ggml_tensor* forward(ggml_context* ctx, struct ggml_tensor* x) { + struct ggml_tensor* forward(ggml_context* ctx, struct ggml_tensor* x) { auto h = ggml_nn_conv_2d(ctx, x, conv_first_w, conv_first_b, 1, 1, 1, 1); - h = ggml_silu_inplace(ctx, h); + h = ggml_silu_inplace(ctx, h); auto body_h = h; - for(int i = 0; i < num_blocks; i++) { + for (int i = 0; i < num_blocks; i++) { // operations.conv_nd(dims, 16, 16, 3, padding=1) body_h = ggml_nn_conv_2d(ctx, body_h, blocks[i].conv_0_w, blocks[i].conv_0_b, 1, 1, 1, 1); body_h = ggml_silu_inplace(ctx, body_h); @@ -101,10 +101,10 @@ struct CNHintBlock { struct CNZeroConv { int channels; ggml_tensor* conv_w; // [channels, channels, 1, 1] - ggml_tensor* conv_b; // [channels] + ggml_tensor* conv_b; // [channels] void init_params(struct ggml_context* ctx) { - conv_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, channels,channels); + conv_w = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, 1, 1, channels, channels); conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, channels); } }; @@ -119,7 +119,7 @@ struct ControlNet : public GGMLModule { std::vector transformer_depth = {1, 1, 1, 1}; int time_embed_dim = 1280; // model_channels*4 int num_heads = 8; - int num_head_channels = -1; // channels // num_heads + int num_head_channels = -1; // channels // num_heads int context_dim = 768; int middle_out_channel; CNHintBlock input_hint_block; @@ -146,20 +146,20 @@ struct ControlNet : public GGMLModule { SpatialTransformer middle_block_1; ResBlock middle_block_2; - struct ggml_tensor* middle_block_out_w; // [middle_out_channel, middle_out_channel, 1, 1] - struct ggml_tensor* middle_block_out_b; // [middle_out_channel, ] - ggml_backend_buffer_t control_buffer = NULL; // keep control output tensors in backend memory - ggml_context* control_ctx = NULL; - std::vector controls; // (12 input block outputs, 1 middle block output) SD 1.5 + struct ggml_tensor* middle_block_out_w; // [middle_out_channel, middle_out_channel, 1, 1] + struct ggml_tensor* middle_block_out_b; // [middle_out_channel, ] + ggml_backend_buffer_t control_buffer = NULL; // keep control output tensors in backend memory + ggml_context* control_ctx = NULL; + std::vector controls; // (12 input block outputs, 1 middle block output) SD 1.5 ControlNet() { name = "controlnet"; // input_blocks std::vector input_block_chans; input_block_chans.push_back(model_channels); - int ch = model_channels; + int ch = model_channels; zero_convs[0].channels = model_channels; - int ds = 1; + int ds = 1; int len_mults = channel_mult.size(); for (int i = 0; i < len_mults; i++) { @@ -220,7 +220,7 @@ struct ControlNet : public GGMLModule { middle_block_2.channels = ch; middle_block_2.emb_channels = time_embed_dim; middle_block_2.out_channels = ch; - middle_out_channel = ch; + middle_out_channel = ch; } size_t calculate_mem_size() { @@ -229,7 +229,7 @@ struct ControlNet : public GGMLModule { mem_size += ggml_row_size(wtype, time_embed_dim * model_channels); // time_embed_0_w mem_size += ggml_row_size(GGML_TYPE_F32, time_embed_dim); // time_embed_0_b mem_size += ggml_row_size(wtype, time_embed_dim * time_embed_dim); // time_embed_2_w - mem_size += ggml_row_size(GGML_TYPE_F32,time_embed_dim); // time_embed_2_b + mem_size += ggml_row_size(GGML_TYPE_F32, time_embed_dim); // time_embed_2_b mem_size += ggml_row_size(GGML_TYPE_F16, model_channels * in_channels * 3 * 3); // input_block_0_w mem_size += ggml_row_size(GGML_TYPE_F32, model_channels); // input_block_0_b @@ -260,8 +260,8 @@ struct ControlNet : public GGMLModule { mem_size += middle_block_1.calculate_mem_size(wtype); mem_size += middle_block_2.calculate_mem_size(wtype); - mem_size += ggml_row_size(GGML_TYPE_F16, middle_out_channel * middle_out_channel); // middle_block_out_w - mem_size += ggml_row_size(GGML_TYPE_F32, middle_out_channel); // middle_block_out_b + mem_size += ggml_row_size(GGML_TYPE_F16, middle_out_channel * middle_out_channel); // middle_block_out_w + mem_size += ggml_row_size(GGML_TYPE_F32, middle_out_channel); // middle_block_out_b return mem_size; } @@ -299,10 +299,10 @@ struct ControlNet : public GGMLModule { input_hint_block.init_params(params_ctx); - time_embed_0_w = ggml_new_tensor_2d(params_ctx, wtype, model_channels, time_embed_dim); - time_embed_0_b = ggml_new_tensor_1d(params_ctx, GGML_TYPE_F32, time_embed_dim); - time_embed_2_w = ggml_new_tensor_2d(params_ctx, wtype, time_embed_dim, time_embed_dim); - time_embed_2_b = ggml_new_tensor_1d(params_ctx, GGML_TYPE_F32, time_embed_dim); + time_embed_0_w = ggml_new_tensor_2d(params_ctx, wtype, model_channels, time_embed_dim); + time_embed_0_b = ggml_new_tensor_1d(params_ctx, GGML_TYPE_F32, time_embed_dim); + time_embed_2_w = ggml_new_tensor_2d(params_ctx, wtype, time_embed_dim, time_embed_dim); + time_embed_2_b = ggml_new_tensor_1d(params_ctx, GGML_TYPE_F32, time_embed_dim); // input_blocks input_block_0_w = ggml_new_tensor_4d(params_ctx, GGML_TYPE_F16, 3, 3, in_channels, model_channels); @@ -449,8 +449,8 @@ struct ControlNet : public GGMLModule { } for (int i = 0; i < num_zero_convs; i++) { - tensors[prefix + "zero_convs."+ std::to_string(i) + ".0.weight"] = zero_convs[i].conv_w; - tensors[prefix + "zero_convs."+ std::to_string(i) + ".0.bias"] = zero_convs[i].conv_b; + tensors[prefix + "zero_convs." + std::to_string(i) + ".0.weight"] = zero_convs[i].conv_w; + tensors[prefix + "zero_convs." + std::to_string(i) + ".0.bias"] = zero_convs[i].conv_b; } // middle_blocks @@ -474,9 +474,9 @@ struct ControlNet : public GGMLModule { }; struct ggml_context* ctx0 = ggml_init(params); - struct ggml_cgraph* gf = ggml_new_graph(ctx0); + struct ggml_cgraph* gf = ggml_new_graph(ctx0); // temporal tensors for transfer tensors from cpu to gpu if needed - struct ggml_tensor* hint_t = NULL; + struct ggml_tensor* hint_t = NULL; // it's performing a compute, check if backend isn't cpu if (!ggml_backend_is_cpu(backend)) { // pass input tensors to gpu memory @@ -488,7 +488,7 @@ struct ControlNet : public GGMLModule { } } else { // if it's cpu backend just pass the same tensors - hint_t = hint; + hint_t = hint; } struct ggml_tensor* out = input_hint_block.forward(ctx0, hint_t); ggml_build_forward_expand(gf, out); @@ -499,7 +499,7 @@ struct ControlNet : public GGMLModule { void process_hint(struct ggml_tensor* output, int n_threads, struct ggml_tensor* hint) { // compute buffer size auto get_graph = [&]() -> struct ggml_cgraph* { - return build_graph_hint(hint); + return build_graph_hint(hint); }; GGMLModule::alloc_compute_buffer(get_graph); // perform computation @@ -508,12 +508,12 @@ struct ControlNet : public GGMLModule { } void forward(struct ggml_cgraph* gf, - struct ggml_context* ctx0, - struct ggml_tensor* x, - struct ggml_tensor* hint, - struct ggml_tensor* timesteps, - struct ggml_tensor* context, - struct ggml_tensor* t_emb = NULL) { + struct ggml_context* ctx0, + struct ggml_tensor* x, + struct ggml_tensor* hint, + struct ggml_tensor* timesteps, + struct ggml_tensor* context, + struct ggml_tensor* t_emb = NULL) { // x: [N, in_channels, h, w] // timesteps: [N, ] // t_emb: [N, model_channels] @@ -532,7 +532,7 @@ struct ControlNet : public GGMLModule { // input block 0 struct ggml_tensor* h = ggml_nn_conv_2d(ctx0, x, input_block_0_w, input_block_0_b, 1, 1, 1, 1); // [N, model_channels, h, w] - h = ggml_add(ctx0, h, hint); + h = ggml_add(ctx0, h, hint); auto h_c = ggml_nn_conv_2d(ctx0, h, zero_convs[zero_conv_offset].conv_w, zero_convs[zero_conv_offset].conv_b); ggml_build_forward_expand(gf, ggml_cpy(ctx0, h_c, controls[zero_conv_offset])); @@ -554,7 +554,7 @@ struct ControlNet : public GGMLModule { } if (i != len_mults - 1) { ds *= 2; - h = input_down_samples[i].forward(ctx0, h); // [N, mult*model_channels, h/(2^(i+1)), w/(2^(i+1))] + h = input_down_samples[i].forward(ctx0, h); // [N, mult*model_channels, h/(2^(i+1)), w/(2^(i+1))] h_c = ggml_nn_conv_2d(ctx0, h, zero_convs[zero_conv_offset].conv_w, zero_convs[zero_conv_offset].conv_b); ggml_build_forward_expand(gf, ggml_cpy(ctx0, h_c, controls[zero_conv_offset])); zero_conv_offset++; @@ -603,7 +603,7 @@ struct ControlNet : public GGMLModule { // pass input tensors to gpu memory x_t = ggml_dup_tensor(ctx0, x); context_t = ggml_dup_tensor(ctx0, context); - hint_t = ggml_dup_tensor(ctx0, hint); + hint_t = ggml_dup_tensor(ctx0, hint); ggml_allocr_alloc(compute_allocr, x_t); if (timesteps != NULL) { timesteps_t = ggml_dup_tensor(ctx0, timesteps); @@ -649,17 +649,19 @@ struct ControlNet : public GGMLModule { struct ggml_tensor* t_emb = NULL) { { struct ggml_init_params params; - params.mem_size = static_cast(14 * ggml_tensor_overhead()) + 256; - params.mem_buffer = NULL; - params.no_alloc = true; - control_ctx = ggml_init(params); + params.mem_size = static_cast(14 * ggml_tensor_overhead()) + 256; + params.mem_buffer = NULL; + params.no_alloc = true; + control_ctx = ggml_init(params); size_t control_buffer_size = 0; int w = x->ne[0], h = x->ne[1], steps = 0; - for(int i = 0; i < (num_zero_convs + 1); i++) { + for (int i = 0; i < (num_zero_convs + 1); i++) { bool last = i == num_zero_convs; - int c = last ? middle_out_channel : zero_convs[i].channels; - if(!last && steps == 3) { - w /= 2; h /= 2; steps = 0; + int c = last ? middle_out_channel : zero_convs[i].channels; + if (!last && steps == 3) { + w /= 2; + h /= 2; + steps = 0; } controls.push_back(ggml_new_tensor_4d(control_ctx, GGML_TYPE_F32, w, h, c, 1)); control_buffer_size += ggml_nbytes(controls[i]); @@ -692,4 +694,4 @@ struct ControlNet : public GGMLModule { } }; -#endif // __CONTROL_HPP__ \ No newline at end of file +#endif // __CONTROL_HPP__ \ No newline at end of file diff --git a/esrgan.hpp b/esrgan.hpp index 90194c0d..c86363f7 100644 --- a/esrgan.hpp +++ b/esrgan.hpp @@ -376,7 +376,7 @@ struct ESRGAN : public GGMLModule { struct ggml_cgraph* gf = ggml_new_graph(ctx0); struct ggml_tensor* x_ = NULL; - float out_scale = 0.2f; + float out_scale = 0.2f; // it's performing a compute, check if backend isn't cpu if (!ggml_backend_is_cpu(backend)) { diff --git a/examples/cli/main.cpp b/examples/cli/main.cpp index b8b4a46b..7acc4449 100644 --- a/examples/cli/main.cpp +++ b/examples/cli/main.cpp @@ -6,8 +6,8 @@ #include #include -#include "stable-diffusion.h" #include "preprocessing.hpp" +#include "stable-diffusion.h" #define STB_IMAGE_IMPLEMENTATION #include "stb_image.h" @@ -81,7 +81,7 @@ struct SDParams { schedule_t schedule = DEFAULT; int sample_steps = 20; float strength = 0.75f; - float control_strength = 0.9f; + float control_strength = 0.9f; rng_type_t rng_type = CUDA_RNG; int64_t seed = 42; bool verbose = false; @@ -231,7 +231,7 @@ void parse_args(int argc, const char** argv, SDParams& params) { break; } params.embeddings_path = argv[i]; - } else if (arg == "--type") { + } else if (arg == "--type") { if (++i >= argc) { invalid_arg = true; break; @@ -338,7 +338,7 @@ void parse_args(int argc, const char** argv, SDParams& params) { params.vae_tiling = true; } else if (arg == "--control-net-cpu") { params.control_net_cpu = true; - } else if (arg == "--canny") { + } else if (arg == "--canny") { params.canny_preprocess = true; } else if (arg == "-b" || arg == "--batch-count") { if (++i >= argc) { @@ -590,18 +590,18 @@ int main(int argc, const char* argv[]) { sd_image_t* results; if (params.mode == TXT2IMG) { sd_image_t* control_image = NULL; - if(params.controlnet_path.size() > 0 && params.control_image_path.size() > 0) { - int c = 0; + if (params.controlnet_path.size() > 0 && params.control_image_path.size() > 0) { + int c = 0; input_image_buffer = stbi_load(params.control_image_path.c_str(), ¶ms.width, ¶ms.height, &c, 3); - if(input_image_buffer == NULL) { + if (input_image_buffer == NULL) { fprintf(stderr, "load image from '%s' failed\n", params.control_image_path.c_str()); return 1; } control_image = new sd_image_t{(uint32_t)params.width, - (uint32_t)params.height, - 3, - input_image_buffer}; - if(params.canny_preprocess) { // apply preprocessor + (uint32_t)params.height, + 3, + input_image_buffer}; + if (params.canny_preprocess) { // apply preprocessor LOG_INFO("Applying canny preprocessor"); control_image->data = preprocess_canny(control_image->data, control_image->width, control_image->height); } diff --git a/ggml_extend.hpp b/ggml_extend.hpp index 60ab430c..5f2969c4 100644 --- a/ggml_extend.hpp +++ b/ggml_extend.hpp @@ -462,7 +462,7 @@ __STATIC_INLINE__ struct ggml_tensor* ggml_nn_group_norm(struct ggml_context* ct __STATIC_INLINE__ void ggml_backend_tensor_get_and_sync(ggml_backend_t backend, const struct ggml_tensor* tensor, void* data, size_t offset, size_t size) { #ifdef SD_USE_CUBLAS - if(!ggml_backend_is_cpu(backend)) { + if (!ggml_backend_is_cpu(backend)) { ggml_backend_tensor_get_async(backend, tensor, data, offset, size); ggml_backend_synchronize(backend); } else { diff --git a/model.cpp b/model.cpp index ab3463d7..b89edf27 100644 --- a/model.cpp +++ b/model.cpp @@ -375,7 +375,7 @@ std::string convert_tensor_name(const std::string& name) { new_name = convert_open_clip_to_hf_clip(name); } else if (starts_with(name, "first_stage_model.decoder")) { new_name = convert_vae_decoder_name(name); - } else if (starts_with(name, "control_model.")) { // for controlnet pth models + } else if (starts_with(name, "control_model.")) { // for controlnet pth models size_t pos = name.find('.'); if (pos != std::string::npos) { new_name = name.substr(pos + 1); diff --git a/preprocessing.hpp b/preprocessing.hpp index d5bbd564..d0e899ca 100644 --- a/preprocessing.hpp +++ b/preprocessing.hpp @@ -2,17 +2,17 @@ #define __PREPROCESSING_HPP__ #include "ggml_extend.hpp" -#define M_PI_ 3.14159265358979323846 +#define M_PI_ 3.14159265358979323846 void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml_tensor* kernel, int padding) { struct ggml_init_params params; - params.mem_size = 20 * 1024 * 1024; // 10 - params.mem_buffer = NULL; - params.no_alloc = false; - struct ggml_context* ctx0 = ggml_init(params); + params.mem_size = 20 * 1024 * 1024; // 10 + params.mem_buffer = NULL; + params.no_alloc = false; + struct ggml_context* ctx0 = ggml_init(params); struct ggml_tensor* kernel_fp16 = ggml_new_tensor_4d(ctx0, GGML_TYPE_F16, kernel->ne[0], kernel->ne[1], 1, 1); - ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*) kernel_fp16->data, ggml_nelements(kernel)); - ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1); + ggml_fp32_to_fp16_row((float*)kernel->data, (ggml_fp16_t*)kernel_fp16->data, ggml_nelements(kernel)); + ggml_tensor* h = ggml_conv_2d(ctx0, kernel_fp16, input, 1, 1, padding, padding, 1, 1); ggml_cgraph* gf = ggml_new_graph(ctx0); ggml_build_forward_expand(gf, ggml_cpy(ctx0, h, output)); ggml_graph_compute_with_ctx(ctx0, gf, 1); @@ -20,14 +20,14 @@ void convolve(struct ggml_tensor* input, struct ggml_tensor* output, struct ggml } void gaussian_kernel(struct ggml_tensor* kernel) { - int ks_mid = kernel->ne[0] / 2; - float sigma = 1.4f; + int ks_mid = kernel->ne[0] / 2; + float sigma = 1.4f; float normal = 1.f / (2.0f * M_PI_ * powf(sigma, 2.0f)); - for(int y = 0; y < kernel->ne[0]; y++) { + for (int y = 0; y < kernel->ne[0]; y++) { float gx = -ks_mid + y; - for(int x = 0; x < kernel->ne[1]; x++) { + for (int x = 0; x < kernel->ne[1]; x++) { float gy = -ks_mid + x; - float k_ = expf(-((gx*gx + gy*gy) / (2.0f * powf(sigma, 2.0f)))) * normal; + float k_ = expf(-((gx * gx + gy * gy) / (2.0f * powf(sigma, 2.0f)))) * normal; ggml_tensor_set_f32(kernel, k_, x, y); } } @@ -36,9 +36,9 @@ void gaussian_kernel(struct ggml_tensor* kernel) { void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) { for (int iy = 0; iy < rgb_img->ne[1]; iy++) { for (int ix = 0; ix < rgb_img->ne[0]; ix++) { - float r = ggml_tensor_get_f32(rgb_img, ix, iy); - float g = ggml_tensor_get_f32(rgb_img, ix, iy, 1); - float b = ggml_tensor_get_f32(rgb_img, ix, iy, 2); + float r = ggml_tensor_get_f32(rgb_img, ix, iy); + float g = ggml_tensor_get_f32(rgb_img, ix, iy, 1); + float b = ggml_tensor_get_f32(rgb_img, ix, iy, 2); float gray = 0.2989f * r + 0.5870f * g + 0.1140f * b; ggml_tensor_set_f32(grayscale, gray, ix, iy); } @@ -47,19 +47,19 @@ void grayscale(struct ggml_tensor* rgb_img, struct ggml_tensor* grayscale) { void prop_hypot(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) { int n_elements = ggml_nelements(h); - float* dx = (float*)x->data; - float* dy = (float*)y->data; - float* dh = (float*)h->data; - for (int i = 0; i data; + float* dy = (float*)y->data; + float* dh = (float*)h->data; + for (int i = 0; i < n_elements; i++) { dh[i] = sqrtf(dx[i] * dx[i] + dy[i] * dy[i]); } } void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tensor* h) { int n_elements = ggml_nelements(h); - float* dx = (float*)x->data; - float* dy = (float*)y->data; - float* dh = (float*)h->data; + float* dx = (float*)x->data; + float* dy = (float*)y->data; + float* dh = (float*)h->data; for (int i = 0; i < n_elements; i++) { dh[i] = atan2f(dy[i], dx[i]); } @@ -67,13 +67,13 @@ void prop_arctan2(struct ggml_tensor* x, struct ggml_tensor* y, struct ggml_tens void normalize_tensor(struct ggml_tensor* g) { int n_elements = ggml_nelements(g); - float* dg = (float*)g->data; - float max = -INFINITY; - for (int i = 0; i data; + float max = -INFINITY; + for (int i = 0; i < n_elements; i++) { max = dg[i] > max ? dg[i] : max; } max = 1.0f / max; - for (int i = 0; i ne[1] - 1; iy++) { for (int ix = 1; ix < result->ne[0] - 1; ix++) { float angle = ggml_tensor_get_f32(D, ix, iy) * 180.0f / M_PI_; - angle = angle < 0.0f ? angle += 180.0f : angle; - float q = 1.0f; - float r = 1.0f; + angle = angle < 0.0f ? angle += 180.0f : angle; + float q = 1.0f; + float r = 1.0f; // angle 0 - if((0 >= angle && angle < 22.5f) || (157.5f >= angle && angle <= 180)){ + if ((0 >= angle && angle < 22.5f) || (157.5f >= angle && angle <= 180)) { q = ggml_tensor_get_f32(G, ix, iy + 1); r = ggml_tensor_get_f32(G, ix, iy - 1); } @@ -119,8 +119,8 @@ void non_max_supression(struct ggml_tensor* result, struct ggml_tensor* G, struc void threshold_hystersis(struct ggml_tensor* img, float highThreshold, float lowThreshold, float weak, float strong) { int n_elements = ggml_nelements(img); - float* imd = (float*)img->data; - float max = -INFINITY; + float* imd = (float*)img->data; + float max = -INFINITY; for (int i = 0; i < n_elements; i++) { max = imd[i] > max ? imd[i] : max; } @@ -128,16 +128,16 @@ void threshold_hystersis(struct ggml_tensor* img, float highThreshold, float low float lt = ht * lowThreshold; for (int i = 0; i < n_elements; i++) { float img_v = imd[i]; - if(img_v >= ht) { // strong pixel + if (img_v >= ht) { // strong pixel imd[i] = strong; - } else if(img_v <= ht && img_v >= lt) { // strong pixel + } else if (img_v <= ht && img_v >= lt) { // strong pixel imd[i] = weak; } } for (int iy = 0; iy < img->ne[1]; iy++) { for (int ix = 0; ix < img->ne[0]; ix++) { - if(ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) { + if (ix >= 3 && ix <= img->ne[0] - 3 && iy >= 3 && iy <= img->ne[1] - 3) { ggml_tensor_set_f32(img, ggml_tensor_get_f32(img, ix, iy), ix, iy); } else { ggml_tensor_set_f32(img, 0.0f, ix, iy); @@ -149,8 +149,8 @@ void threshold_hystersis(struct ggml_tensor* img, float highThreshold, float low for (int iy = 1; iy < img->ne[1] - 1; iy++) { for (int ix = 1; ix < img->ne[0] - 1; ix++) { float imd_v = ggml_tensor_get_f32(img, ix, iy); - if(imd_v == weak) { - if(ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong || + if (imd_v == weak) { + if (ggml_tensor_get_f32(img, ix + 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix + 1, iy) == strong || ggml_tensor_get_f32(img, ix, iy - 1) == strong || ggml_tensor_get_f32(img, ix, iy + 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy - 1) == strong || ggml_tensor_get_f32(img, ix - 1, iy) == strong) { ggml_tensor_set_f32(img, strong, ix, iy); @@ -164,9 +164,9 @@ void threshold_hystersis(struct ggml_tensor* img, float highThreshold, float low uint8_t* preprocess_canny(uint8_t* img, int width, int height, float highThreshold = 0.08f, float lowThreshold = 0.08f, float weak = 0.8f, float strong = 1.0f, bool inverse = false) { struct ggml_init_params params; - params.mem_size = static_cast(10 * 1024 * 1024); // 10 - params.mem_buffer = NULL; - params.no_alloc = false; + params.mem_size = static_cast(10 * 1024 * 1024); // 10 + params.mem_buffer = NULL; + params.no_alloc = false; struct ggml_context* work_ctx = ggml_init(params); if (!work_ctx) { @@ -177,29 +177,27 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float highThresho float kX[9] = { -1, 0, 1, -2, 0, 2, - -1, 0, 1 - }; + -1, 0, 1}; float kY[9] = { 1, 2, 1, 0, 0, 0, - -1, -2, -1 - }; + -1, -2, -1}; // generate kernel - int kernel_size = 5; + int kernel_size = 5; struct ggml_tensor* gkernel = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, kernel_size, kernel_size, 1, 1); - struct ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1); + struct ggml_tensor* sf_kx = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1); memcpy(sf_kx->data, kX, ggml_nbytes(sf_kx)); struct ggml_tensor* sf_ky = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, 3, 3, 1, 1); memcpy(sf_ky->data, kY, ggml_nbytes(sf_ky)); gaussian_kernel(gkernel); - struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); + struct ggml_tensor* image = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); struct ggml_tensor* image_gray = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 1, 1); - struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray); - struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray); - struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray); - struct ggml_tensor* tetha = ggml_dup_tensor(work_ctx, image_gray); + struct ggml_tensor* iX = ggml_dup_tensor(work_ctx, image_gray); + struct ggml_tensor* iY = ggml_dup_tensor(work_ctx, image_gray); + struct ggml_tensor* G = ggml_dup_tensor(work_ctx, image_gray); + struct ggml_tensor* tetha = ggml_dup_tensor(work_ctx, image_gray); sd_image_to_tensor(img, image); grayscale(image, image_gray); convolve(image_gray, image_gray, gkernel, 2); @@ -214,7 +212,7 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float highThresho for (int iy = 0; iy < height; iy++) { for (int ix = 0; ix < width; ix++) { float gray = ggml_tensor_get_f32(image_gray, ix, iy); - gray = inverse ? 1.0f - gray : gray; + gray = inverse ? 1.0f - gray : gray; ggml_tensor_set_f32(image, gray, ix, iy); ggml_tensor_set_f32(image, gray, ix, iy, 1); ggml_tensor_set_f32(image, gray, ix, iy, 2); @@ -226,4 +224,4 @@ uint8_t* preprocess_canny(uint8_t* img, int width, int height, float highThresho return output; } -#endif // __PREPROCESSING_HPP__ \ No newline at end of file +#endif // __PREPROCESSING_HPP__ \ No newline at end of file diff --git a/stable-diffusion.cpp b/stable-diffusion.cpp index ee67dd43..8dd5f16e 100644 --- a/stable-diffusion.cpp +++ b/stable-diffusion.cpp @@ -7,8 +7,8 @@ #include "util.h" #include "clip.hpp" -#include "denoiser.hpp" #include "control.hpp" +#include "denoiser.hpp" #include "esrgan.hpp" #include "lora.hpp" #include "tae.hpp" @@ -320,15 +320,15 @@ class StableDiffusionGGML { LOG_DEBUG("finished loaded file"); ggml_free(ctx); - if(control_net_path.size() > 0) { + if (control_net_path.size() > 0) { ggml_backend_t cn_backend = NULL; - if(control_net_cpu && !ggml_backend_is_cpu(backend)) { + if (control_net_cpu && !ggml_backend_is_cpu(backend)) { LOG_DEBUG("ControlNet: Using CPU backend"); cn_backend = ggml_backend_cpu_init(); } else { cn_backend = backend; } - if(!control_net.load_from_file(control_net_path, cn_backend, GGML_TYPE_F16 /* just f16 controlnet models */)) { + if (!control_net.load_from_file(control_net_path, cn_backend, GGML_TYPE_F16 /* just f16 controlnet models */)) { return false; } } @@ -549,8 +549,8 @@ class StableDiffusionGGML { struct ggml_tensor* noised_input = ggml_dup_tensor(work_ctx, x_t); struct ggml_tensor* timesteps = ggml_new_tensor_1d(work_ctx, GGML_TYPE_F32, 1); // [N, ] struct ggml_tensor* t_emb = new_timestep_embedding(work_ctx, NULL, timesteps, diffusion_model.model_channels); // [N, model_channels] - struct ggml_tensor* guided_hint = NULL; - if(control_hint != NULL) { + struct ggml_tensor* guided_hint = NULL; + if (control_hint != NULL) { guided_hint = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, noised_input->ne[0], noised_input->ne[1], diffusion_model.model_channels, 1); control_net.process_hint(guided_hint, n_threads, control_hint); control_net.alloc_compute_buffer(noised_input, guided_hint, c, t_emb); @@ -606,7 +606,7 @@ class StableDiffusionGGML { ggml_tensor_scale(noised_input, c_in); // cond - if(control_hint != NULL) { + if (control_hint != NULL) { control_net.compute(n_threads, noised_input, guided_hint, c, t_emb); } diffusion_model.compute(out_cond, n_threads, noised_input, NULL, c, control_net.controls, control_strength, t_emb, c_vector); @@ -614,7 +614,7 @@ class StableDiffusionGGML { float* negative_data = NULL; if (has_unconditioned) { // uncond - if(control_hint != NULL) { + if (control_hint != NULL) { control_net.compute(n_threads, noised_input, guided_hint, uc, t_emb); } @@ -1276,7 +1276,7 @@ sd_image_t* txt2img(sd_ctx_t* sd_ctx, } struct ggml_tensor* image_hint = NULL; - if(control_cond != NULL) { + if (control_cond != NULL) { image_hint = ggml_new_tensor_4d(work_ctx, GGML_TYPE_F32, width, height, 3, 1); sd_image_to_tensor(control_cond->data, image_hint); } @@ -1451,7 +1451,7 @@ sd_image_t* img2img(sd_ctx_t* sd_ctx, LOG_INFO("sampling using %s method", sampling_methods_str[sample_method]); struct ggml_tensor* x_0 = sd_ctx->sd->sample(work_ctx, init_latent, noise, c, c_vector, uc, - uc_vector, NULL, cfg_scale, sample_method, sigma_sched, 1.0f); + uc_vector, NULL, cfg_scale, sample_method, sigma_sched, 1.0f); // struct ggml_tensor *x_0 = load_tensor_from_file(ctx, "samples_ddim.bin"); // print_ggml_tensor(x_0); int64_t t3 = ggml_time_ms(); diff --git a/stable-diffusion.h b/stable-diffusion.h index c719a99b..a8c9f532 100644 --- a/stable-diffusion.h +++ b/stable-diffusion.h @@ -65,12 +65,12 @@ enum sd_type_t { SD_TYPE_Q8_0 = 8, SD_TYPE_Q8_1 = 9, // k-quantizations - SD_TYPE_Q2_K = 10, - SD_TYPE_Q3_K = 11, - SD_TYPE_Q4_K = 12, - SD_TYPE_Q5_K = 13, - SD_TYPE_Q6_K = 14, - SD_TYPE_Q8_K = 15, + SD_TYPE_Q2_K = 10, + SD_TYPE_Q3_K = 11, + SD_TYPE_Q4_K = 12, + SD_TYPE_Q5_K = 13, + SD_TYPE_Q6_K = 14, + SD_TYPE_Q8_K = 15, SD_TYPE_IQ2_XXS = 16, SD_TYPE_I8, SD_TYPE_I16, diff --git a/unet.hpp b/unet.hpp index 2c9e7c92..096c5dc9 100644 --- a/unet.hpp +++ b/unet.hpp @@ -495,9 +495,9 @@ struct UNetModel : public GGMLModule { h = middle_block_1.forward(ctx0, h, context); // [N, 4*model_channels, h/8, w/8] h = middle_block_2.forward(ctx0, h, emb); // [N, 4*model_channels, h/8, w/8] - if(control.size() > 0) { + if (control.size() > 0) { auto cs = ggml_scale_inplace(ctx0, control[control.size() - 1], control_net_strength); - h = ggml_add(ctx0, h, cs); // middle control + h = ggml_add(ctx0, h, cs); // middle control } int control_offset = control.size() - 2; @@ -507,9 +507,9 @@ struct UNetModel : public GGMLModule { auto h_skip = hs.back(); hs.pop_back(); - if(control.size() > 0) { - auto cs = ggml_scale_inplace(ctx0, control[control_offset], control_net_strength); - h_skip = ggml_add(ctx0, h_skip, cs); // control net condition + if (control.size() > 0) { + auto cs = ggml_scale_inplace(ctx0, control[control_offset], control_net_strength); + h_skip = ggml_add(ctx0, h_skip, cs); // control net condition control_offset--; } @@ -542,8 +542,8 @@ struct UNetModel : public GGMLModule { struct ggml_tensor* timesteps, struct ggml_tensor* context, std::vector control, - struct ggml_tensor* t_emb = NULL, - struct ggml_tensor* y = NULL, + struct ggml_tensor* t_emb = NULL, + struct ggml_tensor* y = NULL, float control_net_strength = 1.0) { // since we are using ggml-alloc, this buffer only needs enough space to hold the ggml_tensor and ggml_cgraph structs, but not the tensor data static size_t buf_size = ggml_tensor_overhead() * UNET_GRAPH_SIZE + ggml_graph_overhead(); @@ -611,12 +611,12 @@ struct UNetModel : public GGMLModule { } // offload all controls tensors to gpu - if(control.size() > 0 && !ggml_backend_is_cpu(backend) && control[0]->backend != GGML_BACKEND_GPU) { - for(int i = 0; i < control.size(); i++) { + if (control.size() > 0 && !ggml_backend_is_cpu(backend) && control[0]->backend != GGML_BACKEND_GPU) { + for (int i = 0; i < control.size(); i++) { ggml_tensor* cntl_t = ggml_dup_tensor(ctx0, control[i]); control_t.push_back(cntl_t); ggml_allocr_alloc(compute_allocr, cntl_t); - if(!ggml_allocr_is_measure(compute_allocr)) { + if (!ggml_allocr_is_measure(compute_allocr)) { ggml_backend_tensor_copy(control[i], control_t[i]); ggml_backend_synchronize(backend); } @@ -636,8 +636,8 @@ struct UNetModel : public GGMLModule { void alloc_compute_buffer(struct ggml_tensor* x, struct ggml_tensor* context, std::vector control, - struct ggml_tensor* t_emb = NULL, - struct ggml_tensor* y = NULL, + struct ggml_tensor* t_emb = NULL, + struct ggml_tensor* y = NULL, float control_net_strength = 1.0) { auto get_graph = [&]() -> struct ggml_cgraph* { return build_graph(x, NULL, context, control, t_emb, y, control_net_strength); diff --git a/util.cpp b/util.cpp index 4445f6c5..c5f3f861 100644 --- a/util.cpp +++ b/util.cpp @@ -1,6 +1,6 @@ #include "util.h" -#include #include +#include #include #include #include diff --git a/vae.hpp b/vae.hpp index 38af5408..f78777f9 100644 --- a/vae.hpp +++ b/vae.hpp @@ -121,7 +121,7 @@ struct AttnBlock { size_t calculate_mem_size(ggml_type wtype) { double mem_size = 0; mem_size += 6 * ggml_row_size(GGML_TYPE_F32, in_channels); // norm_w/norm_b/q_b/k_v/v_b/proj_out_b - mem_size += 4 * ggml_row_size(GGML_TYPE_F16, in_channels * in_channels * 1 * 1); // q_w/k_w/v_w/proj_out_w // object overhead + mem_size += 4 * ggml_row_size(GGML_TYPE_F16, in_channels * in_channels * 1 * 1); // q_w/k_w/v_w/proj_out_w // object overhead return static_cast(mem_size); }