Commit 2ba5949

updated sdcpp, also set euler as default sampler

LostRuins committed Dec 1, 2024
1 parent e93c242 commit 2ba5949
Showing 27 changed files with 1,514 additions and 521 deletions.
4 changes: 2 additions & 2 deletions kcpp_sdui.embd

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions klite.embd
@@ -4406,7 +4406,7 @@ Current version indicated by LITEVER below.
img_img2imgstr: 0.6,
img_clipskip: -1,
img_steps: 20,
img_sampler: "Euler a",
img_sampler: "Euler",
img_aspect:0, //0=square,1=portrait,2=landscape,3=bigsquare
save_images: true,
save_remote_images: false,
@@ -19698,8 +19698,8 @@ Current version indicated by LITEVER below.
<div class="inlinelabel">
<div class="justifyleft" style="padding:4px">Sampler: </div>
<select title="Image Sampler" style="padding:1px; font-size:12px; height:20px; width: 100px;" class="form-control" id="img_sampler">
<option value="Euler a">Euler A</option>
<option value="Euler">Euler</option>
<option value="Euler a">Euler A</option>
<option value="Heun">Heun</option>
<option value="DPM2">DPM2</option>
<option value="LCM">LCM</option>
4 changes: 2 additions & 2 deletions koboldcpp.py
@@ -56,7 +56,7 @@
modelbusy = threading.Lock()
requestsinqueue = 0
defaultport = 5001
KcppVersion = "1.79.1"
KcppVersion = "1.80"
showdebug = True
guimode = False
showsamplerwarning = True
@@ -2014,7 +2014,7 @@ def do_GET(self):
if friendlysdmodelname=="inactive" or fullsdmodelpath=="":
response_body = (json.dumps([]).encode())
else:
response_body = (json.dumps([{"name":"Euler a","aliases":["k_euler_a","k_euler_ancestral"],"options":{}},{"name":"Euler","aliases":["k_euler"],"options":{}},{"name":"Heun","aliases":["k_heun"],"options":{}},{"name":"DPM2","aliases":["k_dpm_2"],"options":{}},{"name":"DPM++ 2M","aliases":["k_dpmpp_2m"],"options":{}},{"name":"LCM","aliases":["k_lcm"],"options":{}}]).encode())
response_body = (json.dumps([{"name":"Euler","aliases":["k_euler"],"options":{}},{"name":"Euler a","aliases":["k_euler_a","k_euler_ancestral"],"options":{}},{"name":"Heun","aliases":["k_heun"],"options":{}},{"name":"DPM2","aliases":["k_dpm_2"],"options":{}},{"name":"DPM++ 2M","aliases":["k_dpmpp_2m"],"options":{}},{"name":"LCM","aliases":["k_lcm"],"options":{}}]).encode())
elif self.path.endswith('/sdapi/v1/latent-upscale-modes'):
response_body = (json.dumps([]).encode())
elif self.path.endswith('/sdapi/v1/upscalers'):
77 changes: 52 additions & 25 deletions otherarch/sdcpp/clip.hpp
@@ -343,6 +343,13 @@ class CLIPTokenizer {
}
}

+ std::string clean_up_tokenization(std::string& text) {
+     std::regex pattern(R"( ,)");
+     // Replace " ," with ","
+     std::string result = std::regex_replace(text, pattern, ",");
+     return result;
+ }
+
std::string decode(const std::vector<int>& tokens) {
std::string text = "";
for (int t : tokens) {
@@ -351,8 +358,12 @@
std::u32string ts = decoder[t];
// printf("%d, %s \n", t, utf32_to_utf8(ts).c_str());
std::string s = utf32_to_utf8(ts);
if (s.length() >= 4 && ends_with(s, "</w>")) {
text += " " + s.replace(s.length() - 4, s.length() - 1, "");
if (s.length() >= 4) {
if (ends_with(s, "</w>")) {
text += s.replace(s.length() - 4, s.length() - 1, "") + " ";
} else {
text += s;
}
} else {
text += " " + s;
}
@@ -364,6 +375,7 @@

// std::string s((char *)bytes.data());
// std::string s = "";
+ text = clean_up_tokenization(text);
return trim(text);
}
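Net effect of the decode() changes above: the word-boundary space now goes after a "</w>" token instead of before it, in-word fragments are concatenated directly, and the new clean_up_tokenization() strips the space this would otherwise leave before commas. A condensed, self-contained sketch of that behavior (hypothetical token strings rather than the real CLIP vocabulary, and omitting the short-token branch):

#include <iostream>
#include <regex>
#include <string>
#include <vector>

int main() {
    // Hypothetical decoder output; the real code maps token ids through `decoder`.
    std::vector<std::string> tokens = {"a</w>", "photo</w>", ",</w>", "hi", "res</w>"};
    std::string text;
    for (const std::string& s : tokens) {
        if (s.size() >= 4 && s.compare(s.size() - 4, 4, "</w>") == 0) {
            text += s.substr(0, s.size() - 4) + " ";  // word ends: space goes after it
        } else {
            text += s;  // in-word fragment: concatenate directly
        }
    }
    text = std::regex_replace(text, std::regex(R"( ,)"), ",");  // clean_up_tokenization
    std::cout << text << "\n";  // prints "a photo, hires " (the real decode() then trims)
    return 0;
}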

@@ -533,9 +545,12 @@ class CLIPEmbeddings : public GGMLBlock {
int64_t vocab_size;
int64_t num_positions;

- void init_params(struct ggml_context* ctx, ggml_type wtype) {
-     params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, wtype, embed_dim, vocab_size);
-     params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, embed_dim, num_positions);
+ void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
+     enum ggml_type token_wtype = (tensor_types.find(prefix + "token_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "token_embedding.weight"] : GGML_TYPE_F32;
+     enum ggml_type position_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "position_embedding.weight") != tensor_types.end()) ? tensor_types[prefix + "position_embedding.weight"] : GGML_TYPE_F32;
+
+     params["token_embedding.weight"] = ggml_new_tensor_2d(ctx, token_wtype, embed_dim, vocab_size);
+     params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);
}

public:
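The lookup in init_params() above is the pattern this commit applies throughout clip.hpp and common.hpp: consult the per-tensor type map under the block's prefix, falling back to a safe default (usually F32). A small helper expressing that pattern; this function is a sketch and does not exist in the diff, which inlines the lookup at each site:

#include <map>
#include <string>
#include "ggml.h"  // for enum ggml_type

// Hypothetical helper, not part of this commit.
static enum ggml_type lookup_wtype(const std::map<std::string, enum ggml_type>& tensor_types,
                                   const std::string& prefix,
                                   const std::string& name,
                                   enum ggml_type fallback = GGML_TYPE_F32) {
    auto it = tensor_types.find(prefix + name);
    return (it != tensor_types.end()) ? it->second : fallback;
}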
@@ -579,11 +594,14 @@ class CLIPVisionEmbeddings : public GGMLBlock {
int64_t image_size;
int64_t num_patches;
int64_t num_positions;
+ void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
+     enum ggml_type patch_wtype = GGML_TYPE_F16; // tensor_types.find(prefix + "patch_embedding.weight") != tensor_types.end() ? tensor_types[prefix + "patch_embedding.weight"] : GGML_TYPE_F16;
+     enum ggml_type class_wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "class_embedding") != tensor_types.end() ? tensor_types[prefix + "class_embedding"] : GGML_TYPE_F32;
+     enum ggml_type position_wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "position_embedding.weight") != tensor_types.end() ? tensor_types[prefix + "position_embedding.weight"] : GGML_TYPE_F32;

- void init_params(struct ggml_context* ctx, ggml_type wtype) {
-     params["patch_embedding.weight"] = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, patch_size, patch_size, num_channels, embed_dim);
-     params["class_embedding"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, embed_dim);
-     params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, embed_dim, num_positions);
+     params["patch_embedding.weight"] = ggml_new_tensor_4d(ctx, patch_wtype, patch_size, patch_size, num_channels, embed_dim);
+     params["class_embedding"] = ggml_new_tensor_1d(ctx, class_wtype, embed_dim);
+     params["position_embedding.weight"] = ggml_new_tensor_2d(ctx, position_wtype, embed_dim, num_positions);
}

public:
@@ -639,9 +657,10 @@ enum CLIPVersion {

class CLIPTextModel : public GGMLBlock {
protected:
- void init_params(struct ggml_context* ctx, ggml_type wtype) {
+ void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
      if (version == OPEN_CLIP_VIT_BIGG_14) {
-         params["text_projection"] = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, projection_dim, hidden_size);
+         enum ggml_type wtype = GGML_TYPE_F32; // tensor_types.find(prefix + "text_projection") != tensor_types.end() ? tensor_types[prefix + "text_projection"] : GGML_TYPE_F32;
+         params["text_projection"] = ggml_new_tensor_2d(ctx, wtype, projection_dim, hidden_size);
}
}

@@ -711,8 +730,12 @@ class CLIPTextModel : public GGMLBlock {
if (return_pooled) {
auto text_projection = params["text_projection"];
ggml_tensor* pooled = ggml_view_1d(ctx, x, hidden_size, x->nb[1] * max_token_idx);
-     pooled = ggml_mul_mat(ctx, ggml_cont(ctx, ggml_transpose(ctx, text_projection)), pooled);
-     return pooled;
+     if (text_projection != NULL) {
+         pooled = ggml_nn_linear(ctx, pooled, text_projection, NULL);
+     } else {
+         LOG_DEBUG("Missing text_projection matrix, assuming identity...");
+     }
+     return pooled; // [hidden_size, 1, 1]
}

return x; // [N, n_token, hidden_size]
@@ -761,14 +784,17 @@ class CLIPVisionModel : public GGMLBlock {
auto x = embeddings->forward(ctx, pixel_values); // [N, num_positions, embed_dim]
x = pre_layernorm->forward(ctx, x);
x = encoder->forward(ctx, x, -1, false);
- x = post_layernorm->forward(ctx, x); // [N, n_token, hidden_size]
  // print_ggml_tensor(x, true, "ClipVisionModel x: ");
+ auto last_hidden_state = x;
+ x = post_layernorm->forward(ctx, x); // [N, n_token, hidden_size]
+
GGML_ASSERT(x->ne[3] == 1);
if (return_pooled) {
ggml_tensor* pooled = ggml_cont(ctx, ggml_view_2d(ctx, x, x->ne[0], x->ne[2], x->nb[2], 0));
return pooled; // [N, hidden_size]
} else {
-     return x; // [N, n_token, hidden_size]
+     // return x; // [N, n_token, hidden_size]
+     return last_hidden_state; // [N, n_token, hidden_size]
}
}
};
@@ -779,9 +805,9 @@ class CLIPProjection : public UnaryBlock {
int64_t out_features;
bool transpose_weight;

- void init_params(struct ggml_context* ctx, ggml_type wtype) {
+ void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, const std::string prefix = "") {
+     enum ggml_type wtype = tensor_types.find(prefix + "weight") != tensor_types.end() ? tensor_types[prefix + "weight"] : GGML_TYPE_F32;
      if (transpose_weight) {
-         LOG_ERROR("transpose_weight");
params["weight"] = ggml_new_tensor_2d(ctx, wtype, out_features, in_features);
} else {
params["weight"] = ggml_new_tensor_2d(ctx, wtype, in_features, out_features);
@@ -842,12 +868,13 @@ struct CLIPTextModelRunner : public GGMLRunner {
CLIPTextModel model;

CLIPTextModelRunner(ggml_backend_t backend,
-                     ggml_type wtype,
+                     std::map<std::string, enum ggml_type>& tensor_types,
+                     const std::string prefix,
                      CLIPVersion version = OPENAI_CLIP_VIT_L_14,
                      int clip_skip_value = 1,
                      bool with_final_ln = true)
-     : GGMLRunner(backend, wtype), model(version, clip_skip_value, with_final_ln) {
-     model.init(params_ctx, wtype);
+     : GGMLRunner(backend), model(version, clip_skip_value, with_final_ln) {
+     model.init(params_ctx, tensor_types, prefix);
}

std::string get_desc() {
@@ -889,13 +916,13 @@
struct ggml_tensor* embeddings = NULL;

if (num_custom_embeddings > 0 && custom_embeddings_data != NULL) {
-     auto custom_embeddings = ggml_new_tensor_2d(compute_ctx,
-                                                 wtype,
-                                                 model.hidden_size,
-                                                 num_custom_embeddings);
+     auto token_embed_weight = model.get_token_embed_weight();
+     auto custom_embeddings = ggml_new_tensor_2d(compute_ctx,
+                                                 token_embed_weight->type,
+                                                 model.hidden_size,
+                                                 num_custom_embeddings);
      set_backend_tensor_data(custom_embeddings, custom_embeddings_data);

-     auto token_embed_weight = model.get_token_embed_weight();
// concatenate custom embeddings
embeddings = ggml_concat(compute_ctx, token_embed_weight, custom_embeddings, 1);
}
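With the constructor change above, callers build the runner from the loader's per-tensor type map plus a name prefix instead of a single ggml_type. A hedged sketch of a call site; the surrounding function and the prefix string are assumptions, not code from this commit:

#include <map>
#include <string>
#include "clip.hpp"

// Hypothetical call site for illustration only.
void build_text_encoder(ggml_backend_t backend,
                        std::map<std::string, enum ggml_type>& tensor_types) {
    CLIPTextModelRunner runner(backend,
                               tensor_types,
                               "cond_stage_model.transformer.text_model.",  // assumed prefix
                               OPENAI_CLIP_VIT_L_14,
                               /*clip_skip_value=*/1,
                               /*with_final_ln=*/true);
    (void)runner;
}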
37 changes: 23 additions & 14 deletions otherarch/sdcpp/common.hpp
@@ -182,9 +182,11 @@ class GEGLU : public GGMLBlock {
int64_t dim_in;
int64_t dim_out;

- void init_params(struct ggml_context* ctx, ggml_type wtype) {
-     params["proj.weight"] = ggml_new_tensor_2d(ctx, wtype, dim_in, dim_out * 2);
-     params["proj.bias"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, dim_out * 2);
+ void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
+     enum ggml_type wtype = (tensor_types.find(prefix + "proj.weight") != tensor_types.end()) ? tensor_types[prefix + "proj.weight"] : GGML_TYPE_F32;
+     enum ggml_type bias_wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "proj.bias") != tensor_types.end()) ? tensor_types[prefix + "proj.bias"] : GGML_TYPE_F32;
+     params["proj.weight"] = ggml_new_tensor_2d(ctx, wtype, dim_in, dim_out * 2);
+     params["proj.bias"] = ggml_new_tensor_1d(ctx, bias_wtype, dim_out * 2);
}

public:
@@ -245,16 +247,19 @@ class CrossAttention : public GGMLBlock {
int64_t context_dim;
int64_t n_head;
int64_t d_head;
+ bool flash_attn;

public:
CrossAttention(int64_t query_dim,
int64_t context_dim,
int64_t n_head,
-                int64_t d_head)
+                int64_t d_head,
+                bool flash_attn = false)
: n_head(n_head),
d_head(d_head),
query_dim(query_dim),
-       context_dim(context_dim) {
+       context_dim(context_dim),
+       flash_attn(flash_attn) {
int64_t inner_dim = d_head * n_head;

blocks["to_q"] = std::shared_ptr<GGMLBlock>(new Linear(query_dim, inner_dim, false));
@@ -283,7 +288,7 @@ class CrossAttention : public GGMLBlock {
auto k = to_k->forward(ctx, context); // [N, n_context, inner_dim]
auto v = to_v->forward(ctx, context); // [N, n_context, inner_dim]

- x = ggml_nn_attention_ext(ctx, q, k, v, n_head, NULL, false); // [N, n_token, inner_dim]
+ x = ggml_nn_attention_ext(ctx, q, k, v, n_head, NULL, false, false, flash_attn); // [N, n_token, inner_dim]

x = to_out_0->forward(ctx, x); // [N, n_token, query_dim]
return x;
@@ -301,15 +306,16 @@ class BasicTransformerBlock : public GGMLBlock {
int64_t n_head,
int64_t d_head,
int64_t context_dim,
-                       bool ff_in = false)
+                       bool ff_in = false,
+                       bool flash_attn = false)
: n_head(n_head), d_head(d_head), ff_in(ff_in) {
// disable_self_attn is always False
// disable_temporal_crossattention is always False
// switch_temporal_ca_to_sa is always False
// inner_dim is always None or equal to dim
// gated_ff is always True
blocks["attn1"] = std::shared_ptr<GGMLBlock>(new CrossAttention(dim, dim, n_head, d_head));
blocks["attn2"] = std::shared_ptr<GGMLBlock>(new CrossAttention(dim, context_dim, n_head, d_head));
blocks["attn1"] = std::shared_ptr<GGMLBlock>(new CrossAttention(dim, dim, n_head, d_head, flash_attn));
blocks["attn2"] = std::shared_ptr<GGMLBlock>(new CrossAttention(dim, context_dim, n_head, d_head, flash_attn));
blocks["ff"] = std::shared_ptr<GGMLBlock>(new FeedForward(dim, dim));
blocks["norm1"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim));
blocks["norm2"] = std::shared_ptr<GGMLBlock>(new LayerNorm(dim));
@@ -374,7 +380,8 @@ class SpatialTransformer : public GGMLBlock {
int64_t n_head,
int64_t d_head,
int64_t depth,
-                    int64_t context_dim)
+                    int64_t context_dim,
+                    bool flash_attn = false)
: in_channels(in_channels),
n_head(n_head),
d_head(d_head),
@@ -388,7 +395,7 @@

for (int i = 0; i < depth; i++) {
std::string name = "transformer_blocks." + std::to_string(i);
blocks[name] = std::shared_ptr<GGMLBlock>(new BasicTransformerBlock(inner_dim, n_head, d_head, context_dim));
blocks[name] = std::shared_ptr<GGMLBlock>(new BasicTransformerBlock(inner_dim, n_head, d_head, context_dim, false, flash_attn));
}

blocks["proj_out"] = std::shared_ptr<GGMLBlock>(new Conv2d(inner_dim, in_channels, {1, 1}));
@@ -433,8 +440,10 @@

class AlphaBlender : public GGMLBlock {
protected:
- void init_params(struct ggml_context* ctx, ggml_type wtype) {
-     params["mix_factor"] = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1);
+ void init_params(struct ggml_context* ctx, std::map<std::string, enum ggml_type>& tensor_types, std::string prefix = "") {
+     // Get the type of the "mix_factor" tensor from the input tensors map with the specified prefix
+     enum ggml_type wtype = GGML_TYPE_F32; //(tensor_types.find(prefix + "mix_factor") != tensor_types.end()) ? tensor_types[prefix + "mix_factor"] : GGML_TYPE_F32;
+     params["mix_factor"] = ggml_new_tensor_1d(ctx, wtype, 1);
}

float get_alpha() {
@@ -511,4 +520,4 @@ class VideoResBlock : public ResBlock {
}
};

- #endif // __COMMON_HPP__
\ No newline at end of file
+ #endif // __COMMON_HPP__
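The common.hpp changes share one theme: a flash_attn flag threaded from SpatialTransformer through BasicTransformerBlock into CrossAttention, where it selects the flash-attention path of ggml_nn_attention_ext. A standalone sketch of that plumbing pattern; the struct names mirror the diff but the bodies are stand-ins, not the real ggml code:

#include <cstdio>

struct CrossAttention {
    bool flash_attn;
    explicit CrossAttention(bool flash_attn = false) : flash_attn(flash_attn) {}
    void forward() const {
        // real code: ggml_nn_attention_ext(..., flash_attn) picks the kernel
        std::printf("attention, flash_attn=%s\n", flash_attn ? "true" : "false");
    }
};

struct BasicTransformerBlock {
    CrossAttention attn1, attn2;
    BasicTransformerBlock(bool ff_in = false, bool flash_attn = false)
        : attn1(flash_attn), attn2(flash_attn) { (void)ff_in; }
};

struct SpatialTransformer {
    BasicTransformerBlock block;
    explicit SpatialTransformer(bool flash_attn = false)
        : block(/*ff_in=*/false, flash_attn) {}
};

int main() {
    SpatialTransformer st(/*flash_attn=*/true);  // one top-level switch...
    st.block.attn1.forward();                    // ...reaches every attention op
    return 0;
}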