From f62bf09c27c3ababc8323662882da5fadf5fc2f2 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Mon, 9 Oct 2023 17:37:31 +0200 Subject: [PATCH 1/4] docstring + more accurate model context size computation --- encodec.cpp | 92 ++++++++++++++++++++++++++++++++++++++++++----------- encodec.h | 24 ++++++++++---- 2 files changed, 92 insertions(+), 24 deletions(-) diff --git a/encodec.cpp b/encodec.cpp index 8fc41ba..ac69c48 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -230,6 +230,45 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) } } + // load hparams + { + auto & hparams = model.hparams; + + read_safe(infile, hparams.in_channels); + read_safe(infile, hparams.hidden_dim); + read_safe(infile, hparams.n_filters); + read_safe(infile, hparams.kernel_size); + read_safe(infile, hparams.residual_kernel_size); + // read_safe(infile, hparams.ratios); + read_safe(infile, hparams.n_q); + read_safe(infile, hparams.n_bins); + read_safe(infile, hparams.ftype); + + const int32_t qntvr = hparams.ftype / GGML_QNT_VERSION_FACTOR; + + printf("%s: in_channels = %d\n", __func__, hparams.in_channels); + printf("%s: hidden_dim = %d\n", __func__, hparams.hidden_dim); + printf("%s: n_filters = %d\n", __func__, hparams.n_filters); + printf("%s: kernel_size = %d\n", __func__, hparams.kernel_size); + printf("%s: res_kernel = %d\n", __func__, hparams.residual_kernel_size); + // printf("%s: ratios = %d\n", __func__, hparams.ratios); + printf("%s: n_q = %d\n", __func__, hparams.n_q); + printf("%s: n_bins = %d\n", __func__, hparams.n_bins); + printf("%s: ftype = %d\n", __func__, hparams.ftype); + printf("%s: qntvr = %d\n", __func__, qntvr); + + hparams.ftype %= GGML_QNT_VERSION_FACTOR; + } + + // for the big tensors, we have the option to store the data in 16-bit floats or quantized + // in order to save memory and also to speed up the computation + ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype) (model.hparams.ftype)); + if (wtype == 
GGML_TYPE_COUNT) { + fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", + __func__, fname.c_str(), model.hparams.ftype); + return false; + } + auto & ctx = model.ctx; size_t ctx_size = 0; @@ -241,38 +280,55 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) const int hidden_dim = hparams.hidden_dim; const int n_filters = hparams.n_filters; const int kernel_size = hparams.kernel_size; + const int res_kernel_sz = hparams.residual_kernel_size; const int n_q = hparams.n_q; + const int n_bins = hparams.n_bins; const int *ratios = hparams.ratios; + const int n_lstm_layers = hparams.n_lstm_layers; // encoder { + int mult = 1; // scaling factor for hidden size + // initial conv1d layer - ctx_size += in_channels*n_filters*kernel_size*ggml_type_size(GGML_TYPE_F32); // weight - ctx_size += n_filters*ggml_type_size(GGML_TYPE_F32); //bias + ctx_size += in_channels * n_filters * kernel_size * ggml_type_size(wtype); // weight + ctx_size += n_filters * ggml_type_size(GGML_TYPE_F32); // bias // resnet blocks - ctx_size += 3*4*16*n_filters*ggml_type_size(GGML_TYPE_F32); // upper bound on w_g, w_v and bias + for (int i = 0; i < 4; i++) { + // conv1 + ctx_size += res_kernel_sz * (mult*n_filters) * (mult*n_filters/2) * ggml_type_size(wtype); // weight + ctx_size += (mult*n_filters/2) * ggml_type_size(GGML_TYPE_F32); // bias + + // conv2 + ctx_size += (mult*n_filters/2) * (mult*n_filters) * ggml_type_size(wtype); // weight + ctx_size += (mult*n_filters) * ggml_type_size(GGML_TYPE_F32); // bias + + // shortcut + ctx_size += (mult*n_filters) * (mult*n_filters) * ggml_type_size(wtype); // weight + ctx_size += (mult*n_filters) * ggml_type_size(GGML_TYPE_F32); // bias - //downsampling blocks - ctx_size += 3*4*16*n_filters*16*n_filters*2*ratios[0]*2*ggml_type_size(GGML_TYPE_F32); // upper bound on w_g, w_v and bias + // downsampling layers + ctx_size += (2*ratios[3-i]) * (mult*n_filters) * (mult*n_filters*2) * ggml_type_size(wtype); // weight + 
ctx_size += (2*mult*n_filters) * ggml_type_size(GGML_TYPE_F32); // bias + + mult *= 2; + } // lstm - ctx_size += 2*16*n_filters*16*n_filters*2*2*ggml_type_size(GGML_TYPE_F32); // weights - ctx_size += 4*16*n_filters*2*2*ggml_type_size(GGML_TYPE_F32); // bias + ctx_size += 2 * n_lstm_layers * (mult*n_filters) * (4*mult*n_filters) * ggml_type_size(wtype); // weight_ih and weight_hh + ctx_size += 2 * n_lstm_layers * (4*mult*n_filters) * ggml_type_size(GGML_TYPE_F32); // bias_ih and bias_hh // final conv - ctx_size += 3*16*n_filters*hidden_dim*kernel_size*ggml_type_size(GGML_TYPE_F32); // upper bound on w_g, w_v and bias + ctx_size += kernel_size * (mult*n_filters) * hidden_dim * ggml_type_size(wtype); // weight + ctx_size += hidden_dim * ggml_type_size(GGML_TYPE_F32); // bias } - // decoder mirrors the encoder (same number of parameter), just double context size + // decoder mirrors the encoder (same number of parameters), just double context size ctx_size *= 2; // quantizer - { - ctx_size += n_q; // inited - ctx_size += n_q*1024; // cluster_size - ctx_size += 2*n_q*hidden_dim*1024; // embed and embed_avg - } + ctx_size += hidden_dim * n_bins * ggml_type_size(wtype); // embed ctx_size += 10ull*MB; // object overhead } @@ -376,10 +432,10 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) // final conv model.encoder.final_conv_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, kernel_size, mult*n_filters, hidden_dim); - model.encoder.final_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_dim); + model.encoder.final_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_dim); model.tensors["encoder.model.15.conv.conv.weight"] = model.encoder.final_conv_w; - model.tensors["encoder.model.15.conv.conv.bias"] = model.encoder.final_conv_b; + model.tensors["encoder.model.15.conv.conv.bias"] = model.encoder.final_conv_b; } // decoder @@ -463,9 +519,9 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) 
model.quantizer.blocks.resize(n_q); for (int i = 0; i < n_q; i++) { - model.quantizer.blocks[i].embed = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, hidden_dim, n_bins); + model.quantizer.blocks[i].embed = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, hidden_dim, n_bins); - model.tensors["quantizer.vq.layers." + std::to_string(i) + "._codebook.embed"] = model.quantizer.blocks[i].embed; + model.tensors["quantizer.vq.layers." + std::to_string(i) + "._codebook.embed"] = model.quantizer.blocks[i].embed; } } diff --git a/encodec.h b/encodec.h index 8270192..00dcb1c 100644 --- a/encodec.h +++ b/encodec.h @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include "ggml.h" @@ -16,21 +16,33 @@ static const size_t MB = 1024*1024; struct encodec_hparams { + // The number of input channels is always 1 (mono). int32_t in_channels = 1; + // The hidden dimension for the codebook. int32_t hidden_dim = 128; + // The number of filters for the first convolution. int32_t n_filters = 32; + // The filter size for upsampling and downsampling. int32_t ratios[4] = {8, 5, 4, 2}; + // The kernel size for the first convolution. int32_t kernel_size = 7; + // The kernel size for the residual blocks. int32_t residual_kernel_size = 3; + // Compression int32_t compress = 2; + // The number of layers in the LSTM modules. int32_t n_lstm_layers = 2; + // The stride of the first convolution. int32_t stride = 1; - // number of codebooks is determined by the bandwidth selected. - // Supported bandwidths are 1.5kbps (n_q = 2), 3 kbps (n_q = 4), 6 kbps (n_q = 8) and 12 kbps (n_q =16) and 24kbps (n_q=32). - int32_t n_q = 32; - int32_t n_bins = 1024; - int32_t sr = 24000; + // The number of codebooks is determined by the bandwidth selected. + // Supported bandwidths are 1.5kbps (n_q = 2), 3 kbps (n_q = 4), 6 kbps (n_q = 8), + // 12 kbps (n_q = 16) and 24kbps (n_q = 32). 
+ int32_t n_q = 32; + int32_t n_bins = 1024; + int32_t sr = 24000; + + int32_t ftype; }; // res + downsample block at some ratio From a06c81e17cd2194361cce95a6179edb5fdb12e78 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Mon, 9 Oct 2023 17:58:59 +0200 Subject: [PATCH 2/4] parse model weights --- convert.py | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/convert.py b/convert.py index 3572d44..d4dddc7 100644 --- a/convert.py +++ b/convert.py @@ -43,11 +43,8 @@ parser.add_argument("--use-f16", type=bool, default=True) -def parse_codec_model(checkpoint, out_dir, use_f16): +def parse_codec_model(checkpoint, outfile, use_f16): """Load encodec model checkpoint.""" - outfile = open(out_dir, "wb") - outfile.write(struct.pack("i", 0x67676d6c)) # ggml magic - for name in checkpoint.keys(): if "weight_g" in name: # the tensor has already been parsed with the corresponding "weight_v" @@ -107,6 +104,27 @@ def parse_codec_model(checkpoint, out_dir, use_f16): outfile.close() +def parse_hparams(outfile, use_f16): + # for now this is hardcoded as we only support the 24Khz model + in_channels = 1 + hidden_dim = 128 + n_filters = 32 + kernel_size = 7 + residual_kernel_size = 3 + n_q = 32 + n_bins = 1024 + ftype = int(use_f16) + + outfile.write(struct.pack("i", in_channels)) + outfile.write(struct.pack("i", hidden_dim)) + outfile.write(struct.pack("i", n_filters)) + outfile.write(struct.pack("i", kernel_size)) + outfile.write(struct.pack("i", residual_kernel_size)) + outfile.write(struct.pack("i", n_q)) + outfile.write(struct.pack("i", n_bins)) + outfile.write(struct.pack("i", ftype)) + + if __name__ == "__main__": args = parser.parse_args() @@ -118,6 +136,15 @@ def parse_codec_model(checkpoint, out_dir, use_f16): outfile = Path(out_dir / "ggml-model.bin") checkpoint = torch.load(dir_model / "encodec_24khz-d7cc33bc.th", map_location="cpu") + + # Step 1: insert ggml magic + outfile = open(out_dir, "wb") + 
outfile.write(struct.pack("i", 0x67676d6c)) + + # Step 2: insert hyperparameters + parse_hparams(outfile, args.use_f16) + + # Step 3: insert weights parse_codec_model(checkpoint, outfile, args.use_f16) print("Done.") From 66e14fbbd7fdf516aa58a6424b7cda459083ea68 Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Mon, 9 Oct 2023 18:18:18 +0200 Subject: [PATCH 3/4] fix use_f16 --- convert.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/convert.py b/convert.py index d4dddc7..ac69a66 100644 --- a/convert.py +++ b/convert.py @@ -40,7 +40,7 @@ parser = argparse.ArgumentParser() parser.add_argument("--dir-model", type=str, required=True) parser.add_argument("--out-dir", type=str, required=True) -parser.add_argument("--use-f16", type=bool, default=True) +parser.add_argument("--use-f16", action="store_true") def parse_codec_model(checkpoint, outfile, use_f16): @@ -138,7 +138,7 @@ def parse_hparams(outfile, use_f16): checkpoint = torch.load(dir_model / "encodec_24khz-d7cc33bc.th", map_location="cpu") # Step 1: insert ggml magic - outfile = open(out_dir, "wb") + outfile = open(outfile, "wb") outfile.write(struct.pack("i", 0x67676d6c)) # Step 2: insert hyperparameters From 006397feca9a16d36577d608d14a3a423baec8ae Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Mon, 9 Oct 2023 18:29:26 +0200 Subject: [PATCH 4/4] weights are correctly loaded --- convert.py | 11 ++++++++++- encodec.cpp | 46 +++++++++++++++++++++++----------------------- 2 files changed, 33 insertions(+), 24 deletions(-) diff --git a/convert.py b/convert.py index ac69a66..f3860d2 100644 --- a/convert.py +++ b/convert.py @@ -45,6 +45,8 @@ def parse_codec_model(checkpoint, outfile, use_f16): """Load encodec model checkpoint.""" + n_f16, n_f32 = 0, 0 + for name in checkpoint.keys(): if "weight_g" in name: # the tensor has already been parsed with the corresponding "weight_v" @@ -78,18 +80,21 @@ def parse_codec_model(checkpoint, outfile, use_f16): print(f"Processing 
variable: {name} with shape: {var_data.shape}") if use_f16: - if "weight" in name: + if "weight" in name or "embed" in name: print(" Converting to float16") var_data = var_data.astype(np.float16) ftype_cur = 1 + n_f16 += 1 else: print(" Converting to float32") var_data = var_data.astype(np.float32) ftype_cur = 0 + n_f32 += 1 else: print(" Converting to float32") var_data = var_data.astype(np.float32) ftype_cur = 0 + n_f32 += 1 n_dims = len(var_data.shape) encoded_name = name.encode("utf-8") @@ -103,6 +108,10 @@ def parse_codec_model(checkpoint, outfile, use_f16): outfile.close() + print("\n") + print(f"n_f16: {n_f16} ({n_f16/(n_f16 + n_f32)*100:.0f}%)") + print(f"n_f32: {n_f32} ({n_f32/(n_f16 + n_f32)*100:.0f}%)") + def parse_hparams(outfile, use_f16): # for now this is hardcoded as we only support the 24Khz model diff --git a/encodec.cpp b/encodec.cpp index ac69c48..acbca37 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -367,7 +367,7 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) int mult = 1; // scaling factor for hidden size - model.encoder.init_conv_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, kernel_size, in_channels, mult*n_filters); + model.encoder.init_conv_w = ggml_new_tensor_3d(ctx, wtype, kernel_size, in_channels, mult*n_filters); model.encoder.init_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters); model.tensors["encoder.model.0.conv.conv.weight"] = model.encoder.init_conv_w; @@ -375,28 +375,28 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) for (int i = 0; i < 4; i++) { // conv1 - model.encoder.blocks[i].conv_1_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, res_kernel_sz, mult*n_filters, mult*n_filters/2); + model.encoder.blocks[i].conv_1_w = ggml_new_tensor_3d(ctx, wtype, res_kernel_sz, mult*n_filters, mult*n_filters/2); model.encoder.blocks[i].conv_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters/2); model.tensors["encoder.model." 
+ std::to_string(3*i+1) + ".block.1.conv.conv.weight"] = model.encoder.blocks[i].conv_1_w; model.tensors["encoder.model." + std::to_string(3*i+1) + ".block.1.conv.conv.bias"] = model.encoder.blocks[i].conv_1_b; // conv2 - model.encoder.blocks[i].conv_2_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 1, mult*n_filters/2, mult*n_filters); + model.encoder.blocks[i].conv_2_w = ggml_new_tensor_3d(ctx, wtype, 1, mult*n_filters/2, mult*n_filters); model.encoder.blocks[i].conv_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters); model.tensors["encoder.model." + std::to_string(3*i+1) + ".block.3.conv.conv.weight"] = model.encoder.blocks[i].conv_2_w; model.tensors["encoder.model." + std::to_string(3*i+1) + ".block.3.conv.conv.bias"] = model.encoder.blocks[i].conv_2_b; // shortcut conv - model.encoder.blocks[i].conv_sc_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 1, mult*n_filters, mult*n_filters); + model.encoder.blocks[i].conv_sc_w = ggml_new_tensor_3d(ctx, wtype, 1, mult*n_filters, mult*n_filters); model.encoder.blocks[i].conv_sc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters); model.tensors["encoder.model." + std::to_string(3*i+1) + ".shortcut.conv.conv.weight"] = model.encoder.blocks[i].conv_sc_w; model.tensors["encoder.model." + std::to_string(3*i+1) + ".shortcut.conv.conv.bias"] = model.encoder.blocks[i].conv_sc_b; // downsampling - model.encoder.blocks[i].ds_conv_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 2*ratios[3-i], mult*n_filters, mult*n_filters*2); + model.encoder.blocks[i].ds_conv_w = ggml_new_tensor_3d(ctx, wtype, 2*ratios[3-i], mult*n_filters, mult*n_filters*2); model.encoder.blocks[i].ds_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters*2); model.tensors["encoder.model." 
+ std::to_string(3*(i+1)) + ".conv.conv.weight"] = model.encoder.blocks[i].ds_conv_w; @@ -406,14 +406,14 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) } // LSTM - model.encoder.lstm.l0_ih_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, mult*n_filters, 4*mult*n_filters); - model.encoder.lstm.l1_ih_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, mult*n_filters, 4*mult*n_filters); + model.encoder.lstm.l0_ih_w = ggml_new_tensor_2d(ctx, wtype, mult*n_filters, 4*mult*n_filters); + model.encoder.lstm.l1_ih_w = ggml_new_tensor_2d(ctx, wtype, mult*n_filters, 4*mult*n_filters); model.tensors["encoder.model.13.lstm.weight_ih_l0"] = model.encoder.lstm.l0_ih_w; model.tensors["encoder.model.13.lstm.weight_ih_l1"] = model.encoder.lstm.l1_ih_w; - model.encoder.lstm.l0_hh_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, mult*n_filters, 4*mult*n_filters); - model.encoder.lstm.l1_hh_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, mult*n_filters, 4*mult*n_filters); + model.encoder.lstm.l0_hh_w = ggml_new_tensor_2d(ctx, wtype, mult*n_filters, 4*mult*n_filters); + model.encoder.lstm.l1_hh_w = ggml_new_tensor_2d(ctx, wtype, mult*n_filters, 4*mult*n_filters); model.tensors["encoder.model.13.lstm.weight_hh_l0"] = model.encoder.lstm.l0_hh_w; model.tensors["encoder.model.13.lstm.weight_hh_l1"] = model.encoder.lstm.l1_hh_w; @@ -431,7 +431,7 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) model.tensors["encoder.model.13.lstm.bias_hh_l1"] = model.encoder.lstm.l1_hh_b; // final conv - model.encoder.final_conv_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, kernel_size, mult*n_filters, hidden_dim); + model.encoder.final_conv_w = ggml_new_tensor_3d(ctx, wtype, kernel_size, mult*n_filters, hidden_dim); model.encoder.final_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, hidden_dim); model.tensors["encoder.model.15.conv.conv.weight"] = model.encoder.final_conv_w; @@ -444,21 +444,21 @@ bool encodec_load_model_weights(const std::string& fname, 
encodec_model& model) int mult = 16; // 2**len(ratios) - model.decoder.init_conv_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, kernel_size, hidden_dim, mult*n_filters); + model.decoder.init_conv_w = ggml_new_tensor_3d(ctx, wtype, kernel_size, hidden_dim, mult*n_filters); model.decoder.init_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters); model.tensors["decoder.model.0.conv.conv.weight"] = model.decoder.init_conv_w; model.tensors["decoder.model.0.conv.conv.bias"] = model.decoder.init_conv_b; // LSTM - model.decoder.lstm.l0_ih_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, mult*n_filters, 4*mult*n_filters); - model.decoder.lstm.l1_ih_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, mult*n_filters, 4*mult*n_filters); + model.decoder.lstm.l0_ih_w = ggml_new_tensor_2d(ctx, wtype, mult*n_filters, 4*mult*n_filters); + model.decoder.lstm.l1_ih_w = ggml_new_tensor_2d(ctx, wtype, mult*n_filters, 4*mult*n_filters); model.tensors["decoder.model.1.lstm.weight_ih_l0"] = model.decoder.lstm.l0_ih_w; model.tensors["decoder.model.1.lstm.weight_ih_l1"] = model.decoder.lstm.l1_ih_w; - model.decoder.lstm.l0_hh_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, mult*n_filters, 4*mult*n_filters); - model.decoder.lstm.l1_hh_w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, mult*n_filters, 4*mult*n_filters); + model.decoder.lstm.l0_hh_w = ggml_new_tensor_2d(ctx, wtype, mult*n_filters, 4*mult*n_filters); + model.decoder.lstm.l1_hh_w = ggml_new_tensor_2d(ctx, wtype, mult*n_filters, 4*mult*n_filters); model.tensors["decoder.model.1.lstm.weight_hh_l0"] = model.decoder.lstm.l0_hh_w; model.tensors["decoder.model.1.lstm.weight_hh_l1"] = model.decoder.lstm.l1_hh_w; @@ -477,28 +477,28 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) for (int i = 0; i < 4; i++) { // upsampling - model.decoder.blocks[i].us_conv_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, ratios[i]*2, mult*n_filters/2, mult*n_filters); + model.decoder.blocks[i].us_conv_w = ggml_new_tensor_3d(ctx, wtype, 
ratios[i]*2, mult*n_filters/2, mult*n_filters); model.decoder.blocks[i].us_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters/2); model.tensors["decoder.model." + std::to_string(3*(i+1)) + ".convtr.convtr.weight"] = model.decoder.blocks[i].us_conv_w; model.tensors["decoder.model." + std::to_string(3*(i+1)) + ".convtr.convtr.bias"] = model.decoder.blocks[i].us_conv_b; // conv1 - model.decoder.blocks[i].conv_1_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, res_kernel_sz, mult*n_filters/2, mult*n_filters/4); + model.decoder.blocks[i].conv_1_w = ggml_new_tensor_3d(ctx, wtype, res_kernel_sz, mult*n_filters/2, mult*n_filters/4); model.decoder.blocks[i].conv_1_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters/4); model.tensors["decoder.model." + std::to_string(3*(i+1)+1) + ".block.1.conv.conv.weight"] = model.decoder.blocks[i].conv_1_w; model.tensors["decoder.model." + std::to_string(3*(i+1)+1) + ".block.1.conv.conv.bias"] = model.decoder.blocks[i].conv_1_b; // conv2 - model.decoder.blocks[i].conv_2_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 1, mult*n_filters/4, mult*n_filters/2); + model.decoder.blocks[i].conv_2_w = ggml_new_tensor_3d(ctx, wtype, 1, mult*n_filters/4, mult*n_filters/2); model.decoder.blocks[i].conv_2_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters/2); model.tensors["decoder.model." + std::to_string(3*(i+1)+1) + ".block.3.conv.conv.weight"] = model.decoder.blocks[i].conv_2_w; model.tensors["decoder.model." + std::to_string(3*(i+1)+1) + ".block.3.conv.conv.bias"] = model.decoder.blocks[i].conv_2_b; // shortcut - model.decoder.blocks[i].conv_sc_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 1, mult*n_filters/2, mult*n_filters/2); + model.decoder.blocks[i].conv_sc_w = ggml_new_tensor_3d(ctx, wtype, 1, mult*n_filters/2, mult*n_filters/2); model.decoder.blocks[i].conv_sc_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, mult*n_filters/2); model.tensors["decoder.model." 
+ std::to_string(3*(i+1)+1) + ".shortcut.conv.conv.weight"] = model.decoder.blocks[i].conv_sc_w; @@ -507,8 +507,8 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) mult /= 2; } - model.decoder.final_conv_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, kernel_size, n_filters, in_channels); - model.decoder.final_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels); + model.decoder.final_conv_w = ggml_new_tensor_3d(ctx, wtype, kernel_size, n_filters, in_channels); + model.decoder.final_conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, in_channels); model.tensors["decoder.model.15.conv.conv.weight"] = model.decoder.final_conv_w; model.tensors["decoder.model.15.conv.conv.bias"] = model.decoder.final_conv_b; @@ -519,7 +519,7 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) model.quantizer.blocks.resize(n_q); for (int i = 0; i < n_q; i++) { - model.quantizer.blocks[i].embed = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, hidden_dim, n_bins); + model.quantizer.blocks[i].embed = ggml_new_tensor_2d(ctx, wtype, hidden_dim, n_bins); model.tensors["quantizer.vq.layers." + std::to_string(i) + "._codebook.embed"] = model.quantizer.blocks[i].embed; } @@ -583,7 +583,7 @@ bool encodec_load_model_weights(const std::string& fname, encodec_model& model) infile.read(reinterpret_cast(tensor->data), ggml_nbytes(tensor)); - printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0); + // printf("%48s - [%5d, %5d, %5d], type = %6s, %6.2f MB\n", name.data(), ne[0], ne[1], ne[2], ftype == 0 ? "float" : "f16", ggml_nbytes(tensor)/1024.0/1024.0); total_size += ggml_nbytes(tensor); model.n_loaded++;