From cd8d3cfdf5cafbf5d4efbc8a1a4ef3dbb627e37f Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Fri, 19 Apr 2024 13:16:48 +0200 Subject: [PATCH] new API for bark small --- encodec.cpp | 53 ++++++++++++++++++++++++++++++++++++++++------------- encodec.h | 11 +++++++++++ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/encodec.cpp b/encodec.cpp index d16fe75..0dc427e 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -257,21 +257,13 @@ static struct ggml_tensor *forward_pass_lstm_unilayer( return hs; } -bool encodec_load_model_weights(const std::string &fname, encodec_model &model, int n_gpu_layers) { - fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str()); - - auto infile = std::ifstream(fname, std::ios::binary); - if (!infile) { - fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); - return false; - } - +bool encodec_load_model_weights(std::ifstream &infile, encodec_model &model, int n_gpu_layers) { // verify magic (i.e. ggml signature in hex format) { uint32_t magic; read_safe(infile, magic); if (magic != ENCODEC_FILE_MAGIC) { - fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); + fprintf(stderr, "%s: invalid model file (bad magic)\n", __func__); return false; } } @@ -312,8 +304,8 @@ bool encodec_load_model_weights(const std::string &fname, encodec_model &model, // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { - fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", - __func__, fname.c_str(), model.hparams.ftype); + fprintf(stderr, "%s: invalid model file (bad ftype value %d)\n", + __func__, model.hparams.ftype); return 1; } @@ -1341,13 +1333,48 @@ bool encodec_decompress_audio( return true; } +struct encodec_context *encodec_load_model(std::ifstream &fin, int n_gpu_layers) { + int64_t t_start_load_us = ggml_time_us(); + + struct 
encodec_context *ectx = new encodec_context(); + + ectx->model = encodec_model(); + if (!encodec_load_model_weights(fin, ectx->model, n_gpu_layers)) { + fprintf(stderr, "%s: failed to load model weights\n", __func__); + return {}; + } + + // pre-compute the number of codebooks required + int bandwidth = ectx->model.hparams.bandwidth; + int sr = ectx->model.hparams.sr; + + int hop_length = 1; + for (int i = 0; i < 4; i++) { + hop_length *= ectx->model.hparams.ratios[i]; + } + ectx->model.hparams.hop_length = hop_length; + + ectx->model.hparams.n_q = get_num_codebooks(bandwidth, hop_length, sr); + fprintf(stderr, "%s: n_q = %d\n", __func__, ectx->model.hparams.n_q); + + ectx->t_load_us = ggml_time_us() - t_start_load_us; + + return ectx; +} + struct encodec_context *encodec_load_model(const std::string &model_path, int n_gpu_layers) { int64_t t_start_load_us = ggml_time_us(); + auto infile = std::ifstream(model_path, std::ios::binary); + if (!infile) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, model_path.c_str()); + return nullptr; + } + struct encodec_context *ectx = new encodec_context(); ectx->model = encodec_model(); - if (!encodec_load_model_weights(model_path, ectx->model, n_gpu_layers)) { + if (!encodec_load_model_weights(infile, ectx->model, n_gpu_layers)) { fprintf(stderr, "%s: failed to load model weights from '%s'\n", __func__, model_path.c_str()); return {}; } diff --git a/encodec.h b/encodec.h index 62391b9..17dec20 100644 --- a/encodec.h +++ b/encodec.h @@ -192,6 +192,17 @@ struct encodec_context *encodec_load_model( const std::string &model_path, int n_gpu_layers); +/** + * Loads an encodec model from an opened input file stream. + * + * @param fin The input file stream to read the encodec model from. The stream's read position must be at the start of the model data. + * @param n_gpu_layers The number of GPU layers to use. + * @return A pointer to the encodec context struct. 
+ */ +struct encodec_context *encodec_load_model( + std::ifstream &fin, + int n_gpu_layers); + /** * Sets the target bandwidth for the given encodec context. *