From cd8d3cfdf5cafbf5d4efbc8a1a4ef3dbb627e37f Mon Sep 17 00:00:00 2001 From: Pierre-Antoine Bannier Date: Fri, 19 Apr 2024 13:16:48 +0200 Subject: [PATCH] new API for bark small --- encodec.cpp | 53 ++++++++++++++++++++++++++++++++++++++++------------- encodec.h | 11 +++++++++++ 2 files changed, 51 insertions(+), 13 deletions(-) diff --git a/encodec.cpp b/encodec.cpp index d16fe75..0dc427e 100644 --- a/encodec.cpp +++ b/encodec.cpp @@ -257,21 +257,13 @@ static struct ggml_tensor *forward_pass_lstm_unilayer( return hs; } -bool encodec_load_model_weights(const std::string &fname, encodec_model &model, int n_gpu_layers) { - fprintf(stderr, "%s: loading model from '%s'\n", __func__, fname.c_str()); - - auto infile = std::ifstream(fname, std::ios::binary); - if (!infile) { - fprintf(stderr, "%s: failed to open '%s'\n", __func__, fname.c_str()); - return false; - } - +bool encodec_load_model_weights(std::ifstream &infile, encodec_model &model, int n_gpu_layers) { // verify magic (i.e. ggml signature in hex format) { uint32_t magic; read_safe(infile, magic); if (magic != ENCODEC_FILE_MAGIC) { - fprintf(stderr, "%s: invalid model file '%s' (bad magic)\n", __func__, fname.c_str()); + fprintf(stderr, "%s: invalid model file (bad magic)\n", __func__); return false; } } @@ -312,8 +304,8 @@ bool encodec_load_model_weights(const std::string &fname, encodec_model &model, // in order to save memory and also to speed up the computation ggml_type wtype = ggml_ftype_to_ggml_type((ggml_ftype)(model.hparams.ftype)); if (wtype == GGML_TYPE_COUNT) { - fprintf(stderr, "%s: invalid model file '%s' (bad ftype value %d)\n", - __func__, fname.c_str(), model.hparams.ftype); + fprintf(stderr, "%s: invalid model file (bad ftype value %d)\n", + __func__, model.hparams.ftype); return 1; } @@ -1341,13 +1333,48 @@ bool encodec_decompress_audio( return true; } +struct encodec_context *encodec_load_model(std::ifstream &fin, int n_gpu_layers) { + int64_t t_start_load_us = ggml_time_us(); + + struct 
encodec_context *ectx = new encodec_context(); + + ectx->model = encodec_model(); + if (!encodec_load_model_weights(fin, ectx->model, n_gpu_layers)) { + fprintf(stderr, "%s: failed to load model weights\n", __func__); + return {}; + } + + // pre-compute the number of codebooks required + int bandwidth = ectx->model.hparams.bandwidth; + int sr = ectx->model.hparams.sr; + + int hop_length = 1; + for (int i = 0; i < 4; i++) { + hop_length *= ectx->model.hparams.ratios[i]; + } + ectx->model.hparams.hop_length = hop_length; + + ectx->model.hparams.n_q = get_num_codebooks(bandwidth, hop_length, sr); + fprintf(stderr, "%s: n_q = %d\n", __func__, ectx->model.hparams.n_q); + + ectx->t_load_us = ggml_time_us() - t_start_load_us; + + return ectx; +} + struct encodec_context *encodec_load_model(const std::string &model_path, int n_gpu_layers) { int64_t t_start_load_us = ggml_time_us(); + auto infile = std::ifstream(model_path, std::ios::binary); + if (!infile) { + fprintf(stderr, "%s: failed to open '%s'\n", __func__, model_path.c_str()); + return nullptr; + } + struct encodec_context *ectx = new encodec_context(); ectx->model = encodec_model(); - if (!encodec_load_model_weights(model_path, ectx->model, n_gpu_layers)) { + if (!encodec_load_model_weights(infile, ectx->model, n_gpu_layers)) { fprintf(stderr, "%s: failed to load model weights from '%s'\n", __func__, model_path.c_str()); return {}; } diff --git a/encodec.h b/encodec.h index 62391b9..17dec20 100644 --- a/encodec.h +++ b/encodec.h @@ -192,6 +192,17 @@ struct encodec_context *encodec_load_model( const std::string &model_path, int n_gpu_layers); +/** + * Loads an encodec model from an opened input file stream. + * + * @param fin The input file stream to read the encodec model from. The stream's read position must be at the start of the model data. + * @param n_gpu_layers The number of GPU layers to use. + * @return A pointer to the encodec context struct. 
+ */ +struct encodec_context *encodec_load_model( + std::ifstream &fin, + int n_gpu_layers); + /** * Sets the target bandwidth for the given encodec context. *