From 29029d0096ed254f257f1118ecb8282cc63edec8 Mon Sep 17 00:00:00 2001 From: Roberts Slisans Date: Thu, 31 Aug 2023 12:00:28 +0300 Subject: [PATCH 1/2] add max_gen_duration_s to waveform generation --- bark/api.py | 5 ++++- bark/generation.py | 12 ++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/bark/api.py b/bark/api.py index 7b646b7f..6e00c1e7 100644 --- a/bark/api.py +++ b/bark/api.py @@ -38,6 +38,7 @@ def semantic_to_waveform( temp: float = 0.7, silent: bool = False, output_full: bool = False, + max_gen_duration_s=None, ): """Generate audio array from semantic input. @@ -47,6 +48,7 @@ def semantic_to_waveform( temp: generation temperature (1.0 more diverse, 0.0 more conservative) silent: disable progress bar output_full: return full generation to be used as a history prompt + max_gen_duration_s: maximum duration of generated audio in seconds Returns: numpy audio array at sample frequency 24khz @@ -56,7 +58,8 @@ def semantic_to_waveform( history_prompt=history_prompt, temp=temp, silent=silent, - use_kv_caching=True + use_kv_caching=True, + max_gen_duration_s=max_gen_duration_s, ) fine_tokens = generate_fine( coarse_tokens, diff --git a/bark/generation.py b/bark/generation.py index 54f98709..cabf262d 100644 --- a/bark/generation.py +++ b/bark/generation.py @@ -538,6 +538,7 @@ def generate_coarse( max_coarse_history=630, # min 60 (faster), max 630 (more context) sliding_window_len=60, use_kv_caching=False, + max_gen_duration_s=None, ): """Generate coarse audio codes from semantic tokens.""" assert ( @@ -605,6 +606,17 @@ def generate_coarse( * N_COARSE_CODEBOOKS ) ) + + if max_gen_duration_s is not None: + n_steps = min( + n_steps, + int( + np.floor( + round(max_gen_duration_s * COARSE_RATE_HZ) + ) * N_COARSE_CODEBOOKS + ) + ) + assert n_steps > 0 and n_steps % N_COARSE_CODEBOOKS == 0 x_semantic = np.hstack([x_semantic_history, x_semantic]).astype(np.int32) x_coarse = x_coarse_history.astype(np.int32) From 0d91823ead3d87c317f12d01d325fca9408c669e Mon Sep 17 00:00:00 2001 From: Roberts Slisans Date: Thu, 31 Aug 2023 12:08:46 +0300 Subject: [PATCH 2/2] bump version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 16d284bd..bc647415 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "suno-bark" -version = "0.0.1a" +version = "0.1.0" description = "Bark text to audio model" readme = "README.md" requires-python = ">=3.8"