Skip to content

Commit

Permalink
Merge branch 'main' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
joehoover authored Jul 11, 2023
2 parents 0155bfa + d874966 commit dc8e1f7
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 18 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,13 @@ We offer a number of way to interact with MusicGen:
4. You can play with MusicGen by running the jupyter notebook at [`demo.ipynb`](./demo.ipynb) locally (if you have a GPU).
5. Checkout [@camenduru Colab page](https://github.com/camenduru/MusicGen-colab) which is regularly
updated with contributions from @camenduru and the community.
6. MusicGen is available in 🤗 Transformers from v4.31.0 onwards, see section [🤗 Transformers Usage](#-transformers-usage) below.
7. Finally, MusicGen is also [hosted](https://replicate.com/facebookresearch/musicgen) on Replicate, where you can interact with the model via web UI or API.
6. Finally, MusicGen is available in 🤗 Transformers from v4.31.0 onwards, see section [🤗 Transformers Usage](#-transformers-usage) below.
7. MusicGen is also [hosted](https://replicate.com/facebookresearch/musicgen) on Replicate, where you can interact with the model via web UI or API.

## API

Expand Down
78 changes: 62 additions & 16 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,11 @@
import argparse
from concurrent.futures import ProcessPoolExecutor
import os
from pathlib import Path
import subprocess as sp
from tempfile import NamedTemporaryFile
import time
import typing as tp
import warnings

import torch
Expand Down Expand Up @@ -50,6 +52,29 @@ def interrupt():
INTERRUPTING = True


class FileCleaner:
    """Track files and delete them once they are older than a given lifetime.

    Entries are kept in insertion order, so the oldest file is always at the
    front of ``self.files``. Expired files are only removed when a new path
    is registered through :meth:`add`; there is no background cleanup.

    Args:
        file_lifetime: Age in seconds (measured with ``time.time()``) after
            which a registered file is deleted on the next call to ``add``.
    """

    def __init__(self, file_lifetime: float = 3600):
        self.file_lifetime = file_lifetime
        # (time_added, path) pairs, oldest first.
        self.files: tp.List[tp.Tuple[float, Path]] = []

    def add(self, path: tp.Union[str, Path]):
        """Purge expired files, then register ``path`` with the current time."""
        self._cleanup()
        self.files.append((time.time(), Path(path)))

    def _cleanup(self):
        """Delete every tracked file older than ``file_lifetime``.

        Deletion is best effort: an entry is always dropped from the queue,
        even if its file is already gone or cannot be removed, so one bad
        path can never block the cleanup of the files queued behind it.
        """
        now = time.time()

        # Entries are ordered by insertion time, so the expired ones form a
        # prefix of the list: count them and stop at the first fresh entry.
        expired_count = 0
        for time_added, _ in self.files:
            if now - time_added > self.file_lifetime:
                expired_count += 1
            else:
                break

        expired = self.files[:expired_count]
        # Forget the entries *before* touching the filesystem, in place so
        # that external references to ``self.files`` stay valid.
        del self.files[:expired_count]

        for _, path in expired:
            try:
                path.unlink()
            except FileNotFoundError:
                # Already removed by someone else: nothing left to do.
                pass
            except OSError as exc:
                print(f"FileCleaner: could not remove {path}: {exc}")


# Module-level cleaner instance, created with the default file lifetime.
file_cleaner = FileCleaner()


def make_waveform(*args, **kwargs):
# Further remove some warnings.
be = time.time()
Expand Down Expand Up @@ -103,8 +128,12 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
file.name, output, MODEL.sample_rate, strategy="loudness",
loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
out_files.append(pool.submit(make_waveform, file.name))
file_cleaner.add(file.name)
res = [out_file.result() for out_file in out_files]
for file in res:
file_cleaner.add(file)
print("batch finished", len(texts), time.time() - be)
print("Tempfiles currently stored: ", len(file_cleaner.files))
return res


Expand Down Expand Up @@ -140,18 +169,21 @@ def _progress(generated, to_generate):
top_k=topk, top_p=topp, temperature=temperature, cfg_coef=cfg_coef)
return outs[0]


def toggle_audio_src(choice):
    """Build the component update that switches the audio input source.

    ``"mic"`` selects the microphone; any other value selects file upload.
    In both cases the current value is reset to ``None``.
    """
    if choice == "mic":
        source, label = "microphone", "Microphone"
    else:
        source, label = "upload", "File"
    return gr.update(source=source, value=None, label=label)



def ui_full(launch_kwargs):
with gr.Blocks() as interface:
gr.Markdown(
"""
# MusicGen
This is your private demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
This is your private demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
a simple and controllable model for music generation
presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
"""
)
Expand All @@ -160,14 +192,17 @@ def ui_full(launch_kwargs):
with gr.Row():
text = gr.Text(label="Input Text", interactive=True)
with gr.Column():
radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
melody = gr.Audio(source="upload", type="numpy", label="File", interactive=True, elem_id="melody-input")
radio = gr.Radio(["file", "mic"], value="file",
label="Condition on a melody (optional) File or Mic")
melody = gr.Audio(source="upload", type="numpy", label="File",
interactive=True, elem_id="melody-input")
with gr.Row():
submit = gr.Button("Submit")
# Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
_ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
with gr.Row():
model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
model = gr.Radio(["melody", "medium", "small", "large"],
label="Model", value="melody", interactive=True)
with gr.Row():
duration = gr.Slider(minimum=1, maximum=120, value=10, label="Duration", interactive=True)
with gr.Row():
Expand All @@ -177,7 +212,9 @@ def ui_full(launch_kwargs):
cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
with gr.Column():
output = gr.Video(label="Generated Music")
submit.click(predict_full, inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
submit.click(predict_full,
inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef],
outputs=[output])
radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
gr.Examples(
fn=predict_full,
Expand Down Expand Up @@ -221,17 +258,20 @@ def ui_full(launch_kwargs):
This can take a long time, and the model might lose consistency. The model might also
decide at arbitrary positions that the song ends.
**WARNING:** Choosing long durations will take a long time to generate (2min might take ~10min). An overlap of 12 seconds
is kept with the previously generated chunk, and 18 "new" seconds are generated each time.
**WARNING:** Choosing long durations will take a long time to generate (2min might take ~10min).
An overlap of 12 seconds is kept with the previously generated chunk, and 18 "new" seconds
are generated each time.
We present 4 model variations:
1. Melody -- a music generation model capable of generating music condition on text and melody inputs. **Note**, you can also use text only.
1. Melody -- a music generation model capable of generating music condition
on text and melody inputs. **Note**, you can also use text only.
2. Small -- a 300M transformer decoder conditioned on text only.
3. Medium -- a 1.5B transformer decoder conditioned on text only.
4. Large -- a 3.3B transformer decoder conditioned on text only (might OOM for the longest sequences.)
When using `melody`, ou can optionaly provide a reference audio from
which a broad melody will be extracted. The model will then try to follow both the description and melody provided.
which a broad melody will be extracted. The model will then try to follow both
the description and melody provided.
You can also use your own GPU or a Google Colab by following the instructions on our repo.
See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
Expand All @@ -248,11 +288,14 @@ def ui_batched(launch_kwargs):
"""
# MusicGen
This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
a simple and controllable model for music generation
presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
<br/>
<a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
<a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true"
style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
<img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
for longer sequences, more control and no queue.</p>
"""
)
Expand All @@ -261,13 +304,16 @@ def ui_batched(launch_kwargs):
with gr.Row():
text = gr.Text(label="Describe your music", lines=2, interactive=True)
with gr.Column():
radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
melody = gr.Audio(source="upload", type="numpy", label="File", interactive=True, elem_id="melody-input")
radio = gr.Radio(["file", "mic"], value="file",
label="Condition on a melody (optional) File or Mic")
melody = gr.Audio(source="upload", type="numpy", label="File",
interactive=True, elem_id="melody-input")
with gr.Row():
submit = gr.Button("Generate")
with gr.Column():
output = gr.Video(label="Generated Music")
submit.click(predict_batched, inputs=[text, melody], outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
submit.click(predict_batched, inputs=[text, melody],
outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
gr.Examples(
fn=predict_batched,
Expand Down

0 comments on commit dc8e1f7

Please sign in to comment.