From 2ec8729d51a8561841d95b55bbe7fc4e7991eab2 Mon Sep 17 00:00:00 2001
From: Laurent Mazare
Date: Thu, 22 Aug 2024 22:22:03 +0100
Subject: [PATCH] Fix for parler-tts, do not add the last slice of padding tokens. (#2442)

* Fix for parler-tts, do not add the last slice of padding tokens.

* Support for the mini model.
---
 candle-examples/examples/parler-tts/main.rs  | 23 ++++++++++++++++++--
 candle-transformers/src/models/parler_tts.rs |  1 -
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/candle-examples/examples/parler-tts/main.rs b/candle-examples/examples/parler-tts/main.rs
index 4e3730e2cb..88e0ef8b30 100644
--- a/candle-examples/examples/parler-tts/main.rs
+++ b/candle-examples/examples/parler-tts/main.rs
@@ -86,6 +86,17 @@ struct Args {
     /// The output wav file.
     #[arg(long, default_value = "out.wav")]
     out_file: String,
+
+    #[arg(long, default_value = "large-v1")]
+    which: Which,
+}
+
+#[derive(Clone, Debug, Copy, PartialEq, Eq, clap::ValueEnum)]
+enum Which {
+    #[value(name = "large-v1")]
+    LargeV1,
+    #[value(name = "mini-v1")]
+    MiniV1,
 }
 
 fn main() -> anyhow::Result<()> {
@@ -117,7 +128,10 @@ fn main() -> anyhow::Result<()> {
     let api = hf_hub::api::sync::Api::new()?;
     let model_id = match args.model_id {
         Some(model_id) => model_id.to_string(),
-        None => "parler-tts/parler-tts-large-v1".to_string(),
+        None => match args.which {
+            Which::LargeV1 => "parler-tts/parler-tts-large-v1".to_string(),
+            Which::MiniV1 => "parler-tts/parler-tts-mini-v1".to_string(),
+        },
     };
     let revision = match args.revision {
         Some(r) => r,
@@ -130,7 +144,12 @@ fn main() -> anyhow::Result<()> {
     ));
     let model_files = match args.model_file {
         Some(m) => vec![m.into()],
-        None => candle_examples::hub_load_safetensors(&repo, "model.safetensors.index.json")?,
+        None => match args.which {
+            Which::MiniV1 => vec![repo.get("model.safetensors")?],
+            Which::LargeV1 => {
+                candle_examples::hub_load_safetensors(&repo, "model.safetensors.index.json")?
+            }
+        },
     };
     let config = match args.config_file {
         Some(m) => m.into(),
diff --git a/candle-transformers/src/models/parler_tts.rs b/candle-transformers/src/models/parler_tts.rs
index 16023a7c6f..da40124741 100644
--- a/candle-transformers/src/models/parler_tts.rs
+++ b/candle-transformers/src/models/parler_tts.rs
@@ -429,7 +429,6 @@ impl Model {
         let min_len = all_audio_tokens.iter().map(|v| v.len()).min().unwrap_or(0);
         all_audio_tokens.iter_mut().for_each(|v| {
             v.resize(min_len, 0);
-            v.push(self.pad_token_id)
         });
         let all_audio_tokens = Tensor::new(all_audio_tokens, &candle::Device::Cpu)?;
         Ok(all_audio_tokens)