From 170961b9ed12bd01253cda5ff3329bab3a29818f Mon Sep 17 00:00:00 2001 From: oxygen-dioxide <54425948+oxygen-dioxide@users.noreply.github.com> Date: Mon, 12 Aug 2024 20:39:27 +0800 Subject: [PATCH 1/2] support phonemes.json file --- .../DiffSinger/DiffSingerBasePhonemizer.cs | 39 ++++++++++++++--- OpenUtau.Core/DiffSinger/DiffSingerConfig.cs | 2 + OpenUtau.Core/DiffSinger/DiffSingerPitch.cs | 42 +++++++++++++++---- .../DiffSinger/DiffSingerRenderer.cs | 14 ++++++- OpenUtau.Core/DiffSinger/DiffSingerSinger.cs | 26 +++++++++--- OpenUtau.Core/DiffSinger/DiffSingerUtils.cs | 39 +++++++++++++++++ .../DiffSinger/DiffSingerVariance.cs | 39 ++++++++++++++--- 7 files changed, 177 insertions(+), 24 deletions(-) diff --git a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs index 0305bce39..1e3fcf288 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs @@ -17,6 +17,7 @@ public abstract class DiffSingerBasePhonemizer : MachineLearningPhonemizer { USinger singer; DsConfig dsConfig; + DictionarylanguageIds = new Dictionary(); string rootPath; float frameMs; ulong linguisticHash; @@ -24,7 +25,7 @@ public abstract class DiffSingerBasePhonemizer : MachineLearningPhonemizer InferenceSession linguisticModel; InferenceSession durationModel; IG2p g2p; - List phonemes; + Dictionary phonemeTokens; DiffSingerSpeakerEmbedManager speakerEmbedManager; string defaultPause = "SP"; @@ -53,12 +54,26 @@ public override void SetSinger(USinger singer) { Log.Error(e, $"failed to load dsconfig from {configPath}"); return; } + //Load language id if needed + if(dsConfig.use_lang_id){ + if(dsConfig.languages == null){ + Log.Error("\"languages\" field is not specified in dsconfig.yaml"); + return; + } + var langIdPath = Path.Join(rootPath, dsConfig.languages); + try { + languageIds = DiffSingerUtils.LoadLanguageIds(langIdPath); + } catch (Exception e) { + Log.Error(e, $"failed to load language id from {langIdPath}"); + return; + } + } this.frameMs = dsConfig.frameMs(); //Load g2p g2p = LoadG2p(rootPath); //Load phonemes list string phonemesPath = Path.Combine(rootPath, dsConfig.phonemes); - phonemes = File.ReadLines(phonemesPath,singer.TextFileEncoding).ToList(); + phonemeTokens = DiffSingerUtils.LoadPhonemes(phonemesPath); //Load models var linguisticModelPath = Path.Join(rootPath, dsConfig.linguistic); try { @@ -228,11 +243,11 @@ public DiffSingerSpeakerEmbedManager getSpeakerEmbedManager(){ } int PhonemeTokenize(string phoneme){ - int result = phonemes.IndexOf(phoneme); - if(result < 0){ + bool success = phonemeTokens.TryGetValue(phoneme, out int token); + if(!success){ throw new Exception($"Phoneme \"{phoneme}\" isn't supported by timing model. Please check {Path.Combine(rootPath, dsConfig.phonemes)}"); } - return result; + return token; } protected override void ProcessPart(Note[][] phrase) { @@ -290,6 +305,16 @@ protected override void ProcessPart(Note[][] phrase) { linguisticInputs.Add(NamedOnnxValue.CreateFromTensor("word_dur", new DenseTensor(word_dur, new int[] { word_dur.Length }, false) .Reshape(new int[] { 1, word_dur.Length }))); + //Language id + if(dsConfig.use_lang_id){ + var langIdByPhone = phrasePhonemes + .SelectMany(n => n.Phonemes) + .Select(p => (long)languageIds.GetValueOrDefault(p.Language(), 0)) + .ToArray(); + var langIdTensor = new DenseTensor(langIdByPhone, new int[] { langIdByPhone.Length }, false) + .Reshape(new int[] { 1, langIdByPhone.Length }); + linguisticInputs.Add(NamedOnnxValue.CreateFromTensor("languages", langIdTensor)); + } Onnx.VerifyInputNames(linguisticModel, linguisticInputs); var linguisticCache = Preferences.Default.DiffSingerTensorCache ? new DiffSingerCache(linguisticHash, linguisticInputs) @@ -393,6 +418,10 @@ public dsPhoneme(string symbol, string speaker){ Symbol = symbol; Speaker = speaker; } + + public string Language(){ + return DiffSingerUtils.PhonemeLanguage(Symbol); + } } class phonemesPerNote{ diff --git a/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs b/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs index 4486250ba..46b863aa2 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerConfig.cs @@ -16,6 +16,7 @@ public class AugmentationArgs { [Serializable] public class DsConfig { public string phonemes = "phonemes.txt"; + public string languages; public string acoustic; public string vocoder; public List speakers; @@ -28,6 +29,7 @@ public class DsConfig { public bool useTensionEmbed = false; public AugmentationArgs augmentationArgs; public bool useContinuousAcceleration = false; + public bool use_lang_id = false; [YamlMember(Alias = "use_shallow_diffusion")] public bool? _useShallowDiffusion; [YamlMember(Alias = "use_variable_depth")] public bool? _useVariableDepth; [YamlIgnore] diff --git a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs index 2bf4f61d3..e73d02d05 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerPitch.cs @@ -2,8 +2,6 @@ using System.Collections.Generic; using System.IO; using System.Linq; -using System.Runtime.CompilerServices; -using System.Text; using K4os.Hash.xxHash; using Microsoft.ML.OnnxRuntime; using Microsoft.ML.OnnxRuntime.Tensors; @@ -19,7 +17,8 @@ public class DsPitch : IDisposable { string rootPath; DsConfig dsConfig; - List phonemes; + Dictionary languageIds = new Dictionary(); + Dictionary phonemeTokens; ulong linguisticHash; InferenceSession linguisticModel; InferenceSession pitchModel; @@ -39,9 +38,23 @@ public DsPitch(string rootPath) if(dsConfig.pitch == null){ throw new Exception("This voicebank doesn't contain a pitch model"); } + //Load language id if needed + if(dsConfig.use_lang_id){ + if(dsConfig.languages == null){ + Log.Error("\"languages\" field is not specified in dsconfig.yaml"); + return; + } + var langIdPath = Path.Join(rootPath, dsConfig.languages); + try { + languageIds = DiffSingerUtils.LoadLanguageIds(langIdPath); + } catch (Exception e) { + Log.Error(e, $"failed to load language id from {langIdPath}"); + return; + } + } //Load phonemes list string phonemesPath = Path.Combine(rootPath, dsConfig.phonemes); - phonemes = File.ReadLines(phonemesPath, Encoding.UTF8).ToList(); + phonemeTokens = DiffSingerUtils.LoadPhonemes(phonemesPath); //Load models var linguisticModelPath = Path.Join(rootPath, dsConfig.linguistic); var linguisticModelBytes = File.ReadAllBytes(linguisticModelPath); @@ -81,11 +94,11 @@ void SetRange(T[] list, T value, int startIndex, int endIndex){ } int PhonemeTokenize(string phoneme){ - int result = phonemes.IndexOf(phoneme); - if(result < 0){ + bool success = phonemeTokens.TryGetValue(phoneme, out int token); + if(!success){ throw new Exception($"Phoneme \"{phoneme}\" isn't supported by pitch model. Please check {Path.Combine(rootPath, dsConfig.phonemes)}"); } - return result; + return token; } public RenderPitchResult Process(RenderPhrase phrase){ @@ -133,12 +146,25 @@ public RenderPitchResult Process(RenderPhrase phrase){ linguisticInputs.Add(NamedOnnxValue.CreateFromTensor("word_dur", new DenseTensor(word_dur, new int[] { word_dur.Length }, false) .Reshape(new int[] { 1, word_dur.Length }))); - }else{ + } else { //if predict_dur is false, use phoneme encode mode linguisticInputs.Add(NamedOnnxValue.CreateFromTensor("ph_dur", new DenseTensor(ph_dur.Select(x=>(Int64)x).ToArray(), new int[] { ph_dur.Length }, false) .Reshape(new int[] { 1, ph_dur.Length }))); } + //Language id + if(dsConfig.use_lang_id){ + var langIdByPhone = phrase.phones + .Select(p => (long)languageIds.GetValueOrDefault( + DiffSingerUtils.PhonemeLanguage(p.phoneme),0 + )) + .Prepend(0) + .Append(0) + .ToArray(); + var langIdTensor = new DenseTensor(langIdByPhone, new int[] { langIdByPhone.Length }, false) + .Reshape(new int[] { 1, langIdByPhone.Length }); + linguisticInputs.Add(NamedOnnxValue.CreateFromTensor("languages", langIdTensor)); + } Onnx.VerifyInputNames(linguisticModel, linguisticInputs); var linguisticCache = Preferences.Default.DiffSingerTensorCache diff --git a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs index 5944fe223..8437723be 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerRenderer.cs @@ -258,7 +258,19 @@ float[] InvokeDiffsinger(RenderPhrase phrase, double depth, int steps, Cancellat acousticInputs.Add(NamedOnnxValue.CreateFromTensor("speedup", new DenseTensor(new long[] { speedup }, new int[] { 1 }, false))); } - + //Language id + if(singer.dsConfig.use_lang_id){ + var langIdByPhone = phrase.phones + .Select(p => (long)singer.languageIds.GetValueOrDefault( + DiffSingerUtils.PhonemeLanguage(p.phoneme),0 + )) + .Prepend(0) + .Append(0) + .ToArray(); + var langIdTensor = new DenseTensor(langIdByPhone, new int[] { langIdByPhone.Length }, false) + .Reshape(new int[] { 1, langIdByPhone.Length }); + acousticInputs.Add(NamedOnnxValue.CreateFromTensor("languages", langIdTensor)); + } //speaker if(singer.dsConfig.speakers != null) { var speakerEmbedManager = singer.getSpeakerEmbedManager(); diff --git a/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs b/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs index 9eb114dbe..8a39af6a5 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerSinger.cs @@ -41,8 +41,9 @@ class DiffSingerSinger : USinger { List subbanks = new List(); List otos = new List(); Dictionary otoMap = new Dictionary(); - public List phonemes = new List(); + Dictionary phonemeTokens; + public Dictionary languageIds = new Dictionary(); public DsConfig dsConfig; public ulong acousticHash; public InferenceSession acousticSession = null; @@ -94,7 +95,8 @@ public DiffSingerSinger(Voicebank voicebank) { string phonemesPath = Path.Combine(Location, dsConfig.phonemes); if(phonemesPath != null && File.Exists(phonemesPath)){ try { - phonemes = File.ReadLines(phonemesPath, TextFileEncoding).ToList(); + phonemeTokens = DiffSingerUtils.LoadPhonemes(phonemesPath); + phonemes = phonemeTokens.Keys.ToList(); } catch (Exception e){ Log.Error(e, $"Failed to load phoneme list for {Name} from {phonemesPath}"); } @@ -102,6 +104,20 @@ public DiffSingerSinger(Voicebank voicebank) { Log.Error($"phonemes file not found for {Name} at {phonemesPath}"); } + //Load language Id if needed + if(dsConfig.use_lang_id){ + if(dsConfig.languages == null){ + Log.Error("\"languages\" field is not specified in dsconfig.yaml"); + } else { + var langIdPath = Path.Join(Location, dsConfig.languages); + try { + languageIds = DiffSingerUtils.LoadLanguageIds(langIdPath); + } catch (Exception e) { + Log.Error(e, $"failed to load language id from {langIdPath}"); + } + } + } + var dummyOtoSet = new UOtoSet(new OtoSet(), Location); foreach (var phone in phonemes) { var uOto = UOto.OfDummy(phone); @@ -194,11 +210,11 @@ public DsVariance getVariancePredictor(){ } public int PhonemeTokenize(string phoneme){ - int result = phonemes.IndexOf(phoneme); - if(result < 0){ + bool success = phonemeTokens.TryGetValue(phoneme, out int token); + if(!success){ throw new Exception($"Phoneme \"{phoneme}\" isn't supported by acoustic model. Please check {Path.Combine(Location, dsConfig.phonemes)}"); } - return result; + return token; } public override void FreeMemory(){ diff --git a/OpenUtau.Core/DiffSinger/DiffSingerUtils.cs b/OpenUtau.Core/DiffSinger/DiffSingerUtils.cs index d56c788e0..19876e2ed 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerUtils.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerUtils.cs @@ -1,5 +1,9 @@ using System; +using System.Collections.Generic; +using System.IO; +using System.Text; using Microsoft.ML.OnnxRuntime.Tensors; +using Newtonsoft.Json; using OpenUtau.Core.Render; namespace OpenUtau.Core.DiffSinger { @@ -95,5 +99,40 @@ public static string ShapeString(Tensor tensor){ var shape = tensor.Dimensions; return "(" + string.Join(", ", shape.ToArray()) + ")"; } + + public static Dictionary LoadPhonemes(string filePath){ + switch(Path.GetExtension(filePath).ToLower()){ + case ".json": + return LoadPhonemesFromJson(filePath); + default: + return LoadPhonemesFromTxt(filePath); + } + } + + static Dictionary LoadPhonemesFromJson(string filePath){ + var json = File.ReadAllText(filePath, Encoding.UTF8); + return JsonConvert.DeserializeObject>(json); + } + + static Dictionary LoadPhonemesFromTxt(string filePath){ + var lines = File.ReadAllLines(filePath, Encoding.UTF8); + var result = new Dictionary(); + for (int i = 0; i < lines.Length; i++) { + result[lines[i]] = i; + } + return result; + } + + public static Dictionary LoadLanguageIds(string filePath){ + var json = File.ReadAllText(filePath, Encoding.UTF8); + return JsonConvert.DeserializeObject>(json); + } + + public static string PhonemeLanguage(string phoneme){ + if(phoneme.Contains("/")){ + return phoneme.Split("/")[0]; + } + return ""; + } } } diff --git a/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs b/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs index c0e97612e..6ed09c849 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerVariance.cs @@ -22,7 +22,8 @@ public struct VarianceResult{ public class DsVariance : IDisposable{ string rootPath; DsConfig dsConfig; - List phonemes; + Dictionary languageIds = new Dictionary(); + Dictionary phonemeTokens; ulong linguisticHash; ulong varianceHash; InferenceSession linguisticModel; @@ -40,9 +41,23 @@ public DsVariance(string rootPath) dsConfig = Yaml.DefaultDeserializer.Deserialize( File.ReadAllText(Path.Combine(rootPath, "dsconfig.yaml"), Encoding.UTF8)); + //Load language id if needed + if(dsConfig.use_lang_id){ + if(dsConfig.languages == null){ + Log.Error("\"languages\" field is not specified in dsconfig.yaml"); + return; + } + var langIdPath = Path.Join(rootPath, dsConfig.languages); + try { + languageIds = DiffSingerUtils.LoadLanguageIds(langIdPath); + } catch (Exception e) { + Log.Error(e, $"failed to load language id from {langIdPath}"); + return; + } + } //Load phonemes list string phonemesPath = Path.Combine(rootPath, dsConfig.phonemes); - phonemes = File.ReadLines(phonemesPath, Encoding.UTF8).ToList(); + phonemeTokens = DiffSingerUtils.LoadPhonemes(phonemesPath); //Load models var linguisticModelPath = Path.Join(rootPath, dsConfig.linguistic); var linguisticModelBytes = File.ReadAllBytes(linguisticModelPath); @@ -78,12 +93,13 @@ public DiffSingerSpeakerEmbedManager getSpeakerEmbedManager(){ } int PhonemeTokenize(string phoneme){ - int result = phonemes.IndexOf(phoneme); - if(result < 0){ + bool success = phonemeTokens.TryGetValue(phoneme, out int token); + if(!success){ throw new Exception($"Phoneme \"{phoneme}\" isn't supported by variance model. Please check {Path.Combine(rootPath, dsConfig.phonemes)}"); } - return result; + return token; } + public VarianceResult Process(RenderPhrase phrase){ int headFrames = (int)Math.Round(headMs / frameMs); int tailFrames = (int)Math.Round(tailMs / frameMs); @@ -132,6 +148,19 @@ public VarianceResult Process(RenderPhrase phrase){ new DenseTensor(ph_dur.Select(x=>(Int64)x).ToArray(), new int[] { ph_dur.Length }, false) .Reshape(new int[] { 1, ph_dur.Length }))); } + //Language id + if(dsConfig.use_lang_id){ + var langIdByPhone = phrase.phones + .Select(p => (long)languageIds.GetValueOrDefault( + DiffSingerUtils.PhonemeLanguage(p.phoneme),0 + )) + .Prepend(0) + .Append(0) + .ToArray(); + var langIdTensor = new DenseTensor(langIdByPhone, new int[] { langIdByPhone.Length }, false) + .Reshape(new int[] { 1, langIdByPhone.Length }); + linguisticInputs.Add(NamedOnnxValue.CreateFromTensor("languages", langIdTensor)); + } Onnx.VerifyInputNames(linguisticModel, linguisticInputs); var linguisticCache = Preferences.Default.DiffSingerTensorCache From d49a9afd169b06008d41e0071b37624c1ac4f3fc Mon Sep 17 00:00:00 2001 From: oxygen-dioxide <54425948+oxygen-dioxide@users.noreply.github.com> Date: Fri, 23 Aug 2024 14:03:46 +0800 Subject: [PATCH 2/2] support phonetic hint without language prefix --- .../DiffSinger/DiffSingerBasePhonemizer.cs | 35 +++++++++++++++---- .../DiffSingerChinesePhonemizer.cs | 1 + .../DiffSingerEnglishPhonemizer.cs | 1 + .../Phonemizers/DiffSingerGermanPhonemizer.cs | 1 + .../DiffSingerItalianPhonemizer.cs | 1 + .../DiffSingerJapanesePhonemizer.cs | 1 + .../DiffSingerJyutpingPhonemizer.cs | 1 + .../DiffSingerKoreanG2PPhonemizer.cs | 1 + .../Phonemizers/DiffSingerKoreanPhonemizer.cs | 1 + .../DiffSingerPortuguesePhonemizer.cs | 1 + .../DiffSingerRussianPhonemizer.cs | 1 + .../DiffSingerSpanishPhonemizer.cs | 1 + 12 files changed, 39 insertions(+), 7 deletions(-) diff --git a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs index 1e3fcf288..dc738f93e 100644 --- a/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/DiffSingerBasePhonemizer.cs @@ -30,6 +30,7 @@ public abstract class DiffSingerBasePhonemizer : MachineLearningPhonemizer string defaultPause = "SP"; protected virtual string GetDictionaryName()=>"dsdict.yaml"; + protected virtual string GetLangCode()=>String.Empty;//The language code of the language the phonemizer is made for public override void SetSinger(USinger singer) { this.singer = singer; @@ -121,6 +122,29 @@ protected virtual IG2p LoadG2p(string rootPath) { return new G2pFallbacks(g2ps.ToArray()); } + //Check if the phoneme is supported. If unsupported, return an empty string. + //And apply language prefix to phoneme + string ValidatePhoneme(string phoneme){ + if(g2p.IsValidSymbol(phoneme)){ + return phoneme; + } + var langCode = GetLangCode(); + if(langCode != String.Empty){ + var phonemeWithLanguage = langCode + "/" + phoneme; + if(g2p.IsValidSymbol(phonemeWithLanguage)){ + return phonemeWithLanguage; + } + } + return String.Empty; + } + + string[] ParsePhoneticHint(string phoneticHint) { + return phoneticHint.Split() + .Select(ValidatePhoneme) + .Where(s => !String.IsNullOrEmpty(s)) // skip invalid symbols. + .ToArray(); + } + string[] GetSymbols(Note note) { //priority: //1. phonetic hint @@ -129,9 +153,7 @@ string[] GetSymbols(Note note) { //4. empty if (!string.IsNullOrEmpty(note.phoneticHint)) { // Split space-separated symbols into an array. - return note.phoneticHint.Split() - .Where(s => g2p.IsValidSymbol(s)) // skip the invalid symbols. - .ToArray(); + return ParsePhoneticHint(note.phoneticHint); } // User has not provided hint, query g2p dictionary. var g2presult = g2p.Query(note.lyric) @@ -139,10 +161,8 @@ string[] GetSymbols(Note note) { if(g2presult != null) { return g2presult; } - //not founded in g2p dictionary, treat lyric as phonetic hint - var lyricSplited = note.lyric.Split() - .Where(s => g2p.IsValidSymbol(s)) // skip the invalid symbols. - .ToArray(); + //not found in g2p dictionary, treat lyric as phonetic hint + var lyricSplited = ParsePhoneticHint(note.lyric); if (lyricSplited.Length > 0) { return lyricSplited; } @@ -183,6 +203,7 @@ List ProcessWord(Note[] notes, string[] symbols){ } for(int i=0; i=2 && isGlide[i-1] && !isVowel[i-2]){ isStart[i-1] = true; }else{ diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerChinesePhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerChinesePhonemizer.cs index e32a6d0d0..55f5b1f3b 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerChinesePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerChinesePhonemizer.cs @@ -6,6 +6,7 @@ namespace OpenUtau.Core.DiffSinger { [Phonemizer("DiffSinger Chinese Phonemizer", "DIFFS ZH", language: "ZH")] public class DiffSingerChinesePhonemizer : DiffSingerBasePhonemizer { protected override string GetDictionaryName()=>"dsdict-zh.yaml"; + protected override string GetLangCode()=>"zh"; protected override string[] Romanize(IEnumerable lyrics) { return BaseChinesePhonemizer.Romanize(lyrics); } diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerEnglishPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerEnglishPhonemizer.cs index bc9c912ca..f975ed7db 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerEnglishPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerEnglishPhonemizer.cs @@ -7,6 +7,7 @@ namespace OpenUtau.Core.DiffSinger public class DiffSingerEnglishPhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName()=>"dsdict-en.yaml"; + protected override string GetLangCode()=>"en"; protected override IG2p LoadBaseG2p() => new ArpabetG2p(); protected override string[] GetBaseG2pVowels() => new string[] { "aa", "ae", "ah", "ao", "aw", "ay", "eh", "er", diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerGermanPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerGermanPhonemizer.cs index 5acd2854e..75908e04e 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerGermanPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerGermanPhonemizer.cs @@ -7,6 +7,7 @@ namespace OpenUtau.Core.DiffSinger public class DiffSingerGermanPhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName()=>"dsdict-de.yaml"; + protected override string GetLangCode()=>"de"; protected override IG2p LoadBaseG2p() => new GermanG2p(); protected override string[] GetBaseG2pVowels() => new string[] { "aa", "ae", "ah", "ao", "aw", "ax", "ay", "ee", "eh", "er", "ex", "ih", "iy", "oe", "ohh", "ooh", "oy", "ue", "uh", "uw", "yy" diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerItalianPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerItalianPhonemizer.cs index 6d03c13be..87a354b18 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerItalianPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerItalianPhonemizer.cs @@ -5,6 +5,7 @@ namespace OpenUtau.Core.DiffSinger { [Phonemizer("DiffSinger Italian Phonemizer", "DIFFS IT", language: "IT")] public class DiffSingerItalianPhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName() => "dsdict-it.yaml"; + protected override string GetLangCode()=>"it"; protected override IG2p LoadBaseG2p() => new ItalianG2p(); protected override string[] GetBaseG2pVowels() => new string[] { "a", "a1", "e", "e1", "EE", "i", "i1", "o", "o1", "OO", "u", "u1" diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs index 6e7d8ebf4..72291c1c9 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJapanesePhonemizer.cs @@ -8,6 +8,7 @@ namespace OpenUtau.Core.DiffSinger { [Phonemizer("DiffSinger Japanese Phonemizer", "DIFFS JA", language: "JA")] public class DiffSingerJapanesePhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName()=>"dsdict-ja.yaml"; + protected override string GetLangCode()=>"ja"; protected override IG2p LoadBaseG2p() => new JapaneseMonophoneG2p(); protected override string[] GetBaseG2pVowels() => new string[] { "A", "AP", "E", "I", "N", "O", "SP", "U", diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJyutpingPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJyutpingPhonemizer.cs index 0e84b153b..6b19f8919 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJyutpingPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerJyutpingPhonemizer.cs @@ -7,6 +7,7 @@ namespace OpenUtau.Core.DiffSinger { [Phonemizer("DiffSinger Jyutping Phonemizer", "DIFFS ZH-YUE", language: "ZH-YUE")] public class DiffSingerJyutpingPhonemizer : DiffSingerBasePhonemizer { protected override string GetDictionaryName() => "dsdict-zh-yue.yaml"; + protected override string GetLangCode()=>"yue"; protected override string[] Romanize(IEnumerable lyrics) { return ZhG2p.CantoneseInstance.Convert(lyrics.ToList(), false, true).Split(" "); } diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerKoreanG2PPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerKoreanG2PPhonemizer.cs index c54212ac1..64091b837 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerKoreanG2PPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerKoreanG2PPhonemizer.cs @@ -7,6 +7,7 @@ namespace OpenUtau.Core.DiffSinger public class DiffSingerKoreanG2PPhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName() => "dsdict-ko.yaml"; + protected override string GetLangCode()=>"ko"; protected override IG2p LoadBaseG2p() => new KoreanG2p(); protected override string[] GetBaseG2pVowels() => new string[] { "a", "e", "eo", "eu", "i", "o", "u", "w", "y" diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerKoreanPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerKoreanPhonemizer.cs index e3c641418..39ac4832b 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerKoreanPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerKoreanPhonemizer.cs @@ -9,6 +9,7 @@ namespace OpenUtau.Core.DiffSinger public class DiffSingerKoreanPhonemizer : DiffSingerBasePhonemizer { protected override string GetDictionaryName()=>"dsdict-ko.yaml"; + protected override string GetLangCode()=>"ko"; public override void SetUp(Note[][] groups, UProject project, UTrack track) { if (groups.Length == 0) { diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerPortuguesePhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerPortuguesePhonemizer.cs index 78bd965c4..e95732ade 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerPortuguesePhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerPortuguesePhonemizer.cs @@ -7,6 +7,7 @@ namespace OpenUtau.Core.DiffSinger public class DiffSingerPortuguesePhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName()=>"dsdict-pt.yaml"; + protected override string GetLangCode()=>"pt"; protected override IG2p LoadBaseG2p() => new PortugueseG2p(); protected override string[] GetBaseG2pVowels() => new string[] { "E", "O", "a", "a~", "e", "e~", "i", "i~", "o", "o~", "u", "u~" diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerRussianPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerRussianPhonemizer.cs index 3e0dade02..9c4b79316 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerRussianPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerRussianPhonemizer.cs @@ -7,6 +7,7 @@ namespace OpenUtau.Core.DiffSinger public class DiffSingerRussianPhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName()=>"dsdict-ru.yaml"; + protected override string GetLangCode()=>"ru"; protected override IG2p LoadBaseG2p() => new RussianG2p(); protected override string[] GetBaseG2pVowels() => new string[] { "a", "aa", "ay", "ee", "i", "ii", "ja", "je", "jo", "ju", "oo", diff --git a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerSpanishPhonemizer.cs b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerSpanishPhonemizer.cs index c8ab7aae0..1ebb95813 100644 --- a/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerSpanishPhonemizer.cs +++ b/OpenUtau.Core/DiffSinger/Phonemizers/DiffSingerSpanishPhonemizer.cs @@ -7,6 +7,7 @@ namespace OpenUtau.Core.DiffSinger public class DiffSingerSpanishPhonemizer : DiffSingerG2pPhonemizer { protected override string GetDictionaryName()=>"dsdict-es.yaml"; + protected override string GetLangCode()=>"es"; protected override IG2p LoadBaseG2p() => new SpanishG2p(); protected override string[] GetBaseG2pVowels() => new string[] { "a", "e", "i", "o", "u"