diff --git a/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs b/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs index e92101933..dffbabe70 100644 --- a/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs +++ b/OpenUtau.Core/Voicevox/Phonemizers/SimpleVoicevoxPhonemizer.cs @@ -7,7 +7,7 @@ namespace Voicevox { [Phonemizer("Simple Voicevox Japanese Phonemizer", "S-VOICEVOX JA", language: "JA")] public class SimpleVoicevoxPhonemizer : Phonemizer { - protected VoicevoxSinger singer; + protected VoicevoxSinger singer; public override void SetSinger(USinger singer) { this.singer = singer as VoicevoxSinger; @@ -18,7 +18,7 @@ public override void SetSinger(USinger singer) { } protected bool IsSyllableVowelExtensionNote(Note note) { - return note.lyric.StartsWith("+~") || note.lyric.StartsWith("+*"); + return note.lyric.StartsWith("+~") || note.lyric.StartsWith("+*") || note.lyric.StartsWith("+") || note.lyric.StartsWith("-"); } public override Result Process(Note[] notes, Note? prev, Note? next, Note? prevNeighbour, Note? nextNeighbour, Note[] prevNeighbours) { @@ -32,15 +32,17 @@ public override Result Process(Note[] notes, Note? prev, Note? next, Note? 
prevN notes[i].lyric = lyricList[1]; } if (!IsSyllableVowelExtensionNote(notes[i])) { - if (VoicevoxUtils.IsHiraKana(notes[i].lyric)) { - phonemes.Add(new Phoneme { phoneme = notes[i].lyric }); - } else if (VoicevoxUtils.IsPau(notes[i].lyric)) { - phonemes.Add(new Phoneme { phoneme = "R" }); - } else if (VoicevoxUtils.dic.IsDic(notes[i].lyric)) { - phonemes.Add(new Phoneme { phoneme = VoicevoxUtils.dic.Lyrictodic(notes[i].lyric) }); - } else { - phonemes.Add(new Phoneme { phoneme = "error"}); + string val = "error"; + if (VoicevoxUtils.phoneme_List.kanas.ContainsKey(notes[i].lyric) || VoicevoxUtils.phoneme_List.paus.ContainsKey(notes[i].lyric)) { + if (VoicevoxUtils.phoneme_List.paus.TryGetValue(notes[i].lyric, out string str)) { + val = str; + } else if (VoicevoxUtils.dic.IsDic(notes[i].lyric)) { + val = VoicevoxUtils.dic.Lyrictodic(notes[i].lyric); + } else { + val = notes[i].lyric; + } } + phonemes.Add(new Phoneme { phoneme = val }); } } return new Result { phonemes = phonemes.ToArray() }; diff --git a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs index 5e2c9ccb2..8af34d48c 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxConfig.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxConfig.cs @@ -167,61 +167,6 @@ public void SaveLicenses(string location) { } } - public class Phoneme_list { - public string[] vowels; - public string[] consonants; - public string[] kana; - } - - public class Dictionary_list { - public Dictionary dict = new Dictionary(); - - public void Loaddic(string location) { - try { - var parentDirectory = Directory.GetParent(location).ToString(); - var yamlPath = Path.Join(parentDirectory, "dictionary.yaml"); - if (File.Exists(yamlPath)) { - var yamlTxt = File.ReadAllText(yamlPath); - var yamlObj = Yaml.DefaultDeserializer.Deserialize>>>(yamlTxt); - var list = yamlObj["list"]; - dict = new Dictionary(); - - foreach (var item in list) { - foreach (var pair in item) { - dict[pair.Key] = pair.Value; - } - } - - } - } catch 
(Exception e) { - Log.Error($"Failed to read dictionary file. : {e}"); - } - } - public string Notetodic(Note[][] notes, int index) { - if (dict.TryGetValue(notes[index][0].lyric, out var lyric_)) { - if (string.IsNullOrEmpty(lyric_)) { - return ""; - } - return lyric_; - } - return notes[index][0].lyric; - } - - public string Lyrictodic(string lyric) { - if (dict.TryGetValue(lyric, out var lyric_)) { - if (string.IsNullOrEmpty(lyric_)) { - return ""; - } - return lyric_; - } - return lyric; - } - - public bool IsDic(string lyric) { - return dict.ContainsKey(lyric); - } - } - public class Style_infos { public int id; public string icon = string.Empty; diff --git a/OpenUtau.Core/Voicevox/VoicevoxSinger.cs b/OpenUtau.Core/Voicevox/VoicevoxSinger.cs index 3250d84da..c83441a9b 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxSinger.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxSinger.cs @@ -81,19 +81,18 @@ void Load() { table.Clear(); otos.Clear(); try { - var parentDirectory = Directory.GetParent(this.Location).ToString(); - var yamlPath = Path.Join(parentDirectory, "phonemes.yaml"); - var yamlTxt = File.ReadAllText(yamlPath); - var phonemes_list = Yaml.DefaultDeserializer.Deserialize(yamlTxt); //Prepared for planned changes or additions to phonemizers. 
- foreach (var str in phonemes_list.vowels) { - phonemes.Add(str); + //foreach (var str in VoicevoxUtils.phoneme_List.vowels) { + // phonemes.Add(str); + //} + //foreach (var str in VoicevoxUtils.phoneme_List.consonants) { + // phonemes.Add(str); + //} + foreach (var str in VoicevoxUtils.phoneme_List.kanas) { + phonemes.Add(str.Key); } - foreach (var str in phonemes_list.consonants) { - phonemes.Add(str); - } - foreach (var str in phonemes_list.kana) { - phonemes.Add(str); + foreach (var str in VoicevoxUtils.phoneme_List.paus) { + phonemes.Add(str.Key); } } catch (Exception e) { Log.Error(e, $"Failed to load phonemes.yaml for {Name}"); diff --git a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs index 5f202119f..f00d9b79e 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.IO; using System.Linq; using Newtonsoft.Json; using Newtonsoft.Json.Linq; @@ -39,15 +40,97 @@ public class VoicevoxQueryNotes { public class VoicevoxQueryMain { public List notes = new List(); } + public class Phoneme_list { + public string[] vowels = "a i u e o A I U E O N pau cl".Split(); + public string[] consonants = "b by ch d dy f g gw gy h hy j k kw ky m my n ny p py r ry s sh t ts ty v w y z".Split(); + public Dictionary kanas = new Dictionary(); + public Dictionary paus = new Dictionary(); + public Phoneme_list() { + var kanaGroups = new List { + "あ ば びゃ ちゃ だ でゃ ふぁ が ぐゎ ぎゃ は ひゃ じゃ か くゎ きゃ ま みゃ な にゃ ぱ ぴゃ ら りゃ さ しゃ た つぁ てゃ ゔぁ わ や ざ".Split(), + "い び ち ぢ でぃ ふぃ ぎ ひ じ き み に ぴ り すぃ し てぃ つぃ ゔぃ うぃ ずぃ".Split(), + "う ぶ びゅ ちゅ どぅ でゅ ふ ぐ ぎゅ ひゅ じゅ く きゅ む みゅ ぬ にゅ ぷ ぴゅ る りゅ す しゅ つ つ てゅ ゔ ゆ ず".Split(), + "え べ びぇ ちぇ で でぇ ふぇ げ ぎぇ へ ひぇ じぇ け きぇ め みぇ ね にぇ ぺ ぴぇ れ りぇ せ しぇ て つぇ ゔぇ うぇ いぇ ぜ".Split(), + "お ぼ びょ ちょ ど でょ ふぉ ご ぎょ ほ ひょ じょ こ きょ も みょ の にょ ぽ ぴょ ろ りょ そ しょ と つぉ てょ ゔぉ を よ ぞ".Split(), + "ん ン".Split(), + "っ ッ".Split() + }; + + foreach (var group in 
kanaGroups) { + foreach (var kana in group) { + if (!kanas.ContainsKey(kana)) { + kanas.Add(kana.Normalize(), group[0].Normalize()); + } + } + } + string[] pauseGroups = "R pau AP SP".Split(); + + foreach (string group in pauseGroups) { + if (!paus.ContainsKey(group)) { + paus.Add(group.Normalize(), pauseGroups[0].Normalize()); + } + } + } + } + + public class Dictionary_list { + public Dictionary dict = new Dictionary(); + + public void Loaddic(string location) { + try { + var parentDirectory = Directory.GetParent(location).ToString(); + var yamlPath = Path.Join(parentDirectory, "dictionary.yaml"); + if (File.Exists(yamlPath)) { + var yamlTxt = File.ReadAllText(yamlPath); + var yamlObj = Yaml.DefaultDeserializer.Deserialize>>>(yamlTxt); + var list = yamlObj["list"]; + dict = new Dictionary(); + + foreach (var item in list) { + foreach (var pair in item) { + dict[pair.Key] = pair.Value; + } + } + + } + } catch (Exception e) { + Log.Error($"Failed to read dictionary file. : {e}"); + } + } + public string Notetodic(Note[][] notes, int index) { + if (dict.TryGetValue(notes[index][0].lyric, out var lyric_)) { + if (string.IsNullOrEmpty(lyric_)) { + return ""; + } + return lyric_; + } + return notes[index][0].lyric; + } + + public string Lyrictodic(string lyric) { + if (dict.TryGetValue(lyric, out var lyric_)) { + if (string.IsNullOrEmpty(lyric_)) { + return ""; + } + return lyric_; + } + return lyric; + } + + public bool IsDic(string lyric) { + return dict.ContainsKey(lyric); + } + } - internal static class VoicevoxUtils { + public static class VoicevoxUtils { public const string VOLC = "volc"; public const int headS = 1; public const int tailS = 1; public const double fps = 93.75; public const string defaultID = "6000"; public static Dictionary_list dic = new Dictionary_list(); + public static Phoneme_list phoneme_List = new Phoneme_list(); public static VoicevoxNote VoicevoxVoiceBase(VoicevoxQueryMain qNotes, string id) { var queryurl = new VoicevoxURL() { method = 
"POST", path = "/sing_frame_audio_query", query = new Dictionary { { "speaker", id } }, body = JsonConvert.SerializeObject(qNotes) }; @@ -83,11 +166,11 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti string lyric = dic.Notetodic(notes, index); int length = (int)Math.Round(((timeAxis.TickPosToMsPos(notes[index].Sum(n => n.duration)) / 1000f) * VoicevoxUtils.fps), MidpointRounding.AwayFromZero); //Avoid synthesis without at least two frames. - if (length < 2 ) { + if (length < 2) { length = 2; } int? tone = null; - if (!string.IsNullOrEmpty(lyric) || VoicevoxUtils.IsPau(lyric)) { + if (!string.IsNullOrEmpty(lyric)) { if (notes[index][0].phonemeAttributes != null) { if (notes[index][0].phonemeAttributes.Length > 0) { tone = notes[index][0].tone + notes[index][0].phonemeAttributes[0].toneShift; @@ -97,6 +180,8 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti } else { tone = notes[index][0].tone; } + } else { + lyric = ""; } qnotes.notes.Add(new VoicevoxQueryNotes { lyric = lyric, @@ -146,21 +231,13 @@ public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double de return result; } - - public static bool IsHiraKana(string s) { - foreach(char c in s.ToCharArray()) { - if (!('\u3041' <= c && c <= '\u309F') || ('\u30A0' <= c && c <= '\u30FF') || c == '\u30FC' || c == '\u30A0') { - return false; - } - } - return true; + public static bool IsPau(string s) { + return phoneme_List.paus.ContainsKey(s); } - public static bool IsPau(string s) { - if (s.EndsWith("R") || s.ToLower().EndsWith("pau") || s.EndsWith("AP") || s.EndsWith("SP")) { - return true; - } - return false; + public static bool TryGetPau(string s, out string str) { + phoneme_List.paus.TryGetValue(s, out str); + return phoneme_List.paus.ContainsKey(s); } public static string getBaseSingerID(VoicevoxSinger singer) { diff --git a/OpenUtau.Plugin.Builtin/SimpleVoicevoxENtoJAPhonemizer.cs 
using System;
using System.Collections.Generic;
using System.Linq;
using OpenUtau.Api;
using OpenUtau.Core.G2p;
using OpenUtau.Core.Ustx;
using OpenUtau.Plugin.Builtin;
using WanaKanaNet;

namespace OpenUtau.Core.Voicevox {
    /// <summary>
    /// English-to-Japanese phonemizer for VOICEVOX singers. English lyrics are
    /// converted to ARPAbet-derived symbols, regrouped into consonant/vowel
    /// units, mapped onto romaji and finally emitted as kana.
    /// </summary>
    [Phonemizer("Simple Voicevox ENtoJA Phonemizer", "S-VOICEVOX EN to JA", "TUBS & ROKU10SHI", language: "EN")]
    public class SimpleVoicevoxENtoJAPhonemizer : SyllableBasedPhonemizer {

        protected VoicevoxSinger singer;

        private static readonly string[] vowels =
            "a i u e o ay ey oy ow aw".Split();

        private static readonly string[] consonants =
            "b by ch d dh f g gy h hy j k ky l ly m my n ny ng p py r ry s sh t ts th v w y z zh".Split();

        /// <summary>Diphthong symbols that <see cref="GetSymbols"/> splits into two single-character symbols.</summary>
        private static readonly HashSet<string> diphthongs = new HashSet<string> {
            "ay", "ey", "oy", "ow", "aw",
        };

        /// <summary>Two-consonant sequences that are merged into a single consonant symbol.</summary>
        private static readonly HashSet<string> specialClusters =
            new HashSet<string>("ky gy ts ny hy by py my ry ly".Split());

        /// <summary>Maps dictionary (ARPAbet, lower-case) symbols to the symbols used by this phonemizer.</summary>
        private static readonly Dictionary<string, string> dictionaryPhonemesReplacement = new Dictionary<string, string> {
            { "aa", "a" },
            { "ae", "e" },
            { "ah", "a" },
            { "ao", "o" },
            { "aw", "aw" },
            { "ay", "ay" },
            { "b", "b" },
            { "ch", "ch" },
            { "d", "d" },
            { "dh", "dh" },
            { "eh", "e" },
            { "er", "o" },
            { "ey", "ey" },
            { "f", "f" },
            { "g", "g" },
            { "hh", "h" },
            { "ih", "e" },
            { "iy", "i" },
            { "jh", "j" },
            { "k", "k" },
            { "l", "l" },
            { "m", "m" },
            { "n", "n" },
            { "ng", "ng" },
            { "ow", "ow" },
            { "oy", "oy" },
            { "p", "p" },
            { "r", "r" },
            { "s", "s" },
            { "sh", "sh" },
            { "t", "t" },
            { "th", "th" },
            { "uh", "o" },
            { "uw", "u" },
            { "v", "v" },
            { "w", "w" },
            { "y", "y" },
            { "z", "z" },
            { "zh", "zh" },
        };

        /// <summary>Romaji prefix used when a consonant directly precedes the vowel of a syllable.</summary>
        private static readonly Dictionary<string, string> startingConsonant = new Dictionary<string, string> {
            { "", "" },
            { "b", "b" },
            { "by", "by" },
            { "ch", "ch" },
            { "d", "d" },
            { "dh", "d" },
            { "f", "f" },
            { "g", "g" },
            { "gy", "gy" },
            { "h", "h" },
            { "hy", "hy" },
            { "j", "j" },
            { "k", "k" },
            { "ky", "ky" },
            { "l", "r" },
            { "ly", "ry" },
            { "m", "m" },
            { "my", "my" },
            { "n", "n" },
            { "ny", "ny" },
            { "ng", "n" },
            { "p", "p" },
            { "py", "py" },
            { "r", "rr" },
            { "ry", "ry" },
            { "s", "s" },
            { "sh", "sh" },
            { "t", "t" },
            { "ts", "ts" },
            { "th", "s" },
            { "v", "v" },
            { "w", "w" },
            { "y", "y" },
            { "z", "z" },
            { "zh", "sh" },
        };

        /// <summary>Kana emitted for a consonant that has no vowel of its own (leading cluster members and endings).</summary>
        private static readonly Dictionary<string, string> soloConsonant = new Dictionary<string, string> {
            { "b", "ぶ" },
            { "by", "び" },
            { "ch", "ちゅ" },
            { "d", "ど" },
            { "dh", "ず" },
            { "f", "ふ" },
            { "g", "ぐ" },
            { "gy", "ぎ" },
            { "h", "ほ" },
            { "hy", "ひ" },
            { "j", "じゅ" },
            { "k", "く" },
            { "ky", "き" },
            { "l", "う" },
            { "ly", "り" },
            { "m", "む" },
            { "my", "み" },
            { "n", "ん" },
            { "ny", "に" },
            { "ng", "ん" },
            { "p", "ぷ" },
            { "py", "ぴ" },
            { "r", "う" },
            { "ry", "り" },
            { "s", "す" },
            { "sh", "しゅ" },
            { "t", "と" },
            { "ts", "つ" },
            { "th", "す" },
            { "v", "ヴ" },
            { "w", "う" },
            { "y", "い" },
            { "z", "ず" },
            { "zh", "しゅ" },
        };

        /// <summary>Romaji CV rewrites applied before kana conversion.</summary>
        private static readonly Dictionary<string, string> altCv = new Dictionary<string, string> {
            {"si", "suli" },
            {"zi", "zuli" },
            {"ti", "teli" },
            {"tu", "tolu" },
            {"di", "deli" },
            {"du", "dolu" },
            {"hu", "holu" },
            {"yi", "i" },
            {"wu", "u" },
            {"wo", "ulo" },
            {"rra", "wa" },
            {"rri", "wi" },
            {"rru", "ru" },
            {"rre", "we" },
            {"rro", "ulo" },
        };

        /// <summary>Substitutes used in endings when the primary kana is not available on the singer.</summary>
        private static readonly Dictionary<string, string> conditionalAlt = new Dictionary<string, string> {
            {"ulo", "wo"},
            {"va", "fa"},
            {"vi", "fi"},
            {"vu", "fu"},
            {"ヴ", "ふ"},
            {"ve", "fe"},
            {"vo", "fo"},
        };

        /// <summary>Two-part romaji fallbacks for CVs whose single kana is not available on the singer.</summary>
        private static readonly Dictionary<string, string[]> extraCv = new Dictionary<string, string[]> {
            {"kye", new [] { "ki", "e" } },
            {"gye", new [] { "gi", "e" } },
            {"suli", new [] { "se", "i" } },
            {"she", new [] { "si", "e" } },
            {"zuli", new [] { "ze", "i" } },
            {"je", new [] { "ji", "e" } },
            {"teli", new [] { "te", "i" } },
            {"tolu", new [] { "to", "u" } },
            {"che", new [] { "chi", "e" } },
            {"tsa", new [] { "tsu", "a" } },
            {"tsi", new [] { "tsu", "i" } },
            {"tse", new [] { "tsu", "e" } },
            {"tso", new [] { "tsu", "o" } },
            {"deli", new [] { "de", "i" } },
            {"dolu", new [] { "do", "u" } },
            {"nye", new [] { "ni", "e" } },
            {"hye", new [] { "hi", "e" } },
            {"holu", new [] { "ho", "u" } },
            {"fa", new [] { "fu", "a" } },
            {"fi", new [] { "fu", "i" } },
            {"fe", new [] { "fu", "e" } },
            {"fo", new [] { "fu", "o" } },
            {"bye", new [] { "bi", "e" } },
            {"pye", new [] { "pi", "e" } },
            {"mye", new [] { "mi", "e" } },
            {"ye", new [] { "i", "e" } },
            {"rye", new [] { "ri", "e" } },
            {"wi", new [] { "u", "i" } },
            {"we", new [] { "u", "e" } },
            {"ulo", new [] { "u", "o" } },
        };

        /// <summary>
        /// Stores the singer and, when it is a <see cref="VoicevoxSinger"/>, records this
        /// phonemizer's tag on its config and loads the singer's dictionary.
        /// </summary>
        public override void SetSinger(USinger singer) {
            base.SetSinger(singer);
            this.singer = singer as VoicevoxSinger;
            if (this.singer != null) {
                this.singer.voicevoxConfig.Tag = this.Tag;
                VoicevoxUtils.Loaddic(this.singer);
            }
        }

        protected override string[] GetVowels() => vowels;

        protected override string[] GetConsonants() => consonants;

        protected override string GetDictionaryName() => "cmudict-0_7b.txt";

        protected override Dictionary<string, string> GetDictionaryPhonemesReplacement() => dictionaryPhonemesReplacement;

        protected override IG2p LoadBaseDictionary() => new ArpabetG2p();

        /// <summary>
        /// Returns the symbols for a note. A lyric listed in the pause table yields only
        /// its canonical pause symbol; otherwise the base symbols are returned with every
        /// diphthong split into its two characters (e.g. "ay" becomes "a", "y").
        /// </summary>
        /// <returns>The symbol array, or null when the base implementation returns null.</returns>
        protected override string[] GetSymbols(Note note) {
            if (note.lyric != null && VoicevoxUtils.phoneme_List.paus.TryGetValue(note.lyric, out string pause)) {
                return new[] { pause };
            }

            string[] original = base.GetSymbols(note);
            if (original == null) {
                return null;
            }

            var modified = new List<string>(original.Length);
            foreach (string symbol in original) {
                if (diphthongs.Contains(symbol)) {
                    modified.Add(symbol[0].ToString());
                    modified.Add(symbol[1].ToString());
                } else {
                    modified.Add(symbol);
                }
            }
            return modified.ToArray();
        }

        /// <summary>
        /// Converts one syllable into kana phonemes: leading consonants become standalone
        /// kana, and the last consonant is fused with the vowel into a CV kana.
        /// </summary>
        /// <returns>
        /// The phoneme list; a list holding a single null when the note extends the
        /// previous syllable.
        /// </returns>
        protected override List<string> ProcessSyllable(Syllable syllable) {
            // Skip processing if this note extends the previous syllable
            if (CanMakeAliasExtension(syllable)) {
                return new List<string> { null };
            }

            var v = syllable.v;
            var phonemes = new List<string>();
            if (v != null && VoicevoxUtils.phoneme_List.paus.TryGetValue(v, out string pause)) {
                phonemes.Add(pause);
                return phonemes;
            }

            var cc = MergeSpecialClusters(syllable.cc);

            // Every consonant but the last is voiced on its own; the last one joins the vowel.
            var finalCons = "";
            if (cc.Length > 0) {
                finalCons = cc[cc.Length - 1];
                for (var i = 0; i < cc.Length - 1; i++) {
                    // Symbols missing from the table are skipped instead of throwing.
                    if (soloConsonant.TryGetValue(cc[i], out var solo) && HasOto(solo, syllable.tone)) {
                        phonemes.Add(solo);
                    }
                }
            }

            // Build the romaji CV; an unmapped consonant is passed through unchanged.
            var onset = startingConsonant.TryGetValue(finalCons, out var mapped) ? mapped : finalCons;
            var cv = $"{onset}{v}";
            if (altCv.TryGetValue(cv, out var alternative)) {
                cv = alternative;
            }
            var hiragana = ToHiragana(cv);

            if (HasOto(hiragana, syllable.vowelTone)) {
                phonemes.Add(hiragana);
            } else if (extraCv.TryGetValue(cv, out var parts)) {
                // Nonstandard CV: emit the two-kana approximation.
                foreach (var part in parts) {
                    phonemes.Add(ToHiragana(part));
                }
            }
            // NOTE(review): when the kana is unavailable and no split exists, nothing is
            // emitted for the CV. This mirrors the original behaviour — confirm it is intended.

            return phonemes;
        }

        /// <summary>
        /// Converts the trailing consonants of a phrase into standalone kana, using a
        /// substitute from the conditional table when the primary kana is unavailable.
        /// </summary>
        protected override List<string> ProcessEnding(Ending ending) {
            var phonemes = new List<string>();
            foreach (var symbol in MergeSpecialClusters(ending.cc)) {
                // Symbols missing from the table are skipped instead of throwing.
                if (!soloConsonant.TryGetValue(symbol, out var solo)) {
                    continue;
                }
                if (HasOto(solo, ending.tone)) {
                    phonemes.Add(solo);
                } else if (conditionalAlt.TryGetValue(solo, out var substitute)) {
                    phonemes.Add(substitute);
                }
            }
            return phonemes;
        }

        /// <summary>
        /// Collapses a doubled consonant into one and merges adjacent pairs listed in the
        /// special-cluster set (e.g. "k","y" becomes "ky").
        /// </summary>
        private static string[] MergeSpecialClusters(string[] cc) {
            if (cc == null || cc.Length == 0) {
                return Array.Empty<string>();
            }

            var merged = new List<string>(cc.Length);
            for (var i = 0; i < cc.Length; i++) {
                if (i < cc.Length - 1) {
                    if (cc[i] == cc[i + 1]) {
                        // Doubled consonant: keep one and skip its twin.
                        merged.Add(cc[i]);
                        i++;
                        continue;
                    }
                    var pair = $"{cc[i]}{cc[i + 1]}";
                    if (specialClusters.Contains(pair)) {
                        merged.Add(pair);
                        i++;
                        continue;
                    }
                }
                merged.Add(cc[i]);
            }
            return merged.ToArray();
        }

        /// <summary>Converts romaji to hiragana, writing the "vu" kana as katakana "ヴ".</summary>
        private string ToHiragana(string romaji) {
            var hiragana = WanaKana.ToHiragana(romaji);
            // NOTE(review): the kana table added elsewhere in this change lists hiragana "ゔ";
            // confirm the singer actually exposes the katakana form produced here.
            hiragana = hiragana.Replace("ゔ", "ヴ");
            return hiragana;
        }
    }
}