From 1370fda2e11035cb91844698dd463ea972cd1911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=92=E7=8C=AB=E5=A4=A7=E7=A6=8F?= <93469977+rokujyushi@users.noreply.github.com> Date: Sat, 21 Sep 2024 01:27:00 +0900 Subject: [PATCH] Tempo change support --- OpenUtau.Core/Voicevox/VoicevoxRenderer.cs | 28 ++++++------- OpenUtau.Core/Voicevox/VoicevoxUtils.cs | 46 +++++++++++----------- 2 files changed, 36 insertions(+), 38 deletions(-) diff --git a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs index dc92c9232..18628871f 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs @@ -71,15 +71,14 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra } try { Log.Information($"Starting Voicevox synthesis"); - VoicevoxNote vvNotes = new VoicevoxNote(); + VoicevoxNotes vvNotes = new VoicevoxNotes(); if (!singer.voicevoxConfig.Tag.Equals("VOICEVOX JA")) { - Note[][] notes = new Note[phrase.phones.Length][]; + VoicevoxNote[] notes = new VoicevoxNote[phrase.phones.Length]; for (int i = 0; i < phrase.phones.Length; i++) { - notes[i] = new Note[1]; - notes[i][0] = new Note() { + notes[i] = new VoicevoxNote() { lyric = phrase.phones[i].phoneme, - position = phrase.phones[i].position, - duration = phrase.phones[i].duration, + positionMs = phrase.phones[i].positionMs, + durationMs = phrase.phones[i].durationMs, tone = (int)(phrase.phones[i].tone + phrase.phones[0].toneShift) }; } @@ -178,8 +177,8 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra } //Synthesize with parameters of phoneme, F0, and volume. Under development - static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) { - VoicevoxNote notes = new VoicevoxNote(); + static VoicevoxNotes PhraseToVoicevoxNotes(RenderPhrase phrase) { + VoicevoxNotes notes = new VoicevoxNotes(); int headFrames = (int)(VoicevoxUtils.headS * VoicevoxUtils.fps); int tailFrames = (int)(VoicevoxUtils.tailS * VoicevoxUtils.fps); @@ -258,22 +257,21 @@ RenderPitchResult IRenderer.LoadRenderedPitch(RenderPhrase phrase) { var singer = phrase.singer as VoicevoxSinger; if (singer != null) { string singerID = VoicevoxUtils.defaultID; - Note[][] notes = new Note[phrase.phones.Length][]; + VoicevoxNote[] notes = new VoicevoxNote[phrase.phones.Length]; for (int i = 0; i < phrase.phones.Length; i++) { - notes[i] = new Note[1]; - notes[i][0] = new Note() { + notes[i] = new VoicevoxNote() { lyric = phrase.phones[i].phoneme, - position = phrase.phones[i].position, - duration = phrase.phones[i].duration, - tone = phrase.phones[i].tone + phrase.phones[0].toneShift + positionMs = phrase.phones[i].positionMs, + durationMs = phrase.phones[i].durationMs, + tone = (int)(phrase.phones[i].tone + phrase.phones[0].toneShift) }; } var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, phrase.timeAxis); string baseSingerID = VoicevoxUtils.getBaseSingerID(singer); - VoicevoxNote vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); + VoicevoxNotes vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); int vvTotalFrames = 0 - vvNotes.phonemes[0].frame_length; vvNotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); var f0 = new double[vvTotalFrames]; diff --git a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs index ea04d0f0a..a2c3195d2 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs @@ -14,12 +14,20 @@ */ namespace OpenUtau.Core.Voicevox { + + public struct VoicevoxNote { + public string lyric; + public double positionMs; + public double durationMs; + public int tone; + } + public class Phonemes { public string phoneme; public int frame_length; } - public class VoicevoxNote { + public class VoicevoxNotes { public List f0 = new List(); public List volume = new List(); public List phonemes = new List(); @@ -97,14 +105,14 @@ public void Loaddic(string location) { Log.Error($"Failed to read dictionary file. : {e}"); } } - public string Notetodic(Note[][] notes, int index) { - if (dict.TryGetValue(notes[index][0].lyric, out var lyric_)) { + public string Notetodic(VoicevoxNote[] notes, int index) { + if (dict.TryGetValue(notes[index].lyric, out var lyric_)) { if (string.IsNullOrEmpty(lyric_)) { return ""; } return lyric_; } - return notes[index][0].lyric; + return notes[index].lyric; } public string Lyrictodic(string lyric) { @@ -132,28 +140,28 @@ public static class VoicevoxUtils { public static Dictionary_list dic = new Dictionary_list(); public static Phoneme_list phoneme_List = new Phoneme_list(); - public static VoicevoxNote VoicevoxVoiceBase(VoicevoxQueryMain qNotes, string id) { + public static VoicevoxNotes VoicevoxVoiceBase(VoicevoxQueryMain qNotes, string id) { var queryurl = new VoicevoxURL() { method = "POST", path = "/sing_frame_audio_query", query = new Dictionary { { "speaker", id } }, body = JsonConvert.SerializeObject(qNotes) }; var response = VoicevoxClient.Inst.SendRequest(queryurl); - VoicevoxNote configs; + VoicevoxNotes vvNotes; var jObj = JObject.Parse(response.Item1); if (jObj.ContainsKey("detail")) { Log.Error($"Response was incorrect. : {jObj}"); } else { - configs = jObj.ToObject(); - return configs; + vvNotes = jObj.ToObject(); + return vvNotes; } - return new VoicevoxNote(); + return new VoicevoxNotes(); } public static void Loaddic(VoicevoxSinger singer) { dic.Loaddic(singer.Location); } - public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis timeAxis) { + public static VoicevoxQueryMain NoteGroupsToVoicevox(VoicevoxNote[] notes, TimeAxis timeAxis) { VoicevoxQueryMain qnotes = new VoicevoxQueryMain(); int index = 0; - int duration = 0; + double durationMs = 0; try { qnotes.notes.Add(new VoicevoxQueryNotes() { lyric = "", @@ -161,25 +169,17 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti key = null, vqnindex = -1 }); - duration = notes[index][0].position + notes[index][0].duration; + durationMs = notes[index].positionMs + notes[index].durationMs; while (index < notes.Length) { string lyric = dic.Notetodic(notes, index); - int length = (int)Math.Round(((timeAxis.TickPosToMsPos(notes[index].Sum(n => n.duration)) / 1000f) * VoicevoxUtils.fps), MidpointRounding.AwayFromZero); + int length = (int)Math.Round((notes[index].durationMs / 1000f) * VoicevoxUtils.fps, MidpointRounding.AwayFromZero); //Avoid synthesis without at least two frames. if (length < 2) { length = 2; } int? tone = null; if (!string.IsNullOrEmpty(lyric)) { - if (notes[index][0].phonemeAttributes != null) { - if (notes[index][0].phonemeAttributes.Length > 0) { - tone = notes[index][0].tone + notes[index][0].phonemeAttributes[0].toneShift; - } else { - tone = notes[index][0].tone; - } - } else { - tone = notes[index][0].tone; - } + tone = notes[index].tone; } else { lyric = ""; } @@ -189,7 +189,7 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti key = tone, vqnindex = index }); - duration += notes[index][0].duration; + durationMs += notes[index].durationMs; index++; } qnotes.notes.Add(new VoicevoxQueryNotes {