From ef6166169ce9b4a313e693274b191f69a757483c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=92=E7=8C=AB=E5=A4=A7=E7=A6=8F?= <93469977+rokujyushi@users.noreply.github.com> Date: Thu, 15 Aug 2024 01:15:03 +0900 Subject: [PATCH 1/3] Volume operation F0 read support --- OpenUtau.Core/Voicevox/VoicevoxRenderer.cs | 81 +++++++++++++--------- OpenUtau.Core/Voicevox/VoicevoxUtils.cs | 6 +- 2 files changed, 50 insertions(+), 37 deletions(-) diff --git a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs index 487a7029b..7abef7ab2 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs @@ -26,10 +26,10 @@ public class VoicevoxRenderer : IRenderer { static readonly HashSet supportedExp = new HashSet(){ Format.Ustx.DYN, - //PITD, + PITD, Format.Ustx.CLR, Format.Ustx.VOL, - //VOLC, + VOLC, //Format.Ustx.SHFC, Format.Ustx.SHFT }; @@ -91,18 +91,29 @@ public Task Render(RenderPhrase phrase, Progress progress, int tra } int vvTotalFrames = 0; + double frameMs = (1000d / VoicevoxUtils.fps); vvNotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); - if (!phrase.phones[0].direct) { - double frameMs = (1000d / VoicevoxUtils.fps); - vvNotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); + vvNotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); } else { //Compatible with toneShift (key shift), for adjusting the range of tones when synthesizing vvNotes.f0 = vvNotes.f0.Select(f0 => f0 = f0 * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))).ToList(); } //Volume parameter for synthesis. Scheduled to be revised - vvNotes.volume = vvNotes.volume.Select(vol => vol = vol * phrase.phones[0].volume).ToList(); + var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == VOLC); + if (volumeCurve != null) { + var volumes = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, -10, x => x * 0.01); + vvNotes.volume = vvNotes.volume.Select((vol, i) => vol = vol * volumes[i]).ToList(); + } else { + vvNotes.volume = vvNotes.volume.Select(vol => vol = vol * phrase.phones[0].volume).ToList(); + } + for (int i = 0; i < vvNotes.phonemes[0].frame_length; i++) { + vvNotes.volume[i] = 0; + } + for (int i = vvNotes.volume.Count - vvNotes.phonemes[vvNotes.phonemes.Count - 1].frame_length; i < vvNotes.volume.Count; i++) { + vvNotes.volume[i] = 0; + } } else { vvNotes = PhraseToVoicevoxNotes(phrase); } @@ -188,16 +199,13 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) { int vvTotalFrames = -(headFrames + tailFrames); notes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); - double frameMs = 1 / 1000d * VoicevoxUtils.fps; + double frameMs = (1000d / VoicevoxUtils.fps); int totalFrames = (int)(vvTotalFrames / VoicevoxUtils.fps * 1000d); int frameRatio = vvTotalFrames / totalFrames; const int pitchInterval = 5; - //var curve = phrase.pitches.SelectMany(item => Enumerable.Repeat(item, 5)).ToArray(); - notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList(); - //notes.f0 = f0.Where((x, i) => i % frameRatio == 0).ToList(); - float[] f0Shifted = notes.f0.Select(f => (float)f).ToArray(); + notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, notes.phonemes[0].frame_length, notes.phonemes[^1].frame_length, -(VoicevoxUtils.headS + 10), x => MusicMath.ToneToFreq(x * 0.01)).ToList(); float[] f0Shifted = notes.f0.Select(f => (float)f).ToArray(); if (phrase.toneShift != null) { for (int i = 0; i < notes.f0.Count; i++) { double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs; @@ -212,8 +220,7 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) { var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == VOLC); if (volumeCurve != null) { - notes.volume = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.DecibelToLinear(x)).ToList(); - //notes.volume = volume.Where((x, i) => i % frameRatio == 0).ToList(); + notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, notes.phonemes[0].frame_length, notes.phonemes[^1].frame_length, -(VoicevoxUtils.headS + 10), x => MusicMath.ToneToFreq(x * 0.01)).ToList(); } else { notes.volume = Enumerable.Repeat(1d, vvTotalFrames).ToList(); } @@ -228,19 +235,19 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) { public UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings) { return new UExpressionDescriptor[] { }; //under development - //var result = new List { - // new UExpressionDescriptor{ - // name="volume (curve)", - // abbr=VOLC, - // type=UExpressionType.Curve, - // min=-20, - // max=20, - // defaultValue=0, - // isFlag=false, - // }, - //}; - - //return result.ToArray(); + var result = new List { + new UExpressionDescriptor{ + name="volume (curve)", + abbr=VOLC, + type=UExpressionType.Curve, + min=0, + max=200, + defaultValue=100, + isFlag=false, + }, + }; + + return result.ToArray(); } public override string ToString() => Renderers.VOICEVOX; @@ -250,15 +257,15 @@ RenderPitchResult IRenderer.LoadRenderedPitch(RenderPhrase phrase) { var singer = phrase.singer as VoicevoxSinger; if (singer != null) { string singerID = VoicevoxUtils.defaultID; - Note[][] notes = new Note[phrase.notes.Length][]; + Note[][] notes = new Note[phrase.phones.Length][]; - for (int i = 0; i < phrase.notes.Length; i++) { + for (int i = 0; i < phrase.phones.Length; i++) { notes[i] = new Note[1]; notes[i][0] = new Note() { - lyric = phrase.notes[i].lyric, - position = phrase.notes[i].position, - duration = phrase.notes[i].duration, - tone = phrase.notes[i].tone + phrase.phones[0].toneShift + lyric = phrase.phones[i].phoneme, + position = phrase.phones[i].position, + duration = phrase.phones[i].duration, + tone = phrase.phones[i].tone + phrase.phones[0].toneShift }; } @@ -266,8 +273,14 @@ RenderPitchResult IRenderer.LoadRenderedPitch(RenderPhrase phrase) { string baseSingerID = VoicevoxUtils.getBaseSingerID(singer); VoicevoxNote vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID); - var result = new RenderPitchResult { tones = vvNotes.f0.Select(f => (float)MusicMath.FreqToTone(f)).ToArray() }; - result.ticks = new float[result.tones.Length]; + int vvTotalFrames = 0 - vvNotes.phonemes[0].frame_length; + vvNotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length); + var f0 = new double[vvTotalFrames]; + for (int i = vvNotes.phonemes[0].frame_length;i< vvTotalFrames; i++) { + f0[i] = vvNotes.f0[i]; + } + + var result = new RenderPitchResult { tones = f0.Select(f => (float)MusicMath.FreqToTone(f * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d)))).ToArray(), ticks = new float[vvTotalFrames] }; var layout = Layout(phrase); var t = layout.positionMs - layout.leadingMs; for (int i = 0; i < result.tones.Length; i++) { diff --git a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs index 5f202119f..c0251cb71 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxUtils.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxUtils.cs @@ -120,9 +120,9 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti return qnotes; } - public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, Func convert) { + public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, double offset, Func convert) { const int interval = 5; - var result = new double[length]; + var result = new double[length]; try { if (curve == null) { Array.Fill(result, defaultValue); @@ -130,7 +130,7 @@ public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double de } for (int i = 0; i < length - headFrames - tailFrames; i++) { - double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs; + double posMs = phrase.positionMs - phrase.leadingMs + (i * frameMs) + offset; int ticks = phrase.timeAxis.MsPosToTickPos(posMs) - (phrase.position - phrase.leading); int index = Math.Max(0, (int)((double)ticks / interval)); if (index < curve.Length) { From a2a850bffb4a4314af68ca600c2764c1fee450e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=92=E7=8C=AB=E5=A4=A7=E7=A6=8F?= <93469977+rokujyushi@users.noreply.github.com> Date: Thu, 15 Aug 2024 01:19:45 +0900 Subject: [PATCH 2/3] Organize hashes --- OpenUtau.Core/Voicevox/VoicevoxRenderer.cs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs index 7abef7ab2..e4ff1120c 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs @@ -300,7 +300,12 @@ ulong HashPhraseGroups(RenderPhrase phrase) { using (var writer = new BinaryWriter(stream)) { writer.Write(phrase.preEffectHash); writer.Write(phrase.phones[0].tone); + writer.Write(phrase.phones[0].direct); + //if (phrase.phones[0].direct) { writer.Write(phrase.phones[0].toneShift); + //} else { + // phrase.phones.ForEach(x => writer.Write(x.toneShift)); + //} writer.Write(phrase.phones[0].volume); return XXH64.DigestOf(stream.ToArray()); } From cbc52e10bd1db2b83f745fd3e58e0f4aa1fb80a3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=BB=92=E7=8C=AB=E5=A4=A7=E7=A6=8F?= <93469977+rokujyushi@users.noreply.github.com> Date: Thu, 15 Aug 2024 06:23:25 +0900 Subject: [PATCH 3/3] Remove unnecessary codes --- OpenUtau.Core/Voicevox/VoicevoxRenderer.cs | 1 - 1 file changed, 1 deletion(-) diff --git a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs index e4ff1120c..71e7d98e1 100644 --- a/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs +++ b/OpenUtau.Core/Voicevox/VoicevoxRenderer.cs @@ -233,7 +233,6 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) { public UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings) { - return new UExpressionDescriptor[] { }; //under development var result = new List { new UExpressionDescriptor{