Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VOICEVOXRenderer Tempo change support #1287

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 13 additions & 15 deletions OpenUtau.Core/Voicevox/VoicevoxRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,15 +71,14 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
}
try {
Log.Information($"Starting Voicevox synthesis");
VoicevoxNote vvNotes = new VoicevoxNote();
VoicevoxNotes vvNotes = new VoicevoxNotes();
if (!singer.voicevoxConfig.Tag.Equals("VOICEVOX JA")) {
Note[][] notes = new Note[phrase.phones.Length][];
VoicevoxNote[] notes = new VoicevoxNote[phrase.phones.Length];
for (int i = 0; i < phrase.phones.Length; i++) {
notes[i] = new Note[1];
notes[i][0] = new Note() {
notes[i] = new VoicevoxNote() {
lyric = phrase.phones[i].phoneme,
position = phrase.phones[i].position,
duration = phrase.phones[i].duration,
positionMs = phrase.phones[i].positionMs,
durationMs = phrase.phones[i].durationMs,
tone = (int)(phrase.phones[i].tone + phrase.phones[0].toneShift)
};
}
Expand Down Expand Up @@ -178,8 +177,8 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
}

//Synthesize with parameters of phoneme, F0, and volume. Under development
static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) {
VoicevoxNote notes = new VoicevoxNote();
static VoicevoxNotes PhraseToVoicevoxNotes(RenderPhrase phrase) {
VoicevoxNotes notes = new VoicevoxNotes();

int headFrames = (int)(VoicevoxUtils.headS * VoicevoxUtils.fps);
int tailFrames = (int)(VoicevoxUtils.tailS * VoicevoxUtils.fps);
Expand Down Expand Up @@ -258,22 +257,21 @@ RenderPitchResult IRenderer.LoadRenderedPitch(RenderPhrase phrase) {
var singer = phrase.singer as VoicevoxSinger;
if (singer != null) {
string singerID = VoicevoxUtils.defaultID;
Note[][] notes = new Note[phrase.phones.Length][];
VoicevoxNote[] notes = new VoicevoxNote[phrase.phones.Length];

for (int i = 0; i < phrase.phones.Length; i++) {
notes[i] = new Note[1];
notes[i][0] = new Note() {
notes[i] = new VoicevoxNote() {
lyric = phrase.phones[i].phoneme,
position = phrase.phones[i].position,
duration = phrase.phones[i].duration,
tone = phrase.phones[i].tone + phrase.phones[0].toneShift
positionMs = phrase.phones[i].positionMs,
durationMs = phrase.phones[i].durationMs,
tone = (int)(phrase.phones[i].tone + phrase.phones[0].toneShift)
};
}

var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, phrase.timeAxis);

string baseSingerID = VoicevoxUtils.getBaseSingerID(singer);
VoicevoxNote vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID);
VoicevoxNotes vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID);
int vvTotalFrames = 0 - vvNotes.phonemes[0].frame_length;
vvNotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length);
var f0 = new double[vvTotalFrames];
Expand Down
46 changes: 23 additions & 23 deletions OpenUtau.Core/Voicevox/VoicevoxUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,20 @@
*/

namespace OpenUtau.Core.Voicevox {

/// <summary>
/// A single note handed to the VOICEVOX query builder, carrying its lyric, timing and pitch.
/// </summary>
public struct VoicevoxNote {
// Lyric text; the renderer fills it from the phone's phoneme, and Notetodic looks it up in the dictionary.
public string lyric;
// Start position; the renderer copies it from the phone's positionMs.
public double positionMs;
// Length of the note; NoteGroupsToVoicevox converts it to a frame count as durationMs / 1000 * fps.
public double durationMs;
// Pitch value; the renderer stores (tone + toneShift) cast to int, and it is sent as the query note's key.
public int tone;
}

/// <summary>
/// One phoneme entry of a VoicevoxNotes result, paired with its length in frames.
/// </summary>
/// <remarks>
/// NOTE(review): VoicevoxNotes is filled via jObj.ToObject&lt;VoicevoxNotes&gt;(), so these field
/// names presumably mirror the keys of the response JSON — confirm before renaming them.
/// </remarks>
public class Phonemes {
// Phoneme label.
public string phoneme;
// Length of this phoneme in frames; LoadRenderedPitch sums these (minus the first entry) to size its f0 buffer.
public int frame_length;

}
public class VoicevoxNote {
public class VoicevoxNotes {
public List<double> f0 = new List<double>();
public List<double> volume = new List<double>();
public List<Phonemes> phonemes = new List<Phonemes>();
Expand Down Expand Up @@ -97,14 +105,14 @@ public void Loaddic(string location) {
Log.Error($"Failed to read dictionary file. : {e}");
}
}
public string Notetodic(Note[][] notes, int index) {
if (dict.TryGetValue(notes[index][0].lyric, out var lyric_)) {
public string Notetodic(VoicevoxNote[] notes, int index) {
if (dict.TryGetValue(notes[index].lyric, out var lyric_)) {
if (string.IsNullOrEmpty(lyric_)) {
return "";
}
return lyric_;
}
return notes[index][0].lyric;
return notes[index].lyric;
}

public string Lyrictodic(string lyric) {
Expand Down Expand Up @@ -132,54 +140,46 @@ public static class VoicevoxUtils {
public static Dictionary_list dic = new Dictionary_list();
public static Phoneme_list phoneme_List = new Phoneme_list();

public static VoicevoxNote VoicevoxVoiceBase(VoicevoxQueryMain qNotes, string id) {
public static VoicevoxNotes VoicevoxVoiceBase(VoicevoxQueryMain qNotes, string id) {
var queryurl = new VoicevoxURL() { method = "POST", path = "/sing_frame_audio_query", query = new Dictionary<string, string> { { "speaker", id } }, body = JsonConvert.SerializeObject(qNotes) };
var response = VoicevoxClient.Inst.SendRequest(queryurl);
VoicevoxNote configs;
VoicevoxNotes vvNotes;
var jObj = JObject.Parse(response.Item1);
if (jObj.ContainsKey("detail")) {
Log.Error($"Response was incorrect. : {jObj}");
} else {
configs = jObj.ToObject<VoicevoxNote>();
return configs;
vvNotes = jObj.ToObject<VoicevoxNotes>();
return vvNotes;
}
return new VoicevoxNote();
return new VoicevoxNotes();
}

/// <summary>
/// Loads the shared dictionary (<c>dic</c>) from the given singer's location.
/// </summary>
/// <param name="singer">Singer whose <c>Location</c> is passed to the dictionary loader.</param>
public static void Loaddic(VoicevoxSinger singer) => dic.Loaddic(singer.Location);

public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis timeAxis) {
public static VoicevoxQueryMain NoteGroupsToVoicevox(VoicevoxNote[] notes, TimeAxis timeAxis) {
VoicevoxQueryMain qnotes = new VoicevoxQueryMain();
int index = 0;
int duration = 0;
double durationMs = 0;
try {
qnotes.notes.Add(new VoicevoxQueryNotes() {
lyric = "",
frame_length = (int)Math.Round((headS * fps), MidpointRounding.AwayFromZero),
key = null,
vqnindex = -1
});
duration = notes[index][0].position + notes[index][0].duration;
durationMs = notes[index].positionMs + notes[index].durationMs;
while (index < notes.Length) {
string lyric = dic.Notetodic(notes, index);
int length = (int)Math.Round(((timeAxis.TickPosToMsPos(notes[index].Sum(n => n.duration)) / 1000f) * VoicevoxUtils.fps), MidpointRounding.AwayFromZero);
int length = (int)Math.Round((notes[index].durationMs / 1000f) * VoicevoxUtils.fps, MidpointRounding.AwayFromZero);
//Avoid synthesis without at least two frames.
if (length < 2) {
length = 2;
}
int? tone = null;
if (!string.IsNullOrEmpty(lyric)) {
if (notes[index][0].phonemeAttributes != null) {
if (notes[index][0].phonemeAttributes.Length > 0) {
tone = notes[index][0].tone + notes[index][0].phonemeAttributes[0].toneShift;
} else {
tone = notes[index][0].tone;
}
} else {
tone = notes[index][0].tone;
}
tone = notes[index].tone;
} else {
lyric = "";
}
Expand All @@ -189,7 +189,7 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti
key = tone,
vqnindex = index
});
duration += notes[index][0].duration;
durationMs += notes[index].durationMs;
index++;
}
qnotes.notes.Add(new VoicevoxQueryNotes {
Expand Down
Loading