Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VOICEVOX Volume operation F0 read support #1241

Merged
merged 3 commits into from
Sep 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 52 additions & 35 deletions OpenUtau.Core/Voicevox/VoicevoxRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ public class VoicevoxRenderer : IRenderer {

// Set of expression abbreviations held in supportedExp for this renderer.
// NOTE(review): this text is a diff rendering without +/- markers, so the
// commented-out //PITD and //VOLC lines are presumably the removed versions of
// the active PITD and VOLC entries that follow them — confirm against the merged file.
static readonly HashSet<string> supportedExp = new HashSet<string>(){
Format.Ustx.DYN,
//PITD,
PITD,
Format.Ustx.CLR,
Format.Ustx.VOL,
//VOLC,
VOLC,
// SHFC is commented out here, so it is not added to the set.
//Format.Ustx.SHFC,
Format.Ustx.SHFT
};
Expand Down Expand Up @@ -91,18 +91,29 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra

}
int vvTotalFrames = 0;
double frameMs = (1000d / VoicevoxUtils.fps);
vvNotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length);

if (!phrase.phones[0].direct) {
double frameMs = (1000d / VoicevoxUtils.fps);
vvNotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, x => MusicMath.ToneToFreq(x * 0.01)).ToList();
vvNotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList();
} else {
//Compatible with toneShift (key shift), for adjusting the range of tones when synthesizing
vvNotes.f0 = vvNotes.f0.Select(f0 => f0 = f0 * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))).ToList();
}

//Volume parameter for synthesis. Scheduled to be revised
vvNotes.volume = vvNotes.volume.Select(vol => vol = vol * phrase.phones[0].volume).ToList();
var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == VOLC);
if (volumeCurve != null) {
var volumes = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, -10, x => x * 0.01);
vvNotes.volume = vvNotes.volume.Select((vol, i) => vol = vol * volumes[i]).ToList();
} else {
vvNotes.volume = vvNotes.volume.Select(vol => vol = vol * phrase.phones[0].volume).ToList();
}
for (int i = 0; i < vvNotes.phonemes[0].frame_length; i++) {
vvNotes.volume[i] = 0;
}
for (int i = vvNotes.volume.Count - vvNotes.phonemes[vvNotes.phonemes.Count - 1].frame_length; i < vvNotes.volume.Count; i++) {
vvNotes.volume[i] = 0;
}
} else {
vvNotes = PhraseToVoicevoxNotes(phrase);
}
Expand Down Expand Up @@ -188,16 +199,13 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) {

int vvTotalFrames = -(headFrames + tailFrames);
notes.phonemes.ForEach(x => vvTotalFrames += x.frame_length);
double frameMs = 1 / 1000d * VoicevoxUtils.fps;
double frameMs = (1000d / VoicevoxUtils.fps);
int totalFrames = (int)(vvTotalFrames / VoicevoxUtils.fps * 1000d);
int frameRatio = vvTotalFrames / totalFrames;
const int pitchInterval = 5;


//var curve = phrase.pitches.SelectMany(item => Enumerable.Repeat(item, 5)).ToArray();
notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList();
//notes.f0 = f0.Where((x, i) => i % frameRatio == 0).ToList();
float[] f0Shifted = notes.f0.Select(f => (float)f).ToArray();
notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, notes.phonemes[0].frame_length, notes.phonemes[^1].frame_length, -(VoicevoxUtils.headS + 10), x => MusicMath.ToneToFreq(x * 0.01)).ToList(); float[] f0Shifted = notes.f0.Select(f => (float)f).ToArray();
if (phrase.toneShift != null) {
for (int i = 0; i < notes.f0.Count; i++) {
double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs;
Expand All @@ -212,8 +220,7 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) {

var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == VOLC);
if (volumeCurve != null) {
notes.volume = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.DecibelToLinear(x)).ToList();
//notes.volume = volume.Where((x, i) => i % frameRatio == 0).ToList();
notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, notes.phonemes[0].frame_length, notes.phonemes[^1].frame_length, -(VoicevoxUtils.headS + 10), x => MusicMath.ToneToFreq(x * 0.01)).ToList();
} else {
notes.volume = Enumerable.Repeat(1d, vvTotalFrames).ToList();
}
Expand All @@ -226,21 +233,20 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) {


/// <summary>
/// Returns the expression descriptors suggested by this renderer.
/// The visible body does not read <paramref name="singer"/> or <paramref name="renderSettings"/>.
/// </summary>
/// <param name="singer">Singer the suggestion is requested for (unused in the visible body).</param>
/// <param name="renderSettings">Render settings of the caller (unused in the visible body).</param>
/// <returns>
/// As the text is rendered here, the first statement returns an empty array, so the
/// descriptor list built further down is never reached.
/// </returns>
public UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings) {
// NOTE(review): this text is a diff rendering without +/- markers. The early return and the
// commented-out block below are presumably the removed (pre-change) lines, and the
// uncommented list that follows is presumably the added version — confirm against the merged file.
return new UExpressionDescriptor[] { };
//under development
//var result = new List<UExpressionDescriptor> {
// new UExpressionDescriptor{
// name="volume (curve)",
// abbr=VOLC,
// type=UExpressionType.Curve,
// min=-20,
// max=20,
// defaultValue=0,
// isFlag=false,
// },
//};

//return result.ToArray();
// Single curve-type descriptor for VOLC with range 0..200 and default 100.
// The commented-out variant above used -20..20 with default 0.
var result = new List<UExpressionDescriptor> {
new UExpressionDescriptor{
name="volume (curve)",
abbr=VOLC,
type=UExpressionType.Curve,
min=0,
max=200,
defaultValue=100,
isFlag=false,
},
};

return result.ToArray();
}

/// <summary>
/// Returns <c>Renderers.VOICEVOX</c> as the string form of this object.
/// </summary>
public override string ToString() {
    return Renderers.VOICEVOX;
}
Expand All @@ -250,24 +256,30 @@ RenderPitchResult IRenderer.LoadRenderedPitch(RenderPhrase phrase) {
var singer = phrase.singer as VoicevoxSinger;
if (singer != null) {
string singerID = VoicevoxUtils.defaultID;
Note[][] notes = new Note[phrase.notes.Length][];
Note[][] notes = new Note[phrase.phones.Length][];

for (int i = 0; i < phrase.notes.Length; i++) {
for (int i = 0; i < phrase.phones.Length; i++) {
notes[i] = new Note[1];
notes[i][0] = new Note() {
lyric = phrase.notes[i].lyric,
position = phrase.notes[i].position,
duration = phrase.notes[i].duration,
tone = phrase.notes[i].tone + phrase.phones[0].toneShift
lyric = phrase.phones[i].phoneme,
position = phrase.phones[i].position,
duration = phrase.phones[i].duration,
tone = phrase.phones[i].tone + phrase.phones[0].toneShift
};
}

var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, phrase.timeAxis);

string baseSingerID = VoicevoxUtils.getBaseSingerID(singer);
VoicevoxNote vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID);
var result = new RenderPitchResult { tones = vvNotes.f0.Select(f => (float)MusicMath.FreqToTone(f)).ToArray() };
result.ticks = new float[result.tones.Length];
int vvTotalFrames = 0 - vvNotes.phonemes[0].frame_length;
vvNotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length);
var f0 = new double[vvTotalFrames];
for (int i = vvNotes.phonemes[0].frame_length;i< vvTotalFrames; i++) {
f0[i] = vvNotes.f0[i];
}

var result = new RenderPitchResult { tones = f0.Select(f => (float)MusicMath.FreqToTone(f * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d)))).ToArray(), ticks = new float[vvTotalFrames] };
var layout = Layout(phrase);
var t = layout.positionMs - layout.leadingMs;
for (int i = 0; i < result.tones.Length; i++) {
Expand All @@ -287,7 +299,12 @@ ulong HashPhraseGroups(RenderPhrase phrase) {
using (var writer = new BinaryWriter(stream)) {
writer.Write(phrase.preEffectHash);
writer.Write(phrase.phones[0].tone);
writer.Write(phrase.phones[0].direct);
//if (phrase.phones[0].direct) {
writer.Write(phrase.phones[0].toneShift);
//} else {
// phrase.phones.ForEach(x => writer.Write(x.toneShift));
//}
writer.Write(phrase.phones[0].volume);
return XXH64.DigestOf(stream.ToArray());
}
Expand Down
6 changes: 3 additions & 3 deletions OpenUtau.Core/Voicevox/VoicevoxUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,17 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti
return qnotes;
}

public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, Func<double, double> convert) {
public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, double offset, Func<double, double> convert) {
const int interval = 5;
var result = new double[length];
var result = new double[length];
try {
if (curve == null) {
Array.Fill(result, defaultValue);
return result;
}

for (int i = 0; i < length - headFrames - tailFrames; i++) {
double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs;
double posMs = phrase.positionMs - phrase.leadingMs + (i * frameMs) + offset;
int ticks = phrase.timeAxis.MsPosToTickPos(posMs) - (phrase.position - phrase.leading);
int index = Math.Max(0, (int)((double)ticks / interval));
if (index < curve.Length) {
Expand Down
Loading