Skip to content

Commit

Permalink
Merge pull request #1241 from rokujyushi/AddFunction
Browse files Browse the repository at this point in the history
VOICEVOX: volume curve operation and F0 read support
  • Loading branch information
stakira committed Sep 1, 2024
2 parents 6d36d9c + cbc52e1 commit 5fe45c0
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 38 deletions.
87 changes: 52 additions & 35 deletions OpenUtau.Core/Voicevox/VoicevoxRenderer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,10 @@ public class VoicevoxRenderer : IRenderer {

static readonly HashSet<string> supportedExp = new HashSet<string>(){
Format.Ustx.DYN,
//PITD,
PITD,
Format.Ustx.CLR,
Format.Ustx.VOL,
//VOLC,
VOLC,
//Format.Ustx.SHFC,
Format.Ustx.SHFT
};
Expand Down Expand Up @@ -91,18 +91,29 @@ public Task<RenderResult> Render(RenderPhrase phrase, Progress progress, int tra
}
int vvTotalFrames = 0;
double frameMs = (1000d / VoicevoxUtils.fps);
vvNotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length);
if (!phrase.phones[0].direct) {
double frameMs = (1000d / VoicevoxUtils.fps);
vvNotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, x => MusicMath.ToneToFreq(x * 0.01)).ToList();
vvNotes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList();
} else {
//Compensate for toneShift (key shift): scale F0 by 2^(-toneShift/12), i.e. shift it back by toneShift semitones, to adjust the tone range used when synthesizing
vvNotes.f0 = vvNotes.f0.Select(f0 => f0 = f0 * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d))).ToList();
}
//Volume parameter for synthesis. Scheduled to be revised
vvNotes.volume = vvNotes.volume.Select(vol => vol = vol * phrase.phones[0].volume).ToList();
var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == VOLC);
if (volumeCurve != null) {
var volumes = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, vvNotes.phonemes[0].frame_length, vvNotes.phonemes[^1].frame_length, -10, x => x * 0.01);
vvNotes.volume = vvNotes.volume.Select((vol, i) => vol = vol * volumes[i]).ToList();
} else {
vvNotes.volume = vvNotes.volume.Select(vol => vol = vol * phrase.phones[0].volume).ToList();
}
for (int i = 0; i < vvNotes.phonemes[0].frame_length; i++) {
vvNotes.volume[i] = 0;
}
for (int i = vvNotes.volume.Count - vvNotes.phonemes[vvNotes.phonemes.Count - 1].frame_length; i < vvNotes.volume.Count; i++) {
vvNotes.volume[i] = 0;
}
} else {
vvNotes = PhraseToVoicevoxNotes(phrase);
}
Expand Down Expand Up @@ -188,16 +199,13 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) {

int vvTotalFrames = -(headFrames + tailFrames);
notes.phonemes.ForEach(x => vvTotalFrames += x.frame_length);
double frameMs = 1 / 1000d * VoicevoxUtils.fps;
double frameMs = (1000d / VoicevoxUtils.fps);
int totalFrames = (int)(vvTotalFrames / VoicevoxUtils.fps * 1000d);
int frameRatio = vvTotalFrames / totalFrames;
const int pitchInterval = 5;


//var curve = phrase.pitches.SelectMany(item => Enumerable.Repeat(item, 5)).ToArray();
notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.ToneToFreq(x * 0.01)).ToList();
//notes.f0 = f0.Where((x, i) => i % frameRatio == 0).ToList();
float[] f0Shifted = notes.f0.Select(f => (float)f).ToArray();
notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, notes.phonemes[0].frame_length, notes.phonemes[^1].frame_length, -(VoicevoxUtils.headS + 10), x => MusicMath.ToneToFreq(x * 0.01)).ToList(); float[] f0Shifted = notes.f0.Select(f => (float)f).ToArray();
if (phrase.toneShift != null) {
for (int i = 0; i < notes.f0.Count; i++) {
double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs;
Expand All @@ -212,8 +220,7 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) {

var volumeCurve = phrase.curves.FirstOrDefault(c => c.Item1 == VOLC);
if (volumeCurve != null) {
notes.volume = VoicevoxUtils.SampleCurve(phrase, volumeCurve.Item2, 0, frameMs, vvTotalFrames, 0, 0, x => MusicMath.DecibelToLinear(x)).ToList();
//notes.volume = volume.Where((x, i) => i % frameRatio == 0).ToList();
notes.f0 = VoicevoxUtils.SampleCurve(phrase, phrase.pitches, 0, frameMs, vvTotalFrames, notes.phonemes[0].frame_length, notes.phonemes[^1].frame_length, -(VoicevoxUtils.headS + 10), x => MusicMath.ToneToFreq(x * 0.01)).ToList();
} else {
notes.volume = Enumerable.Repeat(1d, vvTotalFrames).ToList();
}
Expand All @@ -226,21 +233,20 @@ static VoicevoxNote PhraseToVoicevoxNotes(RenderPhrase phrase) {


public UExpressionDescriptor[] GetSuggestedExpressions(USinger singer, URenderSettings renderSettings) {
return new UExpressionDescriptor[] { };
//under development
//var result = new List<UExpressionDescriptor> {
// new UExpressionDescriptor{
// name="volume (curve)",
// abbr=VOLC,
// type=UExpressionType.Curve,
// min=-20,
// max=20,
// defaultValue=0,
// isFlag=false,
// },
//};

//return result.ToArray();
var result = new List<UExpressionDescriptor> {
new UExpressionDescriptor{
name="volume (curve)",
abbr=VOLC,
type=UExpressionType.Curve,
min=0,
max=200,
defaultValue=100,
isFlag=false,
},
};

return result.ToArray();
}

public override string ToString() => Renderers.VOICEVOX;
Expand All @@ -250,24 +256,30 @@ RenderPitchResult IRenderer.LoadRenderedPitch(RenderPhrase phrase) {
var singer = phrase.singer as VoicevoxSinger;
if (singer != null) {
string singerID = VoicevoxUtils.defaultID;
Note[][] notes = new Note[phrase.notes.Length][];
Note[][] notes = new Note[phrase.phones.Length][];

for (int i = 0; i < phrase.notes.Length; i++) {
for (int i = 0; i < phrase.phones.Length; i++) {
notes[i] = new Note[1];
notes[i][0] = new Note() {
lyric = phrase.notes[i].lyric,
position = phrase.notes[i].position,
duration = phrase.notes[i].duration,
tone = phrase.notes[i].tone + phrase.phones[0].toneShift
lyric = phrase.phones[i].phoneme,
position = phrase.phones[i].position,
duration = phrase.phones[i].duration,
tone = phrase.phones[i].tone + phrase.phones[0].toneShift
};
}

var qNotes = VoicevoxUtils.NoteGroupsToVoicevox(notes, phrase.timeAxis);

string baseSingerID = VoicevoxUtils.getBaseSingerID(singer);
VoicevoxNote vvNotes = VoicevoxUtils.VoicevoxVoiceBase(qNotes, singerID);
var result = new RenderPitchResult { tones = vvNotes.f0.Select(f => (float)MusicMath.FreqToTone(f)).ToArray() };
result.ticks = new float[result.tones.Length];
int vvTotalFrames = 0 - vvNotes.phonemes[0].frame_length;
vvNotes.phonemes.ForEach(x => vvTotalFrames += x.frame_length);
var f0 = new double[vvTotalFrames];
for (int i = vvNotes.phonemes[0].frame_length;i< vvTotalFrames; i++) {
f0[i] = vvNotes.f0[i];
}

var result = new RenderPitchResult { tones = f0.Select(f => (float)MusicMath.FreqToTone(f * Math.Pow(2, ((phrase.phones[0].toneShift * -1) / 12d)))).ToArray(), ticks = new float[vvTotalFrames] };
var layout = Layout(phrase);
var t = layout.positionMs - layout.leadingMs;
for (int i = 0; i < result.tones.Length; i++) {
Expand All @@ -287,7 +299,12 @@ ulong HashPhraseGroups(RenderPhrase phrase) {
using (var writer = new BinaryWriter(stream)) {
writer.Write(phrase.preEffectHash);
writer.Write(phrase.phones[0].tone);
writer.Write(phrase.phones[0].direct);
//if (phrase.phones[0].direct) {
writer.Write(phrase.phones[0].toneShift);
//} else {
// phrase.phones.ForEach(x => writer.Write(x.toneShift));
//}
writer.Write(phrase.phones[0].volume);
return XXH64.DigestOf(stream.ToArray());
}
Expand Down
6 changes: 3 additions & 3 deletions OpenUtau.Core/Voicevox/VoicevoxUtils.cs
Original file line number Diff line number Diff line change
Expand Up @@ -120,17 +120,17 @@ public static VoicevoxQueryMain NoteGroupsToVoicevox(Note[][] notes, TimeAxis ti
return qnotes;
}

public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, Func<double, double> convert) {
public static double[] SampleCurve(RenderPhrase phrase, float[] curve, double defaultValue, double frameMs, int length, int headFrames, int tailFrames, double offset, Func<double, double> convert) {
const int interval = 5;
var result = new double[length];
var result = new double[length];
try {
if (curve == null) {
Array.Fill(result, defaultValue);
return result;
}

for (int i = 0; i < length - headFrames - tailFrames; i++) {
double posMs = phrase.positionMs - phrase.leadingMs + i * frameMs;
double posMs = phrase.positionMs - phrase.leadingMs + (i * frameMs) + offset;
int ticks = phrase.timeAxis.MsPosToTickPos(posMs) - (phrase.position - phrase.leading);
int index = Math.Max(0, (int)((double)ticks / interval));
if (index < curve.Length) {
Expand Down

0 comments on commit 5fe45c0

Please sign in to comment.