修改了袅袅、utau音源文件读入格式之类的

hontsev · Feb 27, 2018 · 5f69ec1 · 5f69ec1
1 parent 68f1d95
commit 5f69ec1
Show file tree

Hide file tree

Showing 20 changed files with 1,673 additions and 139 deletions.
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,6 @@
 /nntest/bin/Release
 /SoundEditor/obj
 /SoundEditor/bin/Debug
+/packages
+/*.suo
+/mySpeechSynthesizer/*.csproj
diff --git a/mySpeechSynthesizer/Analysis/NNAnalysis.cs b/mySpeechSynthesizer/Analysis/NNAnalysis.cs
@@ -60,7 +60,7 @@ public static byte[] readVoiceD(string directory, int begin = 0, int len = -1)
         /// </summary>
         /// <param name="directory">音源路径</param>
         /// <returns></returns>
-        public static NNTone[] getParamsFromNN(string directory)
+        public static NNTone[] getParams(string directory)
         {
             string file = directory + "inf.d";
             List<NNTone> res = new List<NNTone>();
@@ -87,10 +87,10 @@ public static NNTone[] getParamsFromNN(string directory)
         /// </summary>
         /// <param name="directory"></param>
         /// <returns></returns>
-        public static ToneList getToneListFromNN(string directory)
+        public static ToneList getToneList(string directory)
         {
 
-            NNTone[] ntlist = getParamsFromNN(directory);
+            NNTone[] ntlist = getParams(directory);
             byte[] datas = readVoiceD(directory);
 
             ToneList tl = new ToneList(datas);

diff --git a/mySpeechSynthesizer/Analysis/NNTone.cs b/mySpeechSynthesizer/Analysis/NNTone.cs
@@ -47,6 +47,11 @@ public class NNTone
         /// </summary>
         public int volume;
 
+        public NNTone()
+        {
+
+        }
+
         public NNTone(string oristr)
         {
             try

diff --git a/mySpeechSynthesizer/Analysis/UTAUAnalysis.cs b/mySpeechSynthesizer/Analysis/UTAUAnalysis.cs
@@ -0,0 +1,58 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using System.IO;
+
+namespace mySpeechSynthesizer
+{
+    /// <summary>
+    /// Loads a UTAU voicebank folder (a directory of .wav files) into a ToneList.
+    /// </summary>
+    public class UTAUAnalysis
+    {
+        // Not referenced inside this class.
+        public static int samplingRate = 44100;
+
+        // Returns the paths of all *.wav files directly inside the folder.
+        private static string[] getSoundList(string path)
+        {
+            return Directory.GetFiles(path, "*.wav");
+        }
+
+        // The tone name is the file name without directory and extension.
+        private static string getSoundName(string filename)
+        {
+            return Path.GetFileNameWithoutExtension(filename);
+        }
+
+        /// <summary>
+        /// Reads voice data from a UTAU voicebank folder.
+        /// The source provides no pitch information, so every tone is registered
+        /// with the fixed third ToneUnit argument 66 (presumably a pitch value - TODO confirm).
+        /// </summary>
+        /// <param name="path">Folder containing the .wav files.</param>
+        /// <returns>
+        /// A ToneList whose oridata is the concatenation of all sample bytes and whose
+        /// tones map each file name (without extension) to its offset and length in oridata.
+        /// </returns>
+        public static ToneList getToneList(string path)
+        {
+            ToneList result = new ToneList(new byte[] { });
+            List<byte> buffer = new List<byte>();
+
+            foreach (string wavFile in getSoundList(path))
+            {
+                byte[] samples = WAVControl.getSampleByte(WAVControl.getSample(wavFile));
+                int offset = buffer.Count;
+                int size = samples.Length;
+                string toneName = getSoundName(wavFile);
+
+                buffer.AddRange(samples);
+                result.tones[toneName] = new ToneUnit(offset, size, 66);
+            }
+
+            result.oridata = buffer.ToArray();
+            return result;
+        }
+    }
+}
diff --git a/mySpeechSynthesizer/Hans/HanSynthesis.cs b/mySpeechSynthesizer/Hans/HanSynthesis.cs
@@ -0,0 +1,270 @@
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.IO;
+using System.Linq;
+using System.Text;
+using System.Threading;
+using System.Threading.Tasks;
+
+namespace mySpeechSynthesizer
+{
+
+    public class HanSynthesis
+    {
+
+        private PinYinConverter pyconv; // used by showSound to split text and obtain pinyin lists
+
+        public double soundheight; // divided by 100 in showSound and used as the volume / amplitude factor
+        public double soundSpeed; // 100 is the baseline: showSound scales durations and silences by 100 / soundSpeed
+
+        NNTone[] sounds; // filled by the constructor from sa.getNNTonesFromToneList(); not read elsewhere in this class
+        SoundAnalysis sa = new SoundAnalysis(); // initialised in the constructor, queried by getSoundData
+        public string filepath = ""; // voicebank path handed to sa.init
+        string output = @"output\tmp.wav"; // not referenced by the methods of this class
+        string outputOri = @"output\tmp_origin.wav"; // not referenced by the methods of this class
+        string outputTone = @"output\tmp_tone.wav"; // default target of writeWAV when no filename is given
+        public double defaultpitch = 69; // passed as the last SynTone constructor argument in getSoundData
+
+        /// <summary>
+        /// Creates a synthesizer and loads the voicebank found at <paramref name="sourcePath"/>.
+        /// The voicebank is always opened as a Niaoniao source.
+        /// </summary>
+        /// <param name="sourcePath">Path of the voicebank; stored in filepath.</param>
+        /// <param name="speed">Initial value of soundSpeed.</param>
+        /// <param name="height">Initial value of soundheight.</param>
+        /// <param name="pitch">Initial value of defaultpitch.</param>
+        public HanSynthesis(string sourcePath, int speed = 120, int height = 100, int pitch = 69)
+        {
+            this.pyconv = new PinYinConverter();
+
+            this.soundSpeed = speed;
+            this.soundheight = height;
+            this.defaultpitch = pitch;
+            this.filepath = sourcePath;
+
+            // Load the voicebank, then cache its tone descriptions.
+            this.sa.init(this.filepath, SourceType.Niaoniao);
+            this.sounds = this.sa.getNNTonesFromToneList();
+        }
+
+
+
+        /// <summary>
+        /// Returns the list of factors associated with a Mandarin tone number.
+        /// showSound forwards the result as the pitdata argument of getSoundData
+        /// (presumably relative pitch multipliers along the syllable - TODO confirm).
+        /// </summary>
+        /// <param name="toneNum">Tone number of the syllable (1-4; 5 and anything else use the last curve).</param>
+        /// <param name="beforeToneNum">Tone number of the preceding syllable, -1 if none. Currently unused.</param>
+        /// <param name="nextToneNum">Tone number of the following syllable, -1 if none. Only affects tone 1.</param>
+        /// <returns>A freshly allocated array of factors.</returns>
+        public double[] getZipDatas(int toneNum, int beforeToneNum = -1, int nextToneNum = -1)
+        {
+            switch (toneNum)
+            {
+                case 1:
+                    // A first tone followed by a tone-5 syllable gets its own curve.
+                    return nextToneNum == 5
+                        ? new double[] { 1.1, 1.1, 0.9 }
+                        : new double[] { 1 };
+                case 2:
+                    return new double[] { 0.9, 1 };
+                case 3:
+                    return new double[] { 0.95, 0.88, 0.9 };
+                case 4:
+                    return new double[] { 1.1, 0.98, 0.92 };
+                default:
+                    // Tone 5 and every unrecognised tone number.
+                    return new double[] { 0.9, 0.87 };
+            }
+        }
+
+
+
+        /// <summary>
+        /// Builds a SynTone from the arguments (with 0 as its fifth argument and
+        /// defaultpitch as its sixth) and asks the SoundAnalysis instance to render it.
+        /// </summary>
+        /// <param name="name">Name of the tone to render.</param>
+        /// <param name="pitdata">Pitch data forwarded to SynTone.</param>
+        /// <param name="len">Length forwarded to SynTone.</param>
+        /// <param name="volume">Volume data forwarded to SynTone.</param>
+        /// <returns>Whatever sa.getSoundTone produces for the tone.</returns>
+        public int[] getSoundData(string name, double[] pitdata, int len, double[] volume)
+        {
+            return sa.getSoundTone(new SynTone(name, pitdata, len, volume, 0, defaultpitch));
+        }
+
+        /// <summary>
+        /// Writes the samples to a WAV file through WAVControl.writeWAV.
+        /// </summary>
+        /// <param name="wavdata">Samples to write.</param>
+        /// <param name="filename">Target file; when null the outputTone path is used.</param>
+        public void writeWAV(int[] wavdata, string filename = null)
+        {
+            WAVControl.writeWAV(wavdata, filename ?? outputTone);
+        }
+
+        /// <summary>
+        /// Plays a WAV file and blocks until playback has finished.
+        /// </summary>
+        /// <param name="filename">Path of the WAV file to play.</param>
+        public void playSound(string filename)
+        {
+            // SoundPlayer is IDisposable; release it deterministically instead of
+            // leaving one undisposed player behind per played sentence.
+            using (System.Media.SoundPlayer player = new System.Media.SoundPlayer(filename))
+            {
+                player.PlaySync();
+            }
+        }
+
+        /// <summary>
+        /// Synthesizes a text into one sample buffer.
+        /// The text is cut into sentences and converted to pinyin words; each syllable is
+        /// rendered with getSoundData, scaled by soundheight / 100, and its first part is
+        /// mixed into the tail of the audio produced so far. Silence is inserted before
+        /// every sentence and after every word.
+        /// </summary>
+        /// <param name="str">Text to synthesize.</param>
+        /// <returns>All samples of the synthesized text.</returns>
+        /// <exception cref="InvalidOperationException">soundSpeed is not positive.</exception>
+        public int[] showSound(string str)
+        {
+            // A non-positive speed would make the silence length infinite or negative.
+            if (soundSpeed <= 0)
+            {
+                throw new InvalidOperationException("soundSpeed must be greater than zero.");
+            }
+
+            // Fraction of every syllable that overlaps the preceding audio.
+            const double overlapRatio = 0.22 / 2;
+
+            string[] sentences = pyconv.cutSentencesAll(str);
+            List<int> allres = new List<int>();
+
+            foreach (var sentence in sentences)
+            {
+                // Pause before each sentence.
+                appendSilence(allres, 10000 * ((double)100 / soundSpeed));
+
+                List<List<string>> pinyin = pyconv.getPinYinList(sentence);
+                foreach (var p in pinyin)
+                {
+                    for (int i = 0; i < p.Count; i++)
+                    {
+                        // Syllables inside a word follow each other without a gap;
+                        // only the last syllable of a word is followed by silence.
+                        bool isLastInWord = i == p.Count - 1;
+
+                        int beforetonenum = i >= 1 ? parseToneNumber(p[i - 1]) : -1;
+                        int nexttonenum = isLastInWord ? -1 : parseToneNumber(p[i + 1]);
+                        int tonenum = parseToneNumber(p[i]);
+                        string ch = p[i].Substring(0, p[i].Length - 1);
+
+                        int duration = getSyllableDuration(tonenum, isLastInWord);
+
+                        int[] thisSoundFrame = getSoundData(
+                            ch,
+                            getZipDatas(tonenum, beforetonenum, nexttonenum),
+                            (int)(duration / (soundSpeed / 100)),
+                            new double[] { this.soundheight / 100 });
+                        for (int k = 0; k < thisSoundFrame.Length; k++)
+                        {
+                            thisSoundFrame[k] = (int)((double)thisSoundFrame[k] * soundheight / 100);
+                        }
+
+                        // Mix the head of the syllable into the tail of the existing audio.
+                        // The overlap is clamped so it never reaches before the buffer start.
+                        int partlen = (int)(thisSoundFrame.Length * overlapRatio);
+                        int overlap = Math.Min(partlen, allres.Count);
+                        int mixStart = allres.Count - overlap;
+                        for (int k = 0; k < overlap; k++)
+                        {
+                            allres[mixStart + k] += thisSoundFrame[k];
+                        }
+                        for (int k = overlap; k < thisSoundFrame.Length; k++)
+                        {
+                            allres.Add(thisSoundFrame[k]);
+                        }
+
+                        if (isLastInWord)
+                        {
+                            // Gap between words.
+                            appendSilence(allres, 4200 * ((double)100 / soundSpeed));
+                        }
+                    }
+                }
+            }
+
+            return allres.ToArray();
+        }
+
+        // Reads the tone number from the trailing digit of a pinyin syllable.
+        private static int parseToneNumber(string syllable)
+        {
+            return int.Parse(syllable.Substring(syllable.Length - 1));
+        }
+
+        // Base duration of a syllable before speed scaling (presumably milliseconds - TODO confirm).
+        private static int getSyllableDuration(int tonenum, bool isLastInWord)
+        {
+            if (tonenum == 5) return 425;                   // neutral tone
+            if (tonenum == 3) return 575;                   // third tone, spoken longer
+            if (tonenum == 2 && !isLastInWord) return 475;  // second tone inside a word, spoken shorter
+            return 500;                                     // everything else, including a word-final fourth tone
+        }
+
+        // Appends zero samples while the running index is below sampleCount.
+        private static void appendSilence(List<int> target, double sampleCount)
+        {
+            for (int s = 0; s < sampleCount; s++)
+            {
+                target.Add(0);
+            }
+        }
+
+        // Running total of sentences taken up for synthesis; incremented once per loop pass
+        // and never reset, so it must not be used to derive file names.
+        public int outputCount = 0;
+        // True while a read is in progress; setting it to false cancels synthesis and playback.
+        public bool isReading = false;
+
+        /// <summary>
+        /// Thread entry point: synthesizes every sentence into tmp_talking\tmp_{index}.wav,
+        /// where index is the zero-based position of the sentence. Stops early once
+        /// isReading becomes false.
+        /// </summary>
+        /// <param name="sentences">A string[] holding the sentences to synthesize.</param>
+        /// <exception cref="ArgumentException">sentences is not a string[].</exception>
+        public void createMYSSwavs(object sentences)
+        {
+            string[] sentence = sentences as string[];
+            if (sentence == null)
+            {
+                throw new ArgumentException("sentences must be a non-null string[].", "sentences");
+            }
+
+            string bufferpath = @"tmp_talking\";
+            for (int i = 0; i < sentence.Length; i++)
+            {
+                Interlocked.Increment(ref outputCount);
+                if (!isReading) break;
+
+                // Name the file after the sentence index: the consumer looks for tmp_0,
+                // tmp_1, ... whereas the incremented counter starts at 1 and keeps growing
+                // across calls, which left the first file missing forever.
+                string filename = string.Format(@"{0}tmp_{1}.wav", bufferpath, i);
+                string partial = filename + ".part";
+
+                int[] tmp = showSound(sentence[i]);
+
+                // Write under a temporary name and rename afterwards, so a reader polling
+                // for the final name never sees a half-written file.
+                writeWAV(tmp, partial);
+                if (File.Exists(filename)) File.Delete(filename);
+                File.Move(partial, filename);
+            }
+        }
+
+        /// <summary>
+        /// Main entry point: reads a text aloud. The text is split into sentences, a worker
+        /// thread synthesizes them into tmp_talking\tmp_{index}.wav, and this method plays
+        /// the files in order as they appear. Blocks until playback is done or isReading is
+        /// set to false, so it should be run on a separate thread.
+        /// </summary>
+        /// <param name="str">The text to read, passed as a string.</param>
+        public void readSentence(object str)
+        {
+            isReading = true;
+            try
+            {
+                PinYinConverter splitter = new PinYinConverter();
+                string[] sentences = splitter.cutSentencesAll(str as string);
+
+                // Start from an empty buffer folder so stale files are never played.
+                string bufferpath = @"tmp_talking\";
+                if (Directory.Exists(bufferpath)) Directory.Delete(bufferpath, true);
+                Directory.CreateDirectory(bufferpath);
+
+                new Thread(createMYSSwavs).Start(sentences);
+
+                for (int i = 0; i < sentences.Length; i++)
+                {
+                    string filename = string.Format(@"{0}tmp_{1}.wav", bufferpath, i);
+
+                    // Poll for the synthesized file without burning a CPU core.
+                    while (isReading && !File.Exists(filename))
+                    {
+                        Thread.Sleep(10);
+                    }
+                    if (!isReading) break;
+
+                    try
+                    {
+                        playSound(filename);
+                    }
+                    catch (Exception ex)
+                    {
+                        // Playback stays best-effort: skip this sentence, but leave a trace.
+                        Debug.WriteLine("readSentence: could not play " + filename + ": " + ex.Message);
+                    }
+                }
+            }
+            finally
+            {
+                // Always clear the flag, even on failure, so the worker thread stops
+                // and later calls are not confused by a stale "reading" state.
+                isReading = false;
+            }
+        }
+    }
+}