forked from open-mmlab/Amphion
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathexp_config.json
33 lines (33 loc) · 1.26 KB
/
exp_config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
{
"base_config": "config/valle.json",
"model_type": "VALLE",
"dataset": [
"libritts"
],
"dataset_path": {
"libritts": "[LibriTTS dataset path]"
},
"preprocess": {
"extract_phone": true,
"phone_extractor": "espeak", // "espeak, pypinyin, pypinyin_initials_finals, lexicon (only for language=en-us right now)"
"extract_acoustic_token": true,
"use_phone": true,
"use_acoustic_token": true,
"processed_dir": "Amphion/data/",
"sample_rate": 24000, // "Audio sampling rate."
"codec_hop_size": 320, // "Audio codec hop size."
"valid_file": "test.json",
},
"model": {
"prefix_mode": 1, // "The mode for how to prefix VALL-E NAR Decoder, 0: no prefix, 1: 0 to random, 2: random to random, 4: chunk of pre or post utterance.",
},
"log_dir": "Amphion/ckpts/tts/valle",
"train": {
"batch_size": 4,
"train_stage": 1, // 0: train all modules, For VALL_E, support 1: AR Decoder 2: NAR Decoder(s)
"max_epoch": 20, // "Number of epochs to train."
"use_dynamic_batchsize": true, // If use dynamic batch size
"max_tokens": 4000, // If use dynamic batch size
"max_sentences": 10 // If use dynamic batch size
}
}