-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.yaml
94 lines (71 loc) · 3.72 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
## Main class for the model configuration, containing the general hyperparameters for training the speech enhancement model
# NOTE(review): the nesting below was reconstructed (every key had been
# flattened to column 0, which leaves `SE_Config` null) -- confirm the
# hierarchy against the code that loads this file.
SE_Config:
  # Signal framing. Presumably sample rate (Hz), frame length and hop length
  # in samples -- TODO confirm against the loader.
  SR: 16000
  L_FRAME: 4096  # 2048 * 2
  L_HOP: 512

  stage: 1
  batchSize: 2

  # Clean / noisy audio directories used for training.
  clean_files_dir: "/home/karan/datasets/edinburgh_se_dataset/clean_trainset_wav"
  noisy_files_dir: "/home/karan/datasets/edinburgh_se_dataset/noisy_trainset_wav"

  # Clean / noisy audio directories used for evaluation.
  clean_files_eval_dir: "/home/karan/datasets/edinburgh_se_dataset/clean_testset_wav_16k"
  noisy_files_eval_dir: "/home/karan/datasets/edinburgh_se_dataset/noisy_testset_wav_16k"

  # Tuning directories; left as empty strings here.
  tuning_clean_files_dir: ""
  tuning_noisy_files_dir: ""

  # Best-checkpoint paths for layer 1: one per stream, then the layer-level
  # entry (empty string here).
  lay1_stream1_bestchkpt_path: "/workspace/coherence_net/checkpoint/Ashwin_SE/layer1/Stream1_epoch250000.pt"
  lay1_stream2_bestchkpt_path: "/workspace/coherence_net/checkpoint/Ashwin_SE/layer1/Stream2_epoch135000.pt"
  lay1_stream3_bestchkpt_path: "/workspace/coherence_net/checkpoint/Ashwin_SE/layer1/Stream3_epoch430000.pt"
  lay1_bestchkpt_path: ""

  # Best-checkpoint paths for layer 2; all empty strings here.
  lay2_stream1_bestchkpt_path: ""
  lay2_stream2_bestchkpt_path: ""
  lay2_stream3_bestchkpt_path: ""
  lay2_bestchkpt_path: ""
# Training schedule and per-layer architecture settings.
# NOTE(review): the nesting below was reconstructed. With every key at
# column 0 the layer1/layer2 keys (num_layers, kernel_size, input_channels,
# ...) become duplicate mapping keys and most parsers silently keep only the
# last value -- confirm the hierarchy against the code that loads this file.
Coherence_Net_Config:
  NAME: 'Ashwin_SE'

  ## Train Config
  checkpoint_path: "checkpoint/"
  LR: 0.0001
  DECAY_RATE: 0.02
  DECAY_STEP: 215000
  FINAL_STEP: 250001
  CKPT_STEP: 25000
  train_split: 0.9  # 90 percent
  stream1_steps: 20  # the number of epochs
  stream2_steps: 20  # the number of epochs
  stream3_steps: 20  # the number of epochs
  stream_integerator_steps: 20  # the number of epochs

  ## Layer 1 Config
  layer1:
    num_layers: 2
    num_streams_per_layer: 3
    input_channels: 1  # The number of input channels
    kernel_size: 3  # The kernel size for convolutions
    embedding_dim: 128  # The embedding dim before the dcs
    dcs_num_layers: 3  # Number of conv layers in dcs
    dcs_output_dim: 10  # The output embedding of the dcs blocks
    # Dilations in the dcs block; the length should equal dcs_num_layers.
    dcs_dilations: [3, 6, 9]
    num_sources: 2  # The number of sources to separate
    freq_bins: 2049  # The number of input freq bins
    time_frames: 64  # The time frame of each input
    max_pool_kernel: 2  # The max pool layer kernel
    max_pool_stride: 2  # The max pool layer stride
    # num_streams_per_layer * (embedding_dim + dcs_num_layers * dcs_output_dim)
    # = 3 * (128 + 3 * 10) = 474
    stream_integerator_input_channels: 474
    stream_integ_embedding: 96
    output_channels: 1

  ## Layer 2 Config
  layer2:
    num_layers: 2
    num_streams_per_layer: 3
    kernel_size: 3  # The kernel size for convolutions
    embedding_dim: 128  # The embedding dim before the dcs
    dcs_num_layers: 3  # Number of conv layers in dcs
    dcs_output_dim: 10  # The output embedding of the dcs blocks
    # Dilations in the dcs block; the length should equal dcs_num_layers.
    dcs_dilations: [3, 6, 9]
    num_sources: 2  # The number of sources to separate
    freq_bins: 2049  # The number of input freq bins
    time_frames: 64  # The time frame of each input
    max_pool_kernel: 2  # The max pool layer kernel
    max_pool_stride: 2  # The max pool layer stride
    # num_streams_per_layer * (embedding_dim + dcs_num_layers * dcs_output_dim)
    # = 3 * (128 + 3 * 10) = 474
    stream_integerator_input_channels: 474
    # The number of input channels to layer 2; same formula as above.
    input_channels: 474
    stream_integ_embedding: 96
    output_channels: 1