-
Notifications
You must be signed in to change notification settings - Fork 130
/
Copy pathrunTransferXvector.sh
executable file
·171 lines (138 loc) · 5.87 KB
/
runTransferXvector.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
#!/bin/bash
# Copyright xmuspeech (Author:Snowdar 2018-07-27)
# This script is copied from sre16/v1/local/xvector but more suitable for our own recipe.
# Purpose: transfer-train an xvector nnet3 model starting from an existing
# model's weights (see $transferdir), then extract embeddings.
set -e
# Pipeline window: only stages in [stage, endstage] run; each numbered block
# below checks this window before executing.
stage=0
endstage=5
train_stage=0 # start train_raw_dnn.py from stage 0 rather than Kaldi's default -10
use_gpu=true
remove_egs=true
sleep_time=3
model_limit=8
# Data/model naming. NOTE: all variables above and below must be declared
# BEFORE parse_options.sh is sourced -- it only overrides variables that
# already exist (e.g. --train-stage sets $train_stage).
traindata=data/plp_20_5.0/confusingThreeTrain_concat_volume_sp
outputname=lstm_concat_volume_sp_xv_plp_20_5.5_cmn
transferdir=exp/concat_volume_sp_xv_plp_20_5.0_cmn
. subtools/path.sh
. subtools/kaldi/utils/parse_options.sh
nnet_dir=exp/$outputname
egs_dir=exp/$outputname/egs
mkdir -p $nnet_dir
# control.conf: presumably polled by the subtools training wrapper to throttle
# concurrent training jobs (sleep interval / model count limit) -- TODO confirm.
echo -e "SleepTime=$sleep_time\nLimit=$model_limit" > $nnet_dir/control.conf
transfermodel=$transferdir/transfer.raw # get from nnet3-copy by extra.config in stage 3
# Stage 0: build a silence-free copy of the training data for egs creation,
# removing any output left over from a previous run first.
if [[ $stage -le 0 && $endstage -ge 0 ]]; then
  rm -rf "${traindata}_nosil" "exp/features/${traindata}_nosil"
  subtools/kaldi/sid/nnet3/xvector/prepare_feats_for_egs.sh \
    --nj 20 --cmd "run.pl" \
    "$traindata" "${traindata}_nosil" "exp/features/${traindata}_nosil"
fi
# Count distinct speakers (column 2 of utt2spk). Kept for reference: the
# actual output dimension used in stage 3 is read from the egs dir's pdf2num.
# (Was `sort | uniq -c | wc -l`; `sort -u` counts distinct values directly.)
num_pdfs=$(awk '{print $2}' "$traindata/utt2spk" | sort -u | wc -l)
min_chunk=60      # min frames per training chunk (passed to get_egs in stage 2)
max_chunk=80      # max frames per training chunk
num_archives=150  # number of training egs archives to generate
# Stage 1: drop utterances shorter than max_chunk so every egs chunk fits.
if [[ $stage -le 1 && 1 -le $endstage ]];then
subtools/removeUtt.sh ${traindata}_nosil $max_chunk
fi
# Stage 2: dump training examples (egs) for xvector training.
if [[ $stage -le 2 && $endstage -ge 2 ]]; then
  # Collect the options in an array so the invocation stays readable;
  # the resulting argv is identical to passing them inline.
  egs_opts=(
    --cmd "run.pl"
    --nj 20
    --stage 0
    --num-train-archives $num_archives
    --frames-per-iter-diagnostic 100000
    --min-frames-per-chunk $min_chunk
    --max-frames-per-chunk $max_chunk
    --num-diagnostic-archives 3
    --num-repeats 6000
  )
  subtools/kaldi/sid/nnet3/xvector/get_egs.sh "${egs_opts[@]}" \
    "${traindata}_nosil" $egs_dir
fi
# Stage 3: generate the nnet3 network configs and prepare the transfer source
# model (transfer.raw): the source model's output layer is replaced by a
# freshly zero-initialized one sized for this task's speaker set.
if [[ $stage -le 3 && 3 -le $endstage ]];then
# Output dim = number of training speakers, taken from the egs made in stage 2.
num_targets=$(wc -w $egs_dir/pdf2num | awk '{print $1}')
feat_dim=$(cat $egs_dir/info/feat_dim)
# Chunk-size bounds consumed later by extract_xvectors.sh (written below).
max_chunk_size=10000
min_chunk_size=25
mkdir -p $nnet_dir/configs
cat <<EOF > $nnet_dir/configs/network.xconfig
# please note that it is important to have input layer with the name=input
# The frame-level layers
input dim=${feat_dim} name=input
relu-batchnorm-layer name=tdnn1 input=Append(-2,-1,0,1,2) dim=512
relu-batchnorm-layer name=tdnn2 input=Append(-2,0,2) dim=512
relu-batchnorm-layer name=tdnn3 input=Append(-3,0,3) dim=512
relu-batchnorm-layer name=tdnn4 dim=512
relu-batchnorm-layer name=tdnn5 dim=1500
# The stats pooling layer. Layers after this are segment-level.
# In the config below, the first and last argument (0, and ${max_chunk_size})
# means that we pool over an input segment starting at frame 0
# and ending at frame ${max_chunk_size} or earlier. The other arguments (1:1)
# mean that no subsampling is performed.
stats-layer name=stats config=mean+stddev(0:1:1:${max_chunk_size})
# This is where we usually extract the embedding (aka xvector) from.
relu-batchnorm-layer name=tdnn6 dim=512 input=stats
# This is where another layer the embedding could be extracted
# from, but usually the previous one works better.
relu-batchnorm-layer name=tdnn7 dim=512
output-layer name=output include-log-softmax=true dim=${num_targets}
EOF
# Compile the xconfig above into nnet3 config files under $nnet_dir/configs.
subtools/kaldi/steps/nnet3/xconfig_to_configs.py \
--xconfig-file $nnet_dir/configs/network.xconfig \
--config-dir $nnet_dir/configs
cp $nnet_dir/configs/final.config $nnet_dir/nnet.config
# These three files will be used by sid/nnet3/xvector/extract_xvectors.sh
echo "output-node name=output input=tdnn6.affine" > $nnet_dir/extract_tdnn6.config
cp -f $nnet_dir/extract_tdnn6.config $nnet_dir/extract.config
echo "output-node name=output input=tdnn7.affine" > $nnet_dir/extract_tdnn7.config
echo "$max_chunk_size" > $nnet_dir/max_chunk_size
echo "$min_chunk_size" > $nnet_dir/min_chunk_size
# extra.config swaps in a new output affine + log-softmax (param-stddev=0.0,
# bias-stddev=0.0 => zero-initialized) on top of the source model's tdnn7.
cat <<EOF > $transferdir/extra.config
component name=output.affine type=NaturalGradientAffineComponent input-dim=512 output-dim=${num_targets} param-stddev=0.0 bias-stddev=0.0 max-change=1.5
component-node name=output.affine component=output.affine input=tdnn7.batchnorm
component name=output.log-softmax type=LogSoftmaxComponent dim=${num_targets}
component-node name=output.log-softmax component=output.log-softmax input=output.affine
output-node name=output input=output.log-softmax objective=linear
EOF
# Build transfer.raw once; skipped if it already exists from a previous run.
[ ! -f $transfermodel ] && nnet3-copy --nnet-config=$transferdir/extra.config $transferdir/final.raw $transfermodel
fi
# Dropout ramps 0 -> 0.1 over the first 50% of training, then decays back to 0
# (the standard Kaldi xvector recipe schedule). FIX: the previous value
# "0,[email protected],[email protected],0" was mangled by an HTML email-protection
# filter that rewrote "0@0.20" and "0.1@0.50"; restored the canonical form.
dropout_schedule='0,0@0.20,0.1@0.50,0'
srand=123
# Stage 4: transfer training -- seed iteration 0 with the transfer model
# (instead of a random init) and run nnet3 raw training.
if [[ $stage -le 4 && 4 -le $endstage ]]; then
cp -f $transfermodel $nnet_dir/0.raw
subtools/kaldi/steps/nnet3/train_raw_dnn.py --stage=$train_stage \
--cmd="run.pl" \
--trainer.optimization.proportional-shrink 10 \
--trainer.optimization.momentum=0.5 \
--trainer.optimization.num-jobs-initial=2 \
--trainer.optimization.num-jobs-final=8 \
--trainer.optimization.initial-effective-lrate=0.001 \
--trainer.optimization.final-effective-lrate=0.0001 \
--trainer.optimization.minibatch-size=128 \
--trainer.srand=$srand \
--trainer.max-param-change=2 \
--trainer.num-epochs=3 \
--trainer.dropout-schedule="$dropout_schedule" \
--trainer.shuffle-buffer-size=1000 \
--egs.frames-per-eg=1 \
--egs.dir="$egs_dir" \
--cleanup=true \
--cleanup.remove-egs=$remove_egs \
--cleanup.preserve-model-interval=10 \
--use-gpu=$use_gpu \
--dir=$nnet_dir || exit 1;
fi
# Stage 5: extract embeddings from layers tdnn6 and tdnn7 for each dataset
# listed in $toEXdata, re-running only when needed.
if [[ $stage -le 5 && 5 -le $endstage ]]; then
prefix=plp_20_5.0
toEXdata="baseTrain_volume_sp test_1s_concat_sp"
layer="tdnn6 tdnn7"
nj=20
gpu=false
cache=1000
for x in $toEXdata ;do
for y in $layer ;do
num=0
# Count ERROR lines in the previous extraction's logs (if any output exists).
[ -f $nnet_dir/$y/$x/xvector.scp ] && num=$(grep ERROR $nnet_dir/$y/$x/log/extract.*.log | wc -l)
# Re-extract when forced (--force true), when no output exists yet, or when
# the previous run logged errors. $force is expected to come from the
# command line via parse_options.sh -- NOTE(review): it has no default
# declaration above, so --force may not actually be parsed; confirm.
[[ "$force" == "true" || ! -f $nnet_dir/$y/$x/xvector.scp || $num -gt 0 ]] && \
subtools/kaldi/sid/nnet3/xvector/extract_xvectors.sh --cache-capacity $cache --extract-config extract_${y}.config \
--use-gpu $gpu --nj $nj $nnet_dir data/${prefix}/$x $nnet_dir/$y/$x
# Create an empty marker file named after the feature prefix.
# NOTE(review): this runs unconditionally, even when extraction was skipped
# above -- possibly intended to be part of the && chain; confirm.
> $nnet_dir/$y/$x/$prefix
echo "$y layer embeddings of data/$prefix/$x extracted done."
done
done
fi