From ca32c4e2bd77544c4ad4aa0bba25f0b812ea1a61 Mon Sep 17 00:00:00 2001
From: GoVivace
Date: Mon, 31 Dec 2018 18:22:34 -0500
Subject: [PATCH] [egs] Add more modern DNN recipe for fisher_callhome_spanish (#2951)

---
 .../s5/conf/mfcc_hires.conf                   |  10 +
 .../s5/conf/online_cmvn.conf                  |   1 +
 .../s5/local/chain/run_tdnn_1g.sh             | 288 ++++++++++++++++++
 .../s5/local/nnet3/run_ivector_common.sh      | 187 ++++++++++++
 .../s5/local/rnnlm/train_rnnlm.sh             | 101 ++++++
 egs/fisher_callhome_spanish/s5/path.sh        |   1 +
 egs/fisher_callhome_spanish/s5/rnnlm          |   1 +
 egs/fisher_callhome_spanish/s5/run.sh         | 154 +++++-----
 8 files changed, 662 insertions(+), 81 deletions(-)
 create mode 100644 egs/fisher_callhome_spanish/s5/conf/mfcc_hires.conf
 create mode 100644 egs/fisher_callhome_spanish/s5/conf/online_cmvn.conf
 create mode 100755 egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
 create mode 100755 egs/fisher_callhome_spanish/s5/local/nnet3/run_ivector_common.sh
 create mode 100755 egs/fisher_callhome_spanish/s5/local/rnnlm/train_rnnlm.sh
 create mode 120000 egs/fisher_callhome_spanish/s5/rnnlm

diff --git a/egs/fisher_callhome_spanish/s5/conf/mfcc_hires.conf b/egs/fisher_callhome_spanish/s5/conf/mfcc_hires.conf
new file mode 100644
index 00000000000..d870ab04c38
--- /dev/null
+++ b/egs/fisher_callhome_spanish/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated), which is why
+# we prefer this method.
+--use-energy=false       # use average of log energy, not energy.
+--sample-frequency=8000  # Fisher/Callhome Spanish is telephone speech sampled at 8kHz
+--num-mel-bins=40        # similar to Google's setup.
+--num-ceps=40            # there is no dimensionality reduction.
+--low-freq=40            # low cutoff frequency for mel bins
+--high-freq=-200         # high cutoff frequency, relative to the Nyquist of 4000 (=3800)

diff --git a/egs/fisher_callhome_spanish/s5/conf/online_cmvn.conf b/egs/fisher_callhome_spanish/s5/conf/online_cmvn.conf
new file mode 100644
index 00000000000..7748a4a4dd3
--- /dev/null
+++ b/egs/fisher_callhome_spanish/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh

diff --git a/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
new file mode 100755
index 00000000000..c487f1bd222
--- /dev/null
+++ b/egs/fisher_callhome_spanish/s5/local/chain/run_tdnn_1g.sh
@@ -0,0 +1,288 @@
+#!/bin/bash
+
+# 1g is like 1f, but upgraded to a "resnet-style TDNN-F model", i.e.
+# with bypass (resnet-style) connections, and re-tuned.
+# compute-wer --text --mode=present ark:exp/chain/multipsplice_tdnn/decode_fsp_train_test/scoring_kaldi/test_filt.txt ark,p:-
+# %WER 22.21 [ 8847 / 39831, 1965 ins, 2127 del, 4755 sub ]
+# %SER 56.98 [ 3577 / 6278 ]
+# Scored 6278 sentences, 0 not present in hyp.
+
+# steps/info/chain_dir_info.pl exp/chain/multipsplice_tdnn
+# exp/chain/multipsplice_tdnn: num-iters=296 nj=1..2 num-params=8.2M dim=40+100->2489 combine=-0.170->-0.165 (over 8) xent:train/valid[196,295,final]=(-2.30,-1.93,-1.83/-2.24,-1.96,-1.86) logprob:train/valid[196,295,final]=(-0.208,-0.169,-0.164/-0.189,-0.161,-0.158)
+
+set -e -o pipefail
+
+# First the options that are passed through to run_ivector_common.sh
+# (some of which are also used in this script directly).
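+#
+# For reference, a minimal sketch of how these pass-through options are
+# typically forwarded (illustrative only; the actual invocation appears
+# further down in this script and is authoritative, and the exact flag
+# names accepted by run_ivector_common.sh should be checked there):
+#
+#   local/nnet3/run_ivector_common.sh --stage $stage --nj $nj \
+#     --train-set $train_set --gmm $gmm \
+#     --num-threads-ubm $num_threads_ubm \
+#     --nnet3-affix "$nnet3_affix"
+#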
+stage=0 +nj=30 +train_set=train +test_sets="test dev" +gmm=tri5a # this is the source gmm-dir that we'll use for alignments; it + # should have alignments for the specified training data. +num_threads_ubm=32 +nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. + +# Options which are not passed through to run_ivector_common.sh +affix=1g #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +common_egs_dir= +reporting_email= + +# LSTM/chain options +train_stage=-10 +xent_regularize=0.1 +dropout_schedule='0,0@0.20,0.3@0.50,0' + +# training chunk-options +chunk_width=140,100,160 +# we don't need extra left/right context for TDNN systems. +chunk_left_context=0 +chunk_right_context=0 + +# training options +srand=0 +remove_egs=true + +#decode options +test_online_decoding=false # if true, it will run the last decoding stage. + +# End configuration section. +echo "$0 $@" # Print the command line for logging + + +. ./cmd.sh +. ./path.sh +. ./utils/parse_options.sh + + +if ! cuda-compiled; then + cat <$lang/topo + fi +fi + +if [ $stage -le 17 ]; then + # Get the alignments as lattices (gives the chain training more freedom). + # use the same num-jobs as the alignments + steps/align_fmllr_lats.sh --nj 100 --cmd "$train_cmd" ${lores_train_data_dir} \ + data/lang $gmm_dir $lat_dir + rm $lat_dir/fsts.*.gz # save space +fi + +if [ $stage -le 18 ]; then + # Build a tree using our new topology. We know we have alignments for the + # speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use + # those. The num-leaves is always somewhat less than the num-leaves from + # the GMM baseline. + if [ -f $tree_dir/final.mdl ]; then + echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it." + exit 1; + fi + steps/nnet3/chain/build_tree.sh \ + --frame-subsampling-factor 3 \ + --context-opts "--context-width=2 --central-position=1" \ + --cmd "$train_cmd" 3500 ${lores_train_data_dir} \ + $lang $ali_dir $tree_dir +fi + + +if [ $stage -le 19 ]; then + mkdir -p $dir + echo "$0: creating neural net configs using the xconfig parser"; + + num_targets=$(tree-info $tree_dir/tree |grep num-pdfs|awk '{print $2}') + learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python) + tdnn_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim-continuous=true" + tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66" + linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0" + prefinal_opts="l2-regularize=0.01" + output_opts="l2-regularize=0.005" + + mkdir -p $dir/configs + cat < $dir/configs/network.xconfig + input dim=100 name=ivector + input dim=40 name=input + + # please note that it is important to have input layer with the name=input + # as the layer immediately preceding the fixed-affine-layer to enable + # the use of short notation for the descriptor + fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat + + # the first splicing is moved before the lda layer, so no splicing here + relu-batchnorm-dropout-layer name=tdnn1 $tdnn_opts dim=1024 + tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=1 + tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=0 + tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer 
name=tdnnf7 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1024 bottleneck-dim=128 time-stride=3 + linear-component name=prefinal-l dim=192 $linear_opts + + + prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output include-log-softmax=false dim=$num_targets $output_opts + + prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1024 small-dim=192 + output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts +EOF + steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/ +fi + + +if [ $stage -le 20 ]; then + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then + utils/create_split_dir.pl \ + /export/b0{3,4,5,6}/$USER/kaldi-data/egs/wsj-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage + fi + + steps/nnet3/chain/train.py --stage=$train_stage \ + --cmd "$decode_cmd" \ + --feat.online-ivector-dir $train_ivector_dir \ + --feat.cmvn-opts "--norm-means=false --norm-vars=false" \ + --chain.xent-regularize $xent_regularize \ + --chain.leaky-hmm-coefficient 0.1 \ + --chain.l2-regularize 0.0 \ + --chain.apply-deriv-weights false \ + --chain.lm-opts="--num-extra-lm-states=2000" \ + --trainer.dropout-schedule $dropout_schedule \ + --trainer.srand $srand \ + --trainer.max-param-change 2.0 \ + --trainer.num-epochs 4 \ + --trainer.frames-per-iter 5000000 \ + --trainer.optimization.num-jobs-initial 1 \ + --trainer.optimization.num-jobs-final=2 \ + --trainer.optimization.initial-effective-lrate 0.0005 \ + --trainer.optimization.final-effective-lrate 0.00005 \ + --trainer.num-chunk-per-minibatch 128,64 \ + --trainer.optimization.momentum 0.0 \ + --egs.chunk-width $chunk_width \ + --egs.chunk-left-context 0 \ + --egs.chunk-right-context 0 \ + --egs.dir "$common_egs_dir" \ + --egs.opts "--frames-overlap-per-eg 0" \ + --cleanup.remove-egs $remove_egs \ + --use-gpu true \ + --feat-dir $train_data_dir \ + --tree-dir $tree_dir \ + --lat-dir exp/tri5a_lats_nodup_sp \ + --dir $dir || exit 1; +fi + +if [ $stage -le 21 ]; then + # The reason we are using data/lang_test here, instead of $lang, is just to + # emphasize that it's not actually important to give mkgraph.sh the + # lang directory with the matched topology (since it gets the + # topology file from the model). So you could give it a different + # lang directory, one that contained a wordlist and LM of your choice, + # as long as phones.txt was compatible. + #LM was trained only on Fisher Spanish train subset. + + utils/mkgraph.sh \ + --self-loop-scale 1.0 data/lang_test \ + $tree_dir $tree_dir/graph_fsp_train || exit 1; + +fi + +rnnlmdir=exp/rnnlm_lstm_tdnn_1b +if [ $stage -le 22 ]; then + local/rnnlm/train_rnnlm.sh --dir $rnnlmdir || exit 1; +fi + +if [ $stage -le 23 ]; then + frames_per_chunk=$(echo $chunk_width | cut -d, -f1) + rm $dir/.error 2>/dev/null || true + + for data in $test_sets; do + ( + nspk=$(wc -l 7." 
+ exit 1 +fi + + +if [ $stage -le 8 ]; then + echo "$0: preparing directory for speed-perturbed data" + utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp +fi + +if [ $stage -le 9 ]; then + echo "$0: creating high-resolution MFCC features" + + # this shows how you can split across multiple file-systems. we'll split the + # MFCC dir across multiple locations. You might want to be careful here, if you + # have multiple copies of Kaldi checked out and run the same recipe, not to let + # them overwrite each other. + mfccdir=data/${train_set}_sp_hires/data + if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $mfccdir/storage ]; then + utils/create_split_dir.pl /export/b0{5,6,7,8}/$USER/kaldi-data/mfcc/wsj-$(date +'%m_%d_%H_%M')/s5/$mfccdir/storage $mfccdir/storage + fi + + for datadir in ${train_set}_sp ${test_sets}; do + utils/copy_data_dir.sh data/$datadir data/${datadir}_hires + done + + # do volume-perturbation on the training data prior to extracting hires + # features; this helps make trained nnets more invariant to test data volume. + utils/data/perturb_data_dir_volume.sh data/${train_set}_sp_hires + + for datadir in ${train_set}_sp ${test_sets}; do + steps/make_mfcc.sh --nj $nj --mfcc-config conf/mfcc_hires.conf \ + --cmd "$train_cmd" data/${datadir}_hires + steps/compute_cmvn_stats.sh data/${datadir}_hires + utils/fix_data_dir.sh data/${datadir}_hires + done +fi + +if [ $stage -le 10 ]; then + echo "$0: computing a subset of data to train the diagonal UBM." + + mkdir -p exp/nnet3${nnet3_affix}/diag_ubm + temp_data_root=exp/nnet3${nnet3_affix}/diag_ubm + + # train a diagonal UBM using a subset of about a quarter of the data + num_utts_total=$(wc -l $text_dir/ami.txt + cat $dev | cut -d ' ' -f2- > $text_dir/dev.txt +fi + +if [ $stage -le 1 ]; then + cp $wordlist $dir/config/ + n=`cat $dir/config/words.txt | wc -l` + echo " $n" >> $dir/config/words.txt + + # words that are not present in words.txt but are in the training or dev data, will be + # mapped to during training. + echo "" >$dir/config/oov.txt + + cat > $dir/config/data_weights.txt <$dir/config/unigram_probs.txt + + # choose features + rnnlm/choose_features.py --unigram-probs=$dir/config/unigram_probs.txt \ + --use-constant-feature=true \ + --top-word-features 10000 \ + --min-frequency 1.0e-03 \ + --special-words=',,,,[noise],[laughter]' \ + $dir/config/words.txt > $dir/config/features.txt + +lstm_opts="l2-regularize=$comp_l2" +tdnn_opts="l2-regularize=$comp_l2" +output_opts="l2-regularize=$output_l2" + + cat >$dir/config/xconfig <&2 "The standard file $KALDI_ROOT/tools/config/common_path.sh is not present -> Exit!" && exit 1 . $KALDI_ROOT/tools/config/common_path.sh export LC_ALL=C +export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/home/dpovey/libs diff --git a/egs/fisher_callhome_spanish/s5/rnnlm b/egs/fisher_callhome_spanish/s5/rnnlm new file mode 120000 index 00000000000..fb754622d5e --- /dev/null +++ b/egs/fisher_callhome_spanish/s5/rnnlm @@ -0,0 +1 @@ +../../wsj/s5/rnnlm \ No newline at end of file diff --git a/egs/fisher_callhome_spanish/s5/run.sh b/egs/fisher_callhome_spanish/s5/run.sh index 57902a98fed..6e2752a7b68 100755 --- a/egs/fisher_callhome_spanish/s5/run.sh +++ b/egs/fisher_callhome_spanish/s5/run.sh @@ -1,20 +1,22 @@ #!/bin/bash # +# Copyright 2018 Nagendra Goel, Saikiran Valluri Apache 2.0 # Copyright 2014 Gaurav Kumar. 
Apache 2.0 # Recipe for Fisher/Callhome-Spanish -# Made to integrate KALDI with JOSHUA for end-to-end ASR and SMT stage=0 +train_stage=-20 +train_sgmm2=false # call the next line with the directory where the Spanish Fisher data is # (the values below are just an example). -sfisher_speech=/veu4/jadrian/data/LDC/LDC2010S01 -sfisher_transcripts=/veu4/jadrian/data/LDC/LDC2010T04 -spanish_lexicon=/veu4/jadrian/data/LDC/LDC96L16 +sfisher_speech=/export/corpora/LDC/LDC2010S01 +sfisher_transcripts=/export/corpora/LDC/LDC2010T04 +spanish_lexicon=/export/corpora/LDC/LDC96L16 split=local/splits/split_fisher -callhome_speech=/veu4/jadrian/data/LDC/LDC96S35 -callhome_transcripts=/veu4/jadrian/data/LDC/LDC96T17 +callhome_speech=/export/corpora/LDC/LDC96S35 +callhome_transcripts=/export/corpora/LDC/LDC96T17 split_callhome=local/splits/split_callhome mfccdir=`pwd`/mfcc @@ -25,7 +27,7 @@ if [ -f path.sh ]; then . ./path.sh; fi set -e -if [ $stage -lt 1 ]; then +if [ $stage -le 1 ]; then local/fsp_data_prep.sh $sfisher_speech $sfisher_transcripts local/callhome_data_prep.sh $callhome_speech $callhome_transcripts @@ -95,7 +97,7 @@ if [ $stage -lt 1 ]; then local/callhome_create_splits.sh $split_callhome fi -if [ $stage -lt 2 ]; then +if [ $stage -le 2 ]; then # Now compute CMVN stats for the train, dev and test subsets steps/compute_cmvn_stats.sh data/dev exp/make_mfcc/dev $mfccdir steps/compute_cmvn_stats.sh data/test exp/make_mfcc/test $mfccdir @@ -124,90 +126,95 @@ if [ $stage -lt 2 ]; then utils/subset_data_dir.sh --speakers data/train 90000 data/train_100k fi +if [ $stage -le 3 ]; then + steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ + data/train_10k_nodup data/lang exp/mono0a -steps/train_mono.sh --nj 10 --cmd "$train_cmd" \ - data/train_10k_nodup data/lang exp/mono0a + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_30k data/lang exp/mono0a exp/mono0a_ali || exit 1; -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_30k data/lang exp/mono0a exp/mono0a_ali || exit 1; - -steps/train_deltas.sh --cmd "$train_cmd" \ + steps/train_deltas.sh --cmd "$train_cmd" \ 2500 20000 data/train_30k data/lang exp/mono0a_ali exp/tri1 || exit 1; -(utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri1/graph data/dev exp/tri1/decode_dev)& + (utils/mkgraph.sh data/lang_test exp/tri1 exp/tri1/graph + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri1/graph data/dev exp/tri1/decode_dev)& -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_30k data/lang exp/tri1 exp/tri1_ali || exit 1; + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_30k data/lang exp/tri1 exp/tri1_ali || exit 1; -steps/train_deltas.sh --cmd "$train_cmd" \ + steps/train_deltas.sh --cmd "$train_cmd" \ 2500 20000 data/train_30k data/lang exp/tri1_ali exp/tri2 || exit 1; -( - utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1; - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1; -)& - + ( + utils/mkgraph.sh data/lang_test exp/tri2 exp/tri2/graph || exit 1; + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri2/graph data/dev exp/tri2/decode_dev || exit 1; + )& +fi -steps/align_si.sh --nj 30 --cmd "$train_cmd" \ - data/train_100k data/lang exp/tri2 exp/tri2_ali || exit 1; +if [ $stage -le 4 ]; then + steps/align_si.sh --nj 30 --cmd "$train_cmd" \ + data/train_100k data/lang 
exp/tri2 exp/tri2_ali || exit 1; # Train tri3a, which is LDA+MLLT, on 100k data. -steps/train_lda_mllt.sh --cmd "$train_cmd" \ + steps/train_lda_mllt.sh --cmd "$train_cmd" \ --splice-opts "--left-context=3 --right-context=3" \ 3000 40000 data/train_100k data/lang exp/tri2_ali exp/tri3a || exit 1; -( - utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1; - steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; -)& - + ( + utils/mkgraph.sh data/lang_test exp/tri3a exp/tri3a/graph || exit 1; + steps/decode.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri3a/graph data/dev exp/tri3a/decode_dev || exit 1; + )& +fi +if [ $stage -le 5 ]; then # Next we'll use fMLLR and train with SAT (i.e. on # fMLLR features) -steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train_100k data/lang exp/tri3a exp/tri3a_ali || exit 1; + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train_100k data/lang exp/tri3a exp/tri3a_ali || exit 1; -steps/train_sat.sh --cmd "$train_cmd" \ - 4000 60000 data/train_100k data/lang exp/tri3a_ali exp/tri4a || exit 1; + steps/train_sat.sh --cmd "$train_cmd" \ + 4000 60000 data/train_100k data/lang exp/tri3a_ali exp/tri4a || exit 1; -( - utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri4a/graph data/dev exp/tri4a/decode_dev + ( + utils/mkgraph.sh data/lang_test exp/tri4a exp/tri4a/graph + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri4a/graph data/dev exp/tri4a/decode_dev )& -steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ - data/train data/lang exp/tri4a exp/tri4a_ali || exit 1; + steps/align_fmllr.sh --nj 30 --cmd "$train_cmd" \ + data/train data/lang exp/tri4a exp/tri4a_ali || exit 1; # Reduce the number of gaussians -steps/train_sat.sh --cmd "$train_cmd" \ - 5000 120000 data/train data/lang exp/tri4a_ali exp/tri5a || exit 1; + steps/train_sat.sh --cmd "$train_cmd" \ + 5000 120000 data/train data/lang exp/tri4a_ali exp/tri5a || exit 1; -( - utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/dev exp/tri5a/decode_dev - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/test exp/tri5a/decode_test + ( + utils/mkgraph.sh data/lang_test exp/tri5a exp/tri5a/graph + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/dev exp/tri5a/decode_dev + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/test exp/tri5a/decode_test # Decode CALLHOME - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_test exp/tri5a/decode_callhome_test - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_dev exp/tri5a/decode_callhome_dev - steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ - exp/tri5a/graph data/callhome_train exp/tri5a/decode_callhome_train -) & - + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_test exp/tri5a/decode_callhome_test + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_dev 
exp/tri5a/decode_callhome_dev + steps/decode_fmllr.sh --nj 25 --cmd "$decode_cmd" --config conf/decode.config \ + exp/tri5a/graph data/callhome_train exp/tri5a/decode_callhome_train + ) & + + + steps/align_fmllr.sh \ + --boost-silence 0.5 --nj 32 --cmd "$train_cmd" \ + data/train data/lang exp/tri5a exp/tri5a_ali +fi -steps/align_fmllr.sh \ - --boost-silence 0.5 --nj 32 --cmd "$train_cmd" \ - data/train data/lang exp/tri5a exp/tri5a_ali +if $train_sgmm2; then steps/train_ubm.sh \ --cmd "$train_cmd" 750 \ @@ -258,22 +265,7 @@ for iter in 1 2 3 4; do done ) & -dnn_cpu_parallel_opts=(--minibatch-size 128 --max-change 10 --num-jobs-nnet 8 --num-threads 16 \ - --parallel-opts "--num-threads 16") -dnn_gpu_parallel_opts=(--minibatch-size 512 --max-change 40 --num-jobs-nnet 4 --num-threads 1 \ - --parallel-opts "--gpu 1") - -steps/nnet2/train_pnorm_ensemble.sh \ - --mix-up 5000 --initial-learning-rate 0.008 --final-learning-rate 0.0008\ - --num-hidden-layers 4 --pnorm-input-dim 2000 --pnorm-output-dim 200\ - --cmd "$train_cmd" \ - "${dnn_gpu_parallel_opts[@]}" \ - --ensemble-size 4 --initial-beta 0.1 --final-beta 5 \ - data/train data/lang exp/tri5a_ali exp/tri6a_dnn +fi -( - steps/nnet2/decode.sh --nj 13 --cmd "$decode_cmd" --num-threads 4 \ - --scoring-opts "--min-lmwt 8 --max-lmwt 16" --transform-dir exp/tri5a/decode_dev exp/tri5a/graph data/dev exp/tri6a_dnn/decode_dev -) & -wait +local/chain/run_tdnn_1g.sh --stage $stage --train-stage $train_stage || exit 1; exit 0;
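
For anyone trying out the new recipe, the lines below illustrate how the chain stage added by this patch can be driven on its own. The option names are the ones defined at the top of local/chain/run_tdnn_1g.sh (which parses them via utils/parse_options.sh); the decode-directory path is only an example taken from the results comment at the top of that script and may differ depending on how the experiment directory is named in your run.

    # resume the chain TDNN-F recipe at the neural-net training stage (stage 20),
    # restarting training itself from its default start point
    local/chain/run_tdnn_1g.sh --stage 20 --train-stage -10

    # after decoding, summarize the best WER of a finished decode directory, e.g.
    grep WER exp/chain/multipsplice_tdnn/decode_fsp_train_test/wer_* | utils/best_wer.sh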