-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathdecode_parallel.sh
executable file
·110 lines (94 loc) · 2.4 KB
/
decode_parallel.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env bash
# Copyright 2020 Jian Wu
# License: Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
set -eu

# Default values for the command-line options. They are all assigned before
# ./utils/parse_options.sh is sourced further down; presumably that script
# maps each --some-option flag onto the variable of the same name
# (NOTE(review): confirm against utils/parse_options.sh).

# --- Options consumed by this script itself ---
# Number of parallel jobs: the test scp is split into this many parts and one
# background cmd/decode.py process is started per part.
nj=20
# Consulted when choosing the --device-id handed to each job.
cuda=false
# Prefix for the per-job and merged output files. Left empty so that a
# beam<beam_size>_<random> prefix is generated later in the script. It has to
# be declared here: the script runs under `set -u`, and the later emptiness
# check would otherwise abort with "dec_prefix: unbound variable".
dec_prefix=""
# Reference transcription passed to ./cmd/compute_wer.py; must be non-empty
# when score is true.
text=""
# When true, the merged decode file is scored with ./cmd/compute_wer.py.
score=false

# --- Options forwarded unchanged to cmd/decode.py ---
# Each one is passed under the flag of the same name, with underscores
# written as dashes (e.g. beam_size -> --beam-size).
dict=""
space=""
nbest=1
channel=-1
max_len=500
min_len=1
max_len_ratio=1
min_len_ratio=0
len_norm=true
len_penalty=0
cov_penalty=0
cov_threshold=0
eos_threshold=0
beam_size=16
function="beam_search"
temperature=1
am_tag="best"
lm_tag="best"
lm=""
lm_weight=0
ctc_weight=0
spm=""
segment=""
dump_align=""
# Log the invocation exactly as it was given.
echo "$0 $*"

# Sourced rather than executed so that it can change this shell's variables
# and positional parameters; presumably it consumes the leading --option
# arguments (NOTE(review): confirm against utils/parse_options.sh).
. ./utils/parse_options.sh || exit 1

# Exactly three positional arguments must remain.
if [ $# -ne 3 ]; then
  echo "Script format error: $0 <exp-dir> <tst-scp> <dec-dir>" >&2
  exit 1
fi

exp_dir=$1
tst_scp=$2
dec_dir=$3

# Per-job scp splits, per-job outputs and logs all live under <dec-dir>/log.
log_dir=$dec_dir/log
mkdir -p "$log_dir"

# Missing inputs are errors: report them on stderr and exit non-zero so that
# a calling pipeline does not mistake the aborted run for a successful one.
if [ ! -f "$tst_scp" ]; then
  echo "$0: missing test wave script: $tst_scp" >&2
  exit 1
fi
if [ ! -d "$exp_dir" ]; then
  echo "$0: missing experiment directory: $exp_dir" >&2
  exit 1
fi
# Pick an output prefix when none was supplied. The ${dec_prefix:-} form is
# required because the script runs under `set -u`: expanding an unset
# dec_prefix directly would abort with "unbound variable".
if [ -z "${dec_prefix:-}" ]; then
  # First 9 hex characters of the md5 of the current time (seconds followed
  # by nanoseconds), used to keep output names from different runs apart.
  random_str=$(date +%s%N | md5sum | cut -c 1-9)
  dec_prefix=beam${beam_size}_${random_str}
fi

# Build the list of per-job scp files ($log_dir/wav.1.scp ... wav.<nj>.scp)
# as an array so that paths containing whitespace stay intact.
wav_sp_scp=()
for n in $(seq "$nj"); do
  wav_sp_scp+=("$log_dir/wav.$n.scp")
done

# Distribute the test scp over the per-job files.
./utils/split_scp.pl "$tst_scp" "${wav_sp_scp[@]}" || exit 1
# Start one background cmd/decode.py process per split. Job i reads
# $log_dir/wav.<i>.scp, writes $log_dir/<dec_prefix>.<i>.decode and
# $log_dir/<dec_prefix>.<i>.nbest, and logs to
# $log_dir/decode.<dec_prefix>.<i>.log.
gpu=-1
pids=()
for ((i = 1; i <= nj; i++)); do
  # Hand out device id i-1 only when --cuda true was requested; otherwise
  # every job keeps --device-id -1. The comparison must be against the
  # literal "true": a bare `[ $cuda ]` only tests for a non-empty string and
  # is therefore also satisfied by "false".
  if [ "$cuda" = true ]; then
    gpu=$((i - 1))
  fi
  # The n-best dump path carries the job index so that parallel jobs do not
  # overwrite each other and the later <dec_prefix>.*.nbest merge picks up
  # one file per job.
  cmd/decode.py \
    "$log_dir/wav.${i}.scp" \
    "$log_dir/${dec_prefix}.${i}.decode" \
    --segment "$segment" \
    --beam-size "$beam_size" \
    --am "$exp_dir" \
    --device-id "$gpu" \
    --channel "$channel" \
    --am-tag "$am_tag" \
    --lm-tag "$lm_tag" \
    --dict "$dict" \
    --lm "$lm" \
    --spm "$spm" \
    --lm-weight "$lm_weight" \
    --ctc-weight "$ctc_weight" \
    --temperature "$temperature" \
    --space "$space" \
    --nbest "$nbest" \
    --dump-nbest "$log_dir/${dec_prefix}.${i}.nbest" \
    --dump-align "$dump_align" \
    --max-len "$max_len" \
    --min-len "$min_len" \
    --max-len-ratio "$max_len_ratio" \
    --min-len-ratio "$min_len_ratio" \
    --function "$function" \
    --len-norm "$len_norm" \
    --len-penalty "$len_penalty" \
    --cov-penalty "$cov_penalty" \
    --cov-threshold "$cov_threshold" \
    --eos-threshold "$eos_threshold" \
    > "$log_dir/decode.${dec_prefix}.${i}.log" 2>&1 &
  pids+=("$!")
done

# Wait for every job individually: a bare `wait` discards the jobs' exit
# codes, so failed decodes would go unnoticed and partial results would be
# merged. `|| failed=...` keeps `set -e` from aborting on the first failure
# so that all jobs are reaped before reporting.
failed=0
for pid in ${pids[@]+"${pids[@]}"}; do
  wait "$pid" || failed=$((failed + 1))
done
if [ "$failed" -ne 0 ]; then
  echo "$0: $failed of $nj decoding jobs failed, see $log_dir/decode.${dec_prefix}.*.log" >&2
  exit 1
fi
# Merge the per-job results: for each output kind, concatenate every
# $log_dir/<dec_prefix>.*.<kind> file, sort the lines with `sort -k1`, and
# write the result to $dec_dir/<dec_prefix>.<kind>.
for kind in nbest decode; do
  cat $log_dir/${dec_prefix}.*.${kind} \
    | sort -k1 \
    > $dec_dir/${dec_prefix}.${kind}
done
# Optional scoring step: compare the merged decode file against the
# reference transcription given with --text, showing the report on stdout
# while also saving it to $dec_dir/<dec_prefix>.wer.
if $score; then
  # Quoted so that an empty value is tested as an empty string and a path
  # containing whitespace is not split into several words.
  if [ -z "$text" ]; then
    echo "for --score true, you must given --text <reference-transcription>" >&2
    # Exit 1 to match the script's other error exits.
    exit 1
  fi
  ./cmd/compute_wer.py "$dec_dir/${dec_prefix}.decode" "$text" \
    | tee "$dec_dir/${dec_prefix}.wer"
fi

echo "$0 $*: Done"