forked from ictnlp/StreamSpeech
-
Notifications
You must be signed in to change notification settings - Fork 0
/
pred.offline-s2st.sh
64 lines (49 loc) · 2.7 KB
/
pred.offline-s2st.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# Offline speech-to-speech translation evaluation for a StreamSpeech checkpoint.
#
# Pipeline:
#   1. Decode the $SPLIT subset of $DATA with fairseq-generate.
#   2. Score the "A-" lines of the decoding log against $SPLIT.src
#      (sacrebleu + wer) and the "D-" lines against $SPLIT.txt (sacrebleu).
#   3. Score the unit hypotheses from generate-$SPLIT.txt against $SPLIT.unit.
#   4. Synthesize waveforms from the units with the unit-based vocoder.
#   5. Run asr_bleu/compute_asr_bleu.py on the waveforms.
#
# All scores are appended to $output_dir/res.txt (reruns accumulate).
# Edit the paths below for your machine; everything runs relative to $ROOT.

# Fail fast on a misspelled variable instead of expanding it to nothing.
set -u

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

export CUDA_VISIBLE_DEVICES=0

ROOT=/data/zhangshaolei/StreamSpeech
DATA_ROOT=/data/zhangshaolei/datasets/cvss/cvss-c
PRETRAIN_ROOT=/data/zhangshaolei/pretrain_models
VOCODER_CKPT=$PRETRAIN_ROOT/unit-based_HiFi-GAN_vocoder/mHuBERT.layer11.km1000.en/g_00500000
VOCODER_CFG=$PRETRAIN_ROOT/unit-based_HiFi-GAN_vocoder/mHuBERT.layer11.km1000.en/config.json

# Source-language code. Deliberately NOT called LANG: that name is the POSIX
# locale variable, and overwriting it with "fr" would hand an invalid locale
# to every child process (python, sort, grep, sacrebleu, ...).
SRC_LANG=fr
DATA=${DATA_ROOT}/${SRC_LANG}-en/fbank2unit
SPLIT=test
BEAM=1
file=/data/zhangshaolei/StreamSpeech_model/streamspeech.offline.${SRC_LANG}-en.pt
# file=/data/zhangshaolei/StreamSpeech_model/unity.${SRC_LANG}-en.pt

# The relative paths used below (researches/ctc_unity, res/, asr_bleu/) are
# all relative to the repository root, so make that the working directory
# explicitly rather than relying on where the script was launched from.
cd "$ROOT" || die "cannot cd to $ROOT"

output_dir=res/streamspeech.offline.${SRC_LANG}-en
# -p creates the parent "res" directory too and is a no-op on reruns.
mkdir -p "$output_dir" || die "cannot create $output_dir"

gen_log=$output_dir/generate-$SPLIT.log
res_file=$output_dir/res.txt

# --- 1. Decoding ------------------------------------------------------------
PYTHONPATH="$ROOT/fairseq" fairseq-generate "$DATA" \
  --user-dir researches/ctc_unity \
  --config-yaml config_gcmvn.yaml --multitask-config-yaml config_mtl_asr_st_ctcst.yaml \
  --task speech_to_speech_ctc --target-is-code --target-code-size 1000 --vocoder code_hifigan \
  --path "$file" --gen-subset "$SPLIT" \
  --beam-mt "$BEAM" --beam 1 --max-len-a 1 \
  --max-tokens 10000 \
  --required-batch-size-multiple 1 \
  --results-path "$output_dir" > "$gen_log" 2>&1 \
  || die "fairseq-generate failed; see $gen_log"

# --- 2. Text metrics ----------------------------------------------------------
# Log lines look like "<tag>-<id><TAB>...": sort numerically on the id that
# follows the dash so hypotheses line up with the reference files.
grep '^A-' "$gen_log" | sort -t'-' -k2,2n | cut -f2 > "$output_dir/generate-$SPLIT.asr"

echo '################### ASR source text BLEU ###################' >> "$res_file"
sacrebleu "$DATA/$SPLIT.src" -i "$output_dir/generate-$SPLIT.asr" -w 3 >> "$res_file"
wer "$DATA/$SPLIT.src" "$output_dir/generate-$SPLIT.asr" >> "$res_file"

grep '^D-' "$gen_log" | sort -t'-' -k2,2n | cut -f2 > "$output_dir/generate-$SPLIT.tgt"

echo '################### Speech-to-text target text BLEU ###################' >> "$res_file"
sacrebleu "$DATA/$SPLIT.txt" -i "$output_dir/generate-$SPLIT.tgt" -w 3 >> "$res_file"

# --- 3. Unit metric -----------------------------------------------------------
# Unit hypotheses come from the results file written via --results-path
# (not the log); the unit sequence is the third tab-separated field.
grep '^D-' "$output_dir/generate-$SPLIT.txt" \
  | sed 's/^D-//' \
  | sort -nk1 \
  | cut -f3 \
  > "$output_dir/generate-$SPLIT.unit"

echo '################### Speech-to-unit target unit BLEU ###################' >> "$res_file"
sacrebleu "$DATA/$SPLIT.unit" -i "$output_dir/generate-$SPLIT.unit" -w 3 >> "$res_file"

# --- 4. Waveform synthesis ------------------------------------------------------
python "$ROOT/fairseq/examples/speech_to_speech/generate_waveform_from_code.py" \
  --in-code-file "$output_dir/generate-$SPLIT.unit" \
  --vocoder "$VOCODER_CKPT" --vocoder-cfg "$VOCODER_CFG" \
  --results-path "$output_dir/pred_wav" --dur-prediction \
  || die "waveform generation failed"

# --- 5. ASR-BLEU ----------------------------------------------------------------
# compute_asr_bleu.py is run from inside asr_bleu/. The subshell confines the
# directory change, so a failed cd can neither run the scorer in the wrong
# place nor leave the rest of the script in an unexpected directory. The
# redirection is opened before the subshell changes directory, so the log
# path is still relative to $ROOT.
(
  cd asr_bleu || exit 1
  python compute_asr_bleu.py \
    --lang en \
    --audio_dirpath "../$output_dir/pred_wav" \
    --reference_path "$DATA/$SPLIT.txt" \
    --reference_format txt
) > "$output_dir/asr_bleu.log" 2>&1

echo '################### Speech-to-speech target speech ASR-BLEU ###################' >> "$res_file"
# The last line of the scorer's log is recorded as the ASR-BLEU result.
tail -n 1 "$output_dir/asr_bleu.log" >> "$res_file"