Skip to content

Commit

Permalink
Add HLG decoding with OpenFst on CPU for aishell conformer_ctc (#1279)
Browse files Browse the repository at this point in the history
  • Loading branch information
csukuangfj authored Oct 1, 2023
1 parent 48cc41b commit f14b673
Show file tree
Hide file tree
Showing 16 changed files with 146 additions and 25 deletions.
80 changes: 79 additions & 1 deletion .github/scripts/run-pre-trained-conformer-ctc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ log() {
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

cd egs/librispeech/ASR
pushd egs/librispeech/ASR

# repo_url=https://github.com/csukuangfj/icefall-asr-conformer-ctc-bpe-500
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
Expand Down Expand Up @@ -112,3 +112,81 @@ log "Decoding with HLG on CPU with OpenFst"
$repo/test_wavs/1089-134686-0001.wav \
$repo/test_wavs/1221-135766-0001.wav \
$repo/test_wavs/1221-135766-0002.wav

rm -rf $repo

popd

# ---------------------------------------------------------------------------
# AIShell conformer_ctc smoke test: download a pre-trained model, export it
# to TorchScript, build the OpenFst decoding graphs, and decode three test
# waves with H, HL and HLG in turn.
# ---------------------------------------------------------------------------
log "Test aishell"

pushd egs/aishell/ASR

repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc
log "Downloading pre-trained model from $repo_url"
# GIT_LFS_SKIP_SMUDGE=1 makes the clone check out LFS pointer files instead of
# downloading every LFS object; the objects are pulled selectively below.
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
# The clone directory is named after the last component of the URL.
repo=$(basename $repo_url)
pushd $repo

# Explicitly pull the checkpoint and the char-level 3-gram LM.
git lfs pull --include "exp/pretrained.pt"
git lfs pull --include "data/lm/G_3_gram_char.fst.txt"

popd

log "Display test files"
tree $repo/
ls -lh $repo/test_wavs/*.wav

log "CTC decoding"

log "Exporting model with torchscript"

# export.py is invoked below with --epoch 99 --avg 1; presumably it looks for
# epoch-99.pt inside --exp-dir, hence this alias for pretrained.pt
# (NOTE(review): confirm against export.py).
pushd $repo/exp
ln -s pretrained.pt epoch-99.pt
popd

# The decoding steps below read $repo/exp/cpu_jit.pt, which is expected to be
# produced by this export with --jit 1.
./conformer_ctc/export.py \
--epoch 99 \
--avg 1 \
--exp-dir $repo/exp \
--tokens $repo/data/lang_char/tokens.txt \
--jit 1

ls -lh $repo/exp

log "Generating H.fst, HL.fst"

# The graphs are written into the lang dir; the HLG.fst consumed by the last
# decoding step is presumably built here too, from the --ngram-G input.
./local/prepare_lang_fst.py --lang-dir $repo/data/lang_char --ngram-G $repo/data/lm/G_3_gram_char.fst.txt

ls -lh $repo/data/lang_char

log "Decoding with H on CPU with OpenFst"

# H-only decoding is given the token table (--tokens).
./conformer_ctc/jit_pretrained_decode_with_H.py \
--nn-model $repo/exp/cpu_jit.pt \
--H $repo/data/lang_char/H.fst \
--tokens $repo/data/lang_char/tokens.txt \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/2.wav

log "Decoding with HL on CPU with OpenFst"

# HL decoding is given the word table (--words) instead of the token table.
./conformer_ctc/jit_pretrained_decode_with_HL.py \
--nn-model $repo/exp/cpu_jit.pt \
--HL $repo/data/lang_char/HL.fst \
--words $repo/data/lang_char/words.txt \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/2.wav

log "Decoding with HLG on CPU with OpenFst"

# HLG decoding likewise takes the word table.
./conformer_ctc/jit_pretrained_decode_with_HLG.py \
--nn-model $repo/exp/cpu_jit.pt \
--HLG $repo/data/lang_char/HLG.fst \
--words $repo/data/lang_char/words.txt \
$repo/test_wavs/0.wav \
$repo/test_wavs/1.wav \
$repo/test_wavs/2.wav

# Remove the downloaded model repository once the test is done.
rm -rf $repo
2 changes: 1 addition & 1 deletion .github/workflows/run-yesno-recipe.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
- name: Install Python dependencies
run: |
grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
pip uninstall -y protobuf
pip install --no-binary protobuf protobuf==3.20.*
Expand Down
21 changes: 10 additions & 11 deletions egs/aishell/ASR/conformer_ctc/export.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
import logging
from pathlib import Path

import k2
import torch
from conformer import Conformer

from icefall.checkpoint import average_checkpoints, load_checkpoint
from icefall.lexicon import Lexicon
from icefall.utils import AttributeDict, str2bool
from icefall.utils import AttributeDict, num_tokens, str2bool


def get_parser():
Expand Down Expand Up @@ -63,11 +63,10 @@ def get_parser():
)

parser.add_argument(
"--lang-dir",
"--tokens",
type=str,
default="data/lang_char",
help="""It contains language related input files such as "lexicon.txt"
""",
required=True,
help="Path to the tokens.txt.",
)

parser.add_argument(
Expand Down Expand Up @@ -98,16 +97,16 @@ def get_params() -> AttributeDict:
def main():
args = get_parser().parse_args()
args.exp_dir = Path(args.exp_dir)
args.lang_dir = Path(args.lang_dir)

params = get_params()
params.update(vars(args))

logging.info(params)
# Load tokens.txt here
token_table = k2.SymbolTable.from_file(params.tokens)

lexicon = Lexicon(params.lang_dir)
max_token_id = max(lexicon.tokens)
num_classes = max_token_id + 1 # +1 for the blank
num_classes = num_tokens(token_table) + 1 # +1 for the blank

logging.info(params)

device = torch.device("cpu")
if torch.cuda.is_available():
Expand Down
Empty file modified egs/aishell/ASR/conformer_ctc/test_transformer.py
100644 → 100755
Empty file.
1 change: 1 addition & 0 deletions egs/aishell/ASR/local/prepare_lang_fst.py
5 changes: 5 additions & 0 deletions egs/aishell/ASR/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ if [ $stage -le 5 ] && [ $stop_stage -ge 5 ]; then
./local/prepare_lang.py --lang-dir $lang_phone_dir
fi


# Train a bigram P for MMI training
if [ ! -f $lang_phone_dir/transcript_words.txt ]; then
log "Generate data to train phone based bigram P"
Expand Down Expand Up @@ -203,6 +204,10 @@ if [ $stage -le 6 ] && [ $stop_stage -ge 6 ]; then
if [ ! -f $lang_char_dir/L_disambig.pt ]; then
./local/prepare_char.py --lang-dir $lang_char_dir
fi

# Build the OpenFst decoding graphs (H.fst, HL.fst, HLG.fst) for the
# char-based lang dir. The guard and --lang-dir must name the same directory:
# building into the phone lang dir would leave $lang_char_dir/HLG.fst missing,
# so the guard would never be satisfied. The char lang dir is paired with the
# char-level 3-gram G (G_3_gram_char.fst.txt).
if [ ! -f $lang_char_dir/HLG.fst ]; then
./local/prepare_lang_fst.py --lang-dir $lang_char_dir --ngram-G ./data/lm/G_3_gram_char.fst.txt
fi
fi

if [ $stage -le 7 ] && [ $stop_stage -ge 7 ]; then
Expand Down
16 changes: 14 additions & 2 deletions egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_H.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,26 @@
Usage:
(1) LibriSpeech conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_H.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--H ./data/lang_bpe_500/H.fst \
--tokens ./data/lang_bpe_500/tokens.txt \
./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac
(2) AIShell conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_H.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--H ./data/lang_char/H.fst \
--tokens ./data/lang_char/tokens.txt \
./BAC009S0764W0121.wav \
./BAC009S0764W0122.wav \
./BAC009S0764W0123.wav
Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
you can use ./export.py --jit 1
"""
Expand All @@ -23,12 +36,11 @@
import math
from typing import Dict, List

import kaldi_hmm_gmm
import kaldifeat
import kaldifst
import torch
import torchaudio
from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


Expand Down
16 changes: 14 additions & 2 deletions egs/librispeech/ASR/conformer_ctc/jit_pretrained_decode_with_HL.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,26 @@
Usage:
(1) LibriSpeech conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_HL.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--HL ./data/lang_bpe_500/HL.fst \
--words ./data/lang_bpe_500/words.txt \
./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac
(2) AIShell conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_HL.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--HL ./data/lang_char/HL.fst \
--words ./data/lang_char/words.txt \
./BAC009S0764W0121.wav \
./BAC009S0764W0122.wav \
./BAC009S0764W0123.wav
Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
you can use ./export.py --jit 1
"""
Expand All @@ -23,12 +36,11 @@
import math
from typing import Dict, List

import kaldi_hmm_gmm
import kaldifeat
import kaldifst
import torch
import torchaudio
from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,25 @@
Usage:
(1) LibriSpeech conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_HLG.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--HLG ./data/lang_bpe_500/HLG.fst \
--words ./data/lang_bpe_500/words.txt \
./download/LibriSpeech/test-clean/1089/134686/1089-134686-0002.flac \
./download/LibriSpeech/test-clean/1221/135766/1221-135766-0001.flac
(2) AIShell conformer_ctc
./conformer_ctc/jit_pretrained_decode_with_HLG.py \
--nn-model ./conformer_ctc/exp/cpu_jit.pt \
--HLG ./data/lang_char/HLG.fst \
--words ./data/lang_char/words.txt \
./BAC009S0764W0121.wav \
./BAC009S0764W0122.wav \
./BAC009S0764W0123.wav
Note that to generate ./conformer_ctc/exp/cpu_jit.pt,
you can use ./export.py --jit 1
"""
Expand All @@ -23,12 +35,11 @@
import math
from typing import Dict, List

import kaldi_hmm_gmm
import kaldifeat
import kaldifst
import torch
import torchaudio
from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


Expand Down
2 changes: 1 addition & 1 deletion egs/yesno/ASR/tdnn/jit_pretrained_decode_with_H.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import kaldifst
import torch
import torchaudio
from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


Expand Down
2 changes: 1 addition & 1 deletion egs/yesno/ASR/tdnn/jit_pretrained_decode_with_HL.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
import kaldifst
import torch
import torchaudio
from kaldi_hmm_gmm import DecodableCtc, FasterDecoder, FasterDecoderOptions
from kaldi_decoder import DecodableCtc, FasterDecoder, FasterDecoderOptions
from torch.nn.utils.rnn import pad_sequence


Expand Down
6 changes: 3 additions & 3 deletions icefall/ctc/README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
# Introduction

This folder uses [kaldifst][kaldifst] for graph construction
and decoders from [kaldi-hmm-gmm][kaldi-hmm-gmm] for CTC decoding.
and decoders from [kaldi-decoder][kaldi-decoder] for CTC decoding.

It supports only `CPU`.

You can use

```bash
pip install kaldifst kaldi-hmm-gmm
pip install kaldifst kaldi-decoder
```
to install the dependencies.

[kaldi-hmm-gmm]: https://github.com/csukuangfj/kaldi-hmm-gmm
[kaldi-decoder]: https://github.com/k2-fsa/kaldi-decoder
[kaldifst]: https://github.com/k2-fsa/kaldifst
[k2]: https://github.com/k2-fsa/k2
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
kaldifst
kaldilm
kaldialign
kaldi-hmm-gmm
kaldi-decoder
sentencepiece>=0.1.96
tensorboard
typeguard
Expand Down

0 comments on commit f14b673

Please sign in to comment.