From 97a4902d27d51b144f56c0a339789b4bb157792d Mon Sep 17 00:00:00 2001 From: Miguel Del Rio Date: Mon, 9 Oct 2023 21:03:12 +0000 Subject: [PATCH] adding tests --- test/data/align_1.aligned.punc_case.nlp | 15 ++++++++++++++ test/data/align_1.hyp.punc_case.ctm | 13 ++++++++++++ test/data/twenty.aligned.punc_case.nlp | 8 ++++++++ test/data/twenty.hyp.punc_case.txt | 2 ++ test/fstalign_Test.cc | 27 ++++++++++++++++++++++++- 5 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 test/data/align_1.aligned.punc_case.nlp create mode 100644 test/data/align_1.hyp.punc_case.ctm create mode 100644 test/data/twenty.aligned.punc_case.nlp create mode 100644 test/data/twenty.hyp.punc_case.txt diff --git a/test/data/align_1.aligned.punc_case.nlp b/test/data/align_1.aligned.punc_case.nlp new file mode 100644 index 0000000..2ff0ddf --- /dev/null +++ b/test/data/align_1.aligned.punc_case.nlp @@ -0,0 +1,15 @@ +token|speaker|ts|endTs|punctuation|prepunctuation|case|tags|wer_tags|oldTs|oldEndTs|ali_comment|confidence +a|1|1.0000|2.0000|||CA|[]|[]|||sub(A)| +b|1|3.0000|4.0000|||LC|[]|[]|||| +c|1|5.0000|6.0000|||LC|[]|[]|||| +d|1|7.0000|8.0000|,||LC|[]|[]|||| +,|1|7.0000|8.0000|||LC|[]|[]|||| +|1|9.0000|10.0000|.||LC|['0:FALLBACK']|[]|||sub()| +e|1|11.0000|12.0000|||LC|[]|[]|||| +f|1|13.0000|14.0000|||LC|[]|[]|||| +g|1|15.0000|16.0000|||LC|[]|[]|||| +h|1|17.0000|18.0000|||LC|[]|[]|||| +|1|||,||LC|[]|[]|||del| +,|1|||||LC|[]|[]|||del| +i|1|21.0000|22.0000|||LC|[]|[]|||sub(I)| +j|1|23.0000|24.0000|||LC|[]|[]|||sub(J)| diff --git a/test/data/align_1.hyp.punc_case.ctm b/test/data/align_1.hyp.punc_case.ctm new file mode 100644 index 0000000..4da53d8 --- /dev/null +++ b/test/data/align_1.hyp.punc_case.ctm @@ -0,0 +1,13 @@ +recording.wav 1 1 1 A +recording.wav 1 3 1 b +recording.wav 1 5 1 c +recording.wav 1 7 1 d +recording.wav 1 7 1 , +recording.wav 1 9 1 +recording.wav 1 11 1 e +recording.wav 1 11 1 . +recording.wav 1 13 1 f +recording.wav 1 15 1 g +recording.wav 1 17 1 h +recording.wav 1 21 1 I +recording.wav 1 23 1 J diff --git a/test/data/twenty.aligned.punc_case.nlp b/test/data/twenty.aligned.punc_case.nlp new file mode 100644 index 0000000..fa9542e --- /dev/null +++ b/test/data/twenty.aligned.punc_case.nlp @@ -0,0 +1,8 @@ +token|speaker|ts|endTs|punctuation|prepunctuation|case|tags|wer_tags|oldTs|oldEndTs|ali_comment|confidence +20|2|0.0000|0.0000|||CA|['1:CARDINAL']|['1']|84.6600|85.0600|sub(in)| +in|2|0.0000|0.0000|||LC|[]|[]|89.1600|89.2800|sub(Twenty)| +2020|2|0.0000|0.0000|||CA|['0:YEAR']|['0', '2']|89.7400|89.9900|sub(tHiRtY)| +is|2|0.0000|0.0000|||LC|[]|[]|89.1600|89.2800|| +one|2|0.0000|0.0000|||CA|['3:CARDINAL']|['3']|89.7400|89.9900|,push_last| +twenty|2|0.0000|0.0000|||LC|['3:CARDINAL']|['3']|89.7400|89.9900|sub(two),push_last| +three|2|0.0000|0.0000|||LC|['3:CARDINAL']|['3']|89.7400|89.9900|,push_last| diff --git a/test/data/twenty.hyp.punc_case.txt b/test/data/twenty.hyp.punc_case.txt new file mode 100644 index 0000000..9bbad89 --- /dev/null +++ b/test/data/twenty.hyp.punc_case.txt @@ -0,0 +1,2 @@ +in Twenty tHiRtY , is one TWENTY two three + diff --git a/test/fstalign_Test.cc b/test/fstalign_Test.cc index 315d0c0..4f0d509 100644 --- a/test/fstalign_Test.cc +++ b/test/fstalign_Test.cc @@ -679,7 +679,7 @@ TEST_CASE_METHOD(UniqueTestsFixture, "main-adapted-composition()") { REQUIRE_THAT(result, Contains("WER: INS:0 DEL:3 SUB:3")); } - SECTION("wer with case and punctuation(nlp output)") { + SECTION("NLP Hypothesis: wer with case and punctuation(nlp output)") { const auto result = exec(command("wer", approach, "short_punc.ref.nlp", "short_punc.hyp.nlp", sbs_output, nlp_output, TEST_SYNONYMS)+" --use-punctuation --use-case"); const auto testFile = std::string{TEST_DATA} + "short.aligned.punc_case.nlp"; @@ -689,6 +689,31 @@ TEST_CASE_METHOD(UniqueTestsFixture, "main-adapted-composition()") { REQUIRE_THAT(result, Contains("WER: INS:2 DEL:7 SUB:4")); } + SECTION("CTM Hypothesis: wer with case and punctuation(nlp output)") { + const auto result = + exec(command("wer", approach, "align_1.ref.nlp", "align_1.hyp.punc_case.ctm", sbs_output, nlp_output, TEST_SYNONYMS)+" --use-punctuation --use-case"); + const auto testFile = std::string{TEST_DATA} + "align_1.aligned.punc_case.nlp"; + + REQUIRE(compareFiles(nlp_output.c_str(), testFile.c_str())); + REQUIRE_THAT(result, Contains("WER: 6/14 = 0.4286")); + REQUIRE_THAT(result, Contains("WER: INS:1 DEL:2 SUB:3")); + } + + SECTION("TXT Hypothesis: wer with case and punctuation(nlp output)") { + const auto result = + exec(command("wer", approach, "twenty.ref.testing.nlp", "twenty.hyp.punc_case.txt", sbs_output, nlp_output, TEST_SYNONYMS, + "twenty.ref.testing.norm.json")+" --use-punctuation --use-case"); + const auto testFile = std::string{TEST_DATA} + "twenty.aligned.punc_case.nlp"; + + REQUIRE(compareFiles(nlp_output.c_str(), testFile.c_str())); + REQUIRE_THAT(result, Contains("WER: 6/7 = 0.8571")); + REQUIRE_THAT(result, Contains("WER: INS:2 DEL:0 SUB:4")); + REQUIRE_THAT(result, Contains("Wer Entity ID 1 WER: 1/1 = 1.0000")); + REQUIRE_THAT(result, Contains("Wer Entity ID 0 WER: 1/1 = 1.0000")); + REQUIRE_THAT(result, Contains("Wer Entity ID 2 WER: 1/1 = 1.0000")); + REQUIRE_THAT(result, Contains("Wer Entity ID 3 WER: 2/3 = 0.6667")); + } + // alignment tests SECTION("align_1") {