From c883ac33a609260c575e38351a3ab826d0c2096a Mon Sep 17 00:00:00 2001 From: Miguel Del Rio Date: Wed, 11 Oct 2023 21:04:33 +0000 Subject: [PATCH] updating tests --- src/Nlp.cpp | 1 + test/data/align_1.aligned.punc_case.nlp | 7 ++++--- test/data/short.aligned.punc.nlp | 20 ++++++++++---------- test/data/short.aligned.punc_case.nlp | 20 ++++++++++---------- test/fstalign_Test.cc | 4 ++-- 5 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/Nlp.cpp b/src/Nlp.cpp index c0dce53..06cdfe8 100644 --- a/src/Nlp.cpp +++ b/src/Nlp.cpp @@ -93,6 +93,7 @@ NlpFstLoader::NlpFstLoader(std::vector &records, Json::Value norma mSpeakers.push_back(speaker); RawNlpRecord punc_row; punc_row.token = punc_row.punctuation; + punc_row.speakerId = speaker; punc_row.punctuation = ""; mNlpRows.push_back(punc_row); } diff --git a/test/data/align_1.aligned.punc_case.nlp b/test/data/align_1.aligned.punc_case.nlp index 2ff0ddf..a614c37 100644 --- a/test/data/align_1.aligned.punc_case.nlp +++ b/test/data/align_1.aligned.punc_case.nlp @@ -3,13 +3,14 @@ a|1|1.0000|2.0000|||CA|[]|[]|||sub(A)| b|1|3.0000|4.0000|||LC|[]|[]|||| c|1|5.0000|6.0000|||LC|[]|[]|||| d|1|7.0000|8.0000|,||LC|[]|[]|||| -,|1|7.0000|8.0000|||LC|[]|[]|||| +,|1|7.0000|8.0000|||||[]|||| |1|9.0000|10.0000|.||LC|['0:FALLBACK']|[]|||sub()| -e|1|11.0000|12.0000|||LC|[]|[]|||| +.|1|11.0000|12.0000|||||[]|||sub(e)| +e|1|11.0000|12.0000|||LC|[]|[]|||sub(.)| f|1|13.0000|14.0000|||LC|[]|[]|||| g|1|15.0000|16.0000|||LC|[]|[]|||| h|1|17.0000|18.0000|||LC|[]|[]|||| |1|||,||LC|[]|[]|||del| -,|1|||||LC|[]|[]|||del| +,|1|||||||[]|||del| i|1|21.0000|22.0000|||LC|[]|[]|||sub(I)| j|1|23.0000|24.0000|||LC|[]|[]|||sub(J)| diff --git a/test/data/short.aligned.punc.nlp b/test/data/short.aligned.punc.nlp index 8cfc53e..421d1ac 100644 --- a/test/data/short.aligned.punc.nlp +++ b/test/data/short.aligned.punc.nlp @@ -1,20 +1,20 @@ token|speaker|ts|endTs|punctuation|prepunctuation|case|tags|wer_tags|oldTs|oldEndTs|ali_comment|confidence |2|0.0000|0.0000|||LC|[]|[]|||| Yeah|1|0.0000|0.0000|,||UC|[]|[]|||| -,|1|0.0000|0.0000|||UC|[]|[]|||| +,|1|0.0000|0.0000|||||[]|||| yeah|1|||,||LC|[]|[]|||del| -,|1|||||LC|[]|[]|||del| +,|1|||||||[]|||del| right|1|0.0000|0.0000|.||LC|[]|[]|||| -.|1|||||LC|[]|[]|||del| +.|1|||||||[]|||del| Yeah|1|||,||UC|[]|[]|||del| -,|1|||||UC|[]|[]|||del| +,|1|||||||[]|||del| all|1|||||LC|[]|[]|||del| right|1|||,||LC|[]|[]|||del| -,|1|0.0000|0.0000|||LC|[]|[]|||sub(i'll)| +,|1|0.0000|0.0000|||||[]|||sub(i'll)| probably|1|0.0000|0.0000|||LC|[]|[]|||sub(do)| just|1|0.0000|0.0000|||LC|[]|[]|||| that|1|0.0000|0.0000|.||LC|[]|[]|||| -.|1|0.0000|0.0000|||LC|[]|[]|||sub(?)| +.|1|0.0000|0.0000|||||[]|||sub(?)| Are|3|0.0000|0.0000|||UC|[]|[]|||| there|3|0.0000|0.0000|||LC|[]|[]|||| any|3|0.0000|0.0000|||LC|[]|[]|||| @@ -25,19 +25,19 @@ to|3|0.0000|0.0000|||LC|[]|[]|||| mind|3|0.0000|0.0000|||LC|[]|[]|||| or|3|0.0000|0.0000|||LC|[]|[]|||| Yeah|1|0.0000|0.0000|,||UC|[]|[]|||| -,|1|0.0000|0.0000|||UC|[]|[]|||| +,|1|0.0000|0.0000|||||[]|||| sure|1|0.0000|0.0000|.||LC|[]|[]|||| -.|1|0.0000|0.0000|||LC|[]|[]|||| +.|1|0.0000|0.0000|||||[]|||| When|1|0.0000|0.0000|||UC|[]|[]|||| I|1|0.0000|0.0000|||CA|[]|[]|||| hear|1|0.0000|0.0000|||LC|[]|[]|||| Foobar|1|0.0000|0.0000|,||UC|[]|[]|||| -,|1|0.0000|0.0000|||UC|[]|[]|||| +,|1|0.0000|0.0000|||||[]|||| I|1|0.0000|0.0000|||CA|[]|[]|||| think|1|0.0000|0.0000|||LC|[]|[]|||| about|1|0.0000|0.0000|||LC|[]|[]|||| just|1|0.0000|0.0000|||LC|[]|[]|||| that|1|0.0000|0.0000|:||LC|[]|[]|||| -:|1|0.0000|0.0000|||LC|[]|[]|||| +:|1|0.0000|0.0000|||||[]|||| foo|1|0.0000|0.0000|||LC|[]|[]|||sub(,)| a|1|0.0000|0.0000|||LC|[]|[]|||| diff --git a/test/data/short.aligned.punc_case.nlp b/test/data/short.aligned.punc_case.nlp index e8b58a9..affb08c 100644 --- a/test/data/short.aligned.punc_case.nlp +++ b/test/data/short.aligned.punc_case.nlp @@ -1,20 +1,20 @@ token|speaker|ts|endTs|punctuation|prepunctuation|case|tags|wer_tags|oldTs|oldEndTs|ali_comment|confidence |2|0.0000|0.0000|||LC|[]|[]|||| Yeah|1|0.0000|0.0000|,||UC|[]|[]|||| -,|1|0.0000|0.0000|||UC|[]|[]|||| +,|1|0.0000|0.0000|||||[]|||| yeah|1|||,||LC|[]|[]|||del| -,|1|||||LC|[]|[]|||del| +,|1|||||||[]|||del| right|1|0.0000|0.0000|.||LC|[]|[]|||| -.|1|||||LC|[]|[]|||del| +.|1|||||||[]|||del| Yeah|1|||,||UC|[]|[]|||del| -,|1|||||UC|[]|[]|||del| +,|1|||||||[]|||del| all|1|||||LC|[]|[]|||del| right|1|||,||LC|[]|[]|||del| -,|1|0.0000|0.0000|||LC|[]|[]|||sub(I'll)| +,|1|0.0000|0.0000|||||[]|||sub(I'll)| probably|1|0.0000|0.0000|||LC|[]|[]|||sub(do)| just|1|0.0000|0.0000|||LC|[]|[]|||| that|1|0.0000|0.0000|.||LC|[]|[]|||| -.|1|0.0000|0.0000|||LC|[]|[]|||sub(?)| +.|1|0.0000|0.0000|||||[]|||sub(?)| Are|3|0.0000|0.0000|||UC|[]|[]|||| there|3|0.0000|0.0000|||LC|[]|[]|||| any|3|0.0000|0.0000|||LC|[]|[]|||| @@ -25,19 +25,19 @@ to|3|0.0000|0.0000|||LC|[]|[]|||| mind|3|0.0000|0.0000|||LC|[]|[]|||| or|3|0.0000|0.0000|||LC|[]|[]|||| Yeah|1|0.0000|0.0000|,||UC|[]|[]|||| -,|1|0.0000|0.0000|||UC|[]|[]|||| +,|1|0.0000|0.0000|||||[]|||| sure|1|0.0000|0.0000|.||LC|[]|[]|||| -.|1|0.0000|0.0000|||LC|[]|[]|||| +.|1|0.0000|0.0000|||||[]|||| When|1|0.0000|0.0000|||UC|[]|[]|||| I|1|0.0000|0.0000|||CA|[]|[]|||| hear|1|0.0000|0.0000|||LC|[]|[]|||| Foobar|1|0.0000|0.0000|,||UC|[]|[]|||| -,|1|0.0000|0.0000|||UC|[]|[]|||| +,|1|0.0000|0.0000|||||[]|||| I|1|0.0000|0.0000|||CA|[]|[]|||| think|1|0.0000|0.0000|||LC|[]|[]|||| about|1|0.0000|0.0000|||LC|[]|[]|||| just|1|0.0000|0.0000|||LC|[]|[]|||| that|1|0.0000|0.0000|:||LC|[]|[]|||| -:|1|0.0000|0.0000|||LC|[]|[]|||| +:|1|0.0000|0.0000|||||[]|||| foo|1|0.0000|0.0000|||LC|[]|[]|||sub(,)| a|1|0.0000|0.0000|||LC|[]|[]|||| diff --git a/test/fstalign_Test.cc b/test/fstalign_Test.cc index 4f0d509..4b23e63 100644 --- a/test/fstalign_Test.cc +++ b/test/fstalign_Test.cc @@ -695,8 +695,8 @@ TEST_CASE_METHOD(UniqueTestsFixture, "main-adapted-composition()") { const auto testFile = std::string{TEST_DATA} + "align_1.aligned.punc_case.nlp"; REQUIRE(compareFiles(nlp_output.c_str(), testFile.c_str())); - REQUIRE_THAT(result, Contains("WER: 6/14 = 0.4286")); - REQUIRE_THAT(result, Contains("WER: INS:1 DEL:2 SUB:3")); + REQUIRE_THAT(result, Contains("WER: 7/15 = 0.4667")); + REQUIRE_THAT(result, Contains("WER: INS:0 DEL:2 SUB:5")); } SECTION("TXT Hypothesis: wer with case and punctuation(nlp output)") {