-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathunittests.cpp
61 lines (51 loc) · 2.42 KB
/
unittests.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include "Tokenizers.h"
#include <gtest/gtest.h>
#include <fstream>
/// Reads a text file into a list holding one entry per line.
///
/// Lines are split with std::getline, so the newline delimiter is not stored
/// and blank lines are kept as empty strings.
///
/// @param filename Path of the file to read.
/// @return The lines of the file in order. The list is empty when the file
///         cannot be opened or has no content.
std::list<std::string> load_text(const std::string& filename) {
    std::list<std::string> texts;
    std::ifstream input(filename);
    std::string lineBuffer;
    // If the stream failed to open, the first getline fails and the loop never runs.
    while (std::getline(input, lineBuffer)) {
        texts.push_back(lineBuffer);
    }
    return texts;
}
/// Fixture shared by the SubwordTextEncoder tests.
///
/// Provides one encoder that is left untouched after construction and one
/// whose vocabulary is built from the lines of ./readme.txt before each test.
class SubwordTextEncoderTest : public ::testing::Test {
protected:
    /// Loads the corpus and builds the vocabulary of TextEncoderVocabFilled.
    void SetUp() override {
        vocab = load_text("./readme.txt");
        // load_text returns an empty list when the file cannot be opened. Fail
        // here with an explicit message instead of building from nothing and
        // surfacing the problem later as an unrelated-looking mismatch.
        ASSERT_FALSE(vocab.empty())
            << "No lines could be read from ./readme.txt. Run the tests from the directory that contains it.";
        TextEncoderVocabFilled.build_vocabulary(vocab);
    }

    /// Lines of ./readme.txt, used as the corpus for build_vocabulary.
    std::list<std::string> vocab;
    /// Name handed to both encoder constructors.
    std::string name = "Test";
    /// Size handed to both encoder constructors; VocabBuilds expects the built
    /// vocabulary to contain this many entries.
    int vocab_size = 1000;

    // Declared after name and vocab_size because their initializers read both.
    /// Encoder on which build_vocabulary is never called.
    tokenizers::SubwordTextEncoder TextEncoderVocabEmpty = tokenizers::SubwordTextEncoder(vocab_size, name);
    /// Encoder whose vocabulary is built in SetUp.
    tokenizers::SubwordTextEncoder TextEncoderVocabFilled = tokenizers::SubwordTextEncoder(vocab_size, name);

    /// Ids the tests expect for "Hello" with the vocabulary built in SetUp.
    std::list<int> hello_encoded{1073, 1102, 1109, 1109, 1112};
    /// Text the tests expect when hello_encoded is decoded.
    std::string hello_decoded = "Hello";
};
// An encoder on which build_vocabulary was never called must report size 0.
TEST_F(SubwordTextEncoderTest, VocabEmptyOnInitialisation)
{
    EXPECT_EQ(TextEncoderVocabEmpty.get_vocab_size(), 0)
        << "Vocabulary size should be 0 on initialisation.";
}
// The name passed to the constructor must be returned unchanged by get_name().
TEST_F(SubwordTextEncoderTest, NameIsInitialised)
{
    EXPECT_EQ(TextEncoderVocabEmpty.get_name(), name)
        << "Name is not initialised correctly.";
}
// Checks that get_vocab_size() is 0 for the encoder that never built a vocabulary.
// NOTE(review): this is the same assertion as VocabEmptyOnInitialisation. If the
// intent was to verify the size passed to the constructor, confirm which accessor
// exposes it and assert against vocab_size instead.
TEST_F(SubwordTextEncoderTest, VocabsizeIsInitialised)
{
    EXPECT_EQ(TextEncoderVocabEmpty.get_vocab_size(), 0)
        << "Vocab Size is not initialised correctly.";
}
// After SetUp has built the vocabulary, it must hold exactly vocab_size entries.
TEST_F(SubwordTextEncoderTest, VocabBuilds)
{
    EXPECT_EQ(TextEncoderVocabFilled.get_vocabulary().size(), vocab_size)
        << "Vocabulary is not building.";
}
// Encoding "Hello" with the built vocabulary must yield exactly hello_encoded.
TEST_F(SubwordTextEncoderTest, EncodesHello) {
    // Encode once and reuse the result for both checks.
    const auto encoded = TextEncoderVocabFilled.encode("Hello");

    // Compare against the fixture's expected sequence instead of a separate
    // literal length, so the two cannot drift apart. A length mismatch is
    // fatal because the element-wise comparison below would add nothing.
    ASSERT_EQ(encoded.size(), hello_encoded.size())
        << "Encoding did not work. Size different than expected.";
    EXPECT_EQ(encoded, hello_encoded)
        << "Encoding is not correct. Do you need to update the test phrase?";
}
// Decoding hello_encoded with the built vocabulary must yield hello_decoded.
TEST_F(SubwordTextEncoderTest, DecodesHello) {
    // Decode once and reuse the result for both checks and for the diagnostic.
    const auto decoded = TextEncoderVocabFilled.decode(hello_encoded);

    // The diagnostic prints the expected text, so it is labelled as a value
    // rather than as a length.
    ASSERT_EQ(decoded.length(), hello_decoded.length())
        << "Decoding did not work. Size different to expected value. Returned value: " << decoded
        << " Expected value: " << hello_decoded;
    EXPECT_EQ(decoded, hello_decoded)
        << "Decoding is not correct. Do you need to update the test phrase?";
}
// Test-runner entry point: hands the command line to GoogleTest, then runs
// every registered test and returns the result as the process exit code.
int main(int argc, char** argv) {
    ::testing::InitGoogleTest(&argc, argv);
    const int exit_code = RUN_ALL_TESTS();
    return exit_code;
}