-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathVocabulary.hpp
39 lines (33 loc) · 956 Bytes
/
Vocabulary.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#pragma once
#include <string>
#include <unordered_map>
#include <vector>
/// Vocabulary container: a string-to-int lookup table, a list of token
/// records, and three special-token indices.
///
/// Only the default constructor is defined in this header; the file-based
/// constructors are declared here and must be defined in another translation
/// unit.
class Vocabulary {
public:
  /// Creates an empty vocabulary. The containers are empty and every special
  /// index holds the sentinel -1 (see the data members below).
  Vocabulary() = default;

  /// @param trainFile          presumably the path of the training corpus
  ///                           (TODO confirm against the definition).
  /// @param tokenFreqThreshold presumably a frequency cut-off for tokens
  ///                           (TODO confirm whether inclusive).
  Vocabulary(const std::string& trainFile, const int tokenFreqThreshold);

  /// Same as above with an extra subword switch.
  /// @param useSubword presumably enables subword tokens (TODO confirm).
  Vocabulary(const std::string& trainFile, const int tokenFreqThreshold, const bool useSubword);

  /// @param NMTRNNG meaning not visible in this header (TODO confirm).
  ///
  /// NOTE: this overload differs from the (string, int) one only in the type
  /// of the second argument, so a `bool` argument selects this constructor
  /// and an `int` argument selects the threshold one.
  Vocabulary(const std::string& trainFile, const bool NMTRNNG);

  /// Per-token record; defined out-of-line after this class.
  class Token;

  /// Maps a token string to an int (presumably its position in tokenList).
  std::unordered_map<std::string, int> tokenIndex;

  /// Token records held by raw pointer.
  /// NOTE(review): no destructor is declared here, so nothing in this header
  /// frees these pointers - confirm who owns them.
  std::vector<Vocabulary::Token*> tokenList;

  // Special-token indices. They are initialised to -1 so that a
  // default-constructed Vocabulary never exposes an indeterminate value;
  // the other constructors are expected to overwrite them.
  int eosIndex = -1;  ///< presumably the end-of-sequence token index
  int unkIndex = -1;  ///< presumably the unknown-token index
  int symIndex = -1;  ///< meaning not visible in this header
};
/// One vocabulary record: the token text, a count, and two optional
/// attributes (`action`, `subword`) that only some constructors set.
///
/// Attributes that a constructor does not set keep their default member
/// initializer (`action == -1`, `subword == false`) instead of being left
/// indeterminate.
class Vocabulary::Token {
public:
  /// Builds a record with text and count only.
  /// @param str_   token text, copied into `str`.
  /// @param count_ value stored in `count` (presumably an occurrence count).
  Token(const std::string& str_, const int count_)
      : str(str_), count(count_) {}

  /// Builds a record that also carries the subword flag.
  /// Selected when the third argument is a `bool`.
  /// @param subword_ value stored in `subword`.
  Token(const std::string& str_, const int count_, const bool subword_)
      : str(str_), count(count_), subword(subword_) {}

  /// Builds a record that also carries an action id.
  /// Selected when the third argument is an `int`.
  /// @param action_ value stored in `action`.
  Token(const std::string& str_, const int count_, const int action_)
      : str(str_), count(count_), action(action_) {}

  std::string str;       ///< token text
  int count;             ///< set by every constructor
  int action = -1;       ///< -1 unless the action constructor was used
  bool subword = false;  ///< false unless the subword constructor was used
};