Skip to content

Commit

Permalink
updating version
Browse files Browse the repository at this point in the history
  • Loading branch information
pique0822 committed Oct 9, 2023
1 parent 7045a4f commit 966a4a0
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 8 deletions.
23 changes: 16 additions & 7 deletions src/Nlp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,14 @@ NlpFstLoader::NlpFstLoader(std::vector<RawNlpRecord> &records, Json::Value norma
Json::Value wer_sidecar)
: NlpFstLoader(records, normalization, wer_sidecar, true) {}

NlpFstLoader::NlpFstLoader(std::vector<RawNlpRecord> &records, Json::Value normalization,
NlpFstLoader::NlpFstLoader(std::vector<RawNlpRecord> &records, Json::Value normalization,
Json::Value wer_sidecar, bool processLabels, bool use_punctuation, bool use_case)
: FstLoader() {
mJsonNorm = normalization;
mWerSidecar = wer_sidecar;
mUsePunctuation = use_punctuation;
mUseCase = use_case;

std::string last_label;
bool firstTk = true;

Expand Down Expand Up @@ -81,8 +84,10 @@ NlpFstLoader::NlpFstLoader(std::vector<RawNlpRecord> &records, Json::Value norma
mJsonNorm[curr_label_id]["candidates"][last_idx]["verbalization"].append(curr_tk);
}
} else {
std::string lower_cased = UnicodeLowercase(curr_tk);
mToken.push_back(lower_cased);
if (!mUseCase) {
curr_tk = UnicodeLowercase(curr_tk);
}
mToken.push_back(curr_tk);
mSpeakers.push_back(speaker);
if (use_punctuation && punctuation != "") {
mToken.push_back(punctuation);
Expand Down Expand Up @@ -118,8 +123,10 @@ void NlpFstLoader::addToSymbolTable(fst::SymbolTable &symbol) const {
auto candidate = candidates[i]["verbalization"];
for (auto tk_itr : candidate) {
std::string token = tk_itr.asString();
std::string lower_cased = UnicodeLowercase(token);
AddSymbolIfNeeded(symbol, lower_cased);
if (!mUseCase) {
token = UnicodeLowercase(token);
}
AddSymbolIfNeeded(symbol, token);
}
}
}
Expand Down Expand Up @@ -250,11 +257,13 @@ so we add 2 states
auto candidate = candidates[i]["verbalization"];
for (auto tk_itr : candidate) {
std::string ltoken = std::string(tk_itr.asString());
std::string lower_cased = UnicodeLowercase(ltoken);
if (!mUseCase) {
ltoken = UnicodeLowercase(ltoken);
}
transducer.AddState();
nextState++;

int token_sym = symbol.Find(lower_cased);
int token_sym = symbol.Find(ltoken);
if (token_sym == -1) {
token_sym = symbol.Find(options.symUnk);
}
Expand Down
2 changes: 2 additions & 0 deletions src/Nlp.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ class NlpFstLoader : public FstLoader {
Json::Value mJsonNorm;
Json::Value mWerSidecar;
/// @brief Look up a stored token by position.
/// @param index Position of the requested token within mToken.
/// @return Const reference to the element mToken.at(index) yields.
/// NOTE(review): mToken is presumably a std::vector<std::string>, in which
/// case at() throws std::out_of_range for an invalid index — confirm against
/// the member's declaration.
virtual const std::string &getToken(int index) const {
  return mToken.at(index);
}
private:
bool mUsePunctuation, mUseCase;
};

#endif /* NLP_H_ */
2 changes: 1 addition & 1 deletion src/version.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#pragma once

#define FSTALIGNER_VERSION_MAJOR 1
#define FSTALIGNER_VERSION_MINOR 10
#define FSTALIGNER_VERSION_MINOR 11
#define FSTALIGNER_VERSION_PATCH 0

0 comments on commit 966a4a0

Please sign in to comment.