diff --git a/MODULE.bazel b/MODULE.bazel index 58faa0db..266b1e64 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -9,6 +9,7 @@ bazel_dep(name = "googletest", version = "1.15.2") bazel_dep(name = "highway", version = "1.1.0") bazel_dep(name = "nlohmann_json", version = "3.11.3") bazel_dep(name = "platforms", version = "0.0.10") +bazel_dep(name = "protobuf", version = "28.3") bazel_dep(name = "rules_cc", version = "0.0.9") bazel_dep(name = "rules_license", version = "0.0.7") bazel_dep(name = "google_benchmark", version = "1.8.5") @@ -27,9 +28,9 @@ http_archive( build_file = "@//bazel:sentencepiece.bazel", patch_args = ["-p1"], patches = ["@//bazel:sentencepiece.patch"], - sha256 = "8409b0126ebd62b256c685d5757150cf7fcb2b92a2f2b98efb3f38fc36719754", - strip_prefix = "sentencepiece-0.1.96", - urls = ["https://github.com/google/sentencepiece/archive/refs/tags/v0.1.96.zip"], + sha256 = "67c34082deb3d89f589d8614b1ae58aa4af250c79e18737d8103242ec62a28f9", + strip_prefix = "sentencepiece-0.2.0", + urls = ["https://github.com/google/sentencepiece/archive/refs/tags/v0.2.0.zip"], ) # For sentencepiece diff --git a/bazel/sentencepiece.bazel b/bazel/sentencepiece.bazel index ab728877..aff6aebc 100644 --- a/bazel/sentencepiece.bazel +++ b/bazel/sentencepiece.bazel @@ -66,6 +66,7 @@ cc_library( "src/char_model.h", "src/filesystem.h", "src/freelist.h", + "src/init.h", "src/model_factory.h", "src/model_interface.h", "src/normalizer.h", @@ -75,7 +76,7 @@ cc_library( "src/util.h", "src/word_model.h", ], - defines = ["_USE_TF_STRING_VIEW"], + defines = ["_USE_TF_STRING_VIEW", "_USE_EXTERNAL_PROTOBUF"], includes = [ ".", "src", @@ -88,10 +89,12 @@ cc_library( ":sentencepiece_model_cc_proto", "@abseil-cpp//absl/container:flat_hash_map", "@abseil-cpp//absl/container:flat_hash_set", + "@abseil-cpp//absl/flags:parse", "@abseil-cpp//absl/memory", "@abseil-cpp//absl/status", "@abseil-cpp//absl/strings", "@abseil-cpp//absl/strings:str_format", + "@protobuf//:protobuf_lite", "@darts_clone", ], ) diff --git a/bazel/sentencepiece.patch b/bazel/sentencepiece.patch index 798c3d4d..8077a884 100644 --- a/bazel/sentencepiece.patch +++ b/bazel/sentencepiece.patch @@ -1,10 +1,10 @@ diff --git a/src/bpe_model.cc b/src/bpe_model.cc -index 22cd115..97e0bda 100644 +index 889fc62..f78e5b7 100644 --- a/src/bpe_model.cc +++ b/src/bpe_model.cc -@@ -21,7 +21,7 @@ +@@ -22,7 +22,7 @@ + #include - #include "bpe_model.h" #include "freelist.h" -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" @@ -12,51 +12,37 @@ index 22cd115..97e0bda 100644 namespace sentencepiece { diff --git a/src/bpe_model_trainer.cc b/src/bpe_model_trainer.cc -index 964d44e..64878cd 100644 +index de86f14..60d0374 100644 --- a/src/bpe_model_trainer.cc +++ b/src/bpe_model_trainer.cc -@@ -18,7 +18,8 @@ +@@ -20,9 +20,9 @@ #include - #include "bpe_model_trainer.h" + #include "pretokenizer_for_training.h" -#include "third_party/absl/container/flat_hash_set.h" +-#include "third_party/absl/strings/str_join.h" +-#include "third_party/absl/strings/str_replace.h" +#include "absl/container/flat_hash_set.h" -+#include "absl/status/status.h" ++#include "absl/strings/str_join.h" ++#include "absl/strings/str_replace.h" #include "util.h" namespace sentencepiece { -@@ -171,7 +172,7 @@ void Trainer::UpdateActiveSymbols() { - active_symbols_.insert(symbols.begin(), symbols.begin() + size); - } - --util::Status Trainer::Train() { -+absl::Status Trainer::Train() { - RETURN_IF_ERROR(status()); - - CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); diff --git a/src/bpe_model_trainer.h b/src/bpe_model_trainer.h -index e011a37..a17e580 100644 +index 15ca479..62f0368 100644 --- a/src/bpe_model_trainer.h +++ b/src/bpe_model_trainer.h -@@ -20,7 +20,8 @@ +@@ -21,8 +21,8 @@ #include #include "sentencepiece_model.pb.h" +-#include "third_party/absl/container/btree_set.h" -#include "third_party/absl/container/flat_hash_map.h" ++#include "absl/container/btree_set.h" +#include "absl/container/flat_hash_map.h" -+#include "absl/status/status.h" #include "trainer_interface.h" namespace sentencepiece { -@@ -35,7 +36,7 @@ class Trainer : public TrainerInterface { - : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, - denormalizer_spec) {} - -- util::Status Train() override; -+ absl::Status Train() override; - - private: - // Symbol represents a character or symbol bigram. diff --git a/src/bpe_model_trainer_test.cc b/src/bpe_model_trainer_test.cc index 173eb9c..2a43c3a 100644 --- a/src/bpe_model_trainer_test.cc @@ -73,12 +59,12 @@ index 173eb9c..2a43c3a 100644 namespace sentencepiece { diff --git a/src/builder.cc b/src/builder.cc -index 378aaa0..fd8edf8 100644 +index 822f6fc..c20e0e7 100644 --- a/src/builder.cc +++ b/src/builder.cc -@@ -18,10 +18,11 @@ +@@ -19,10 +19,10 @@ + #include - #include "builder.h" #include "filesystem.h" -#include "third_party/absl/strings/str_join.h" -#include "third_party/absl/strings/str_replace.h" @@ -88,11 +74,10 @@ index 378aaa0..fd8edf8 100644 +#include "absl/strings/str_replace.h" +#include "absl/strings/str_split.h" +#include "absl/strings/strip.h" -+#include "absl/status/status.h" #ifdef ENABLE_NFKC_COMPILE #include -@@ -36,7 +37,7 @@ +@@ -37,7 +37,7 @@ #include "normalization_rule.h" #include "normalizer.h" @@ -101,191 +86,19 @@ index 378aaa0..fd8edf8 100644 #include "util.h" namespace sentencepiece { -@@ -145,7 +146,7 @@ Builder::Chars Normalize(const Builder::CharsMap &chars_map, - } // namespace - - // static --util::Status Builder::CompileCharsMap(const CharsMap &chars_map, -+absl::Status Builder::CompileCharsMap(const CharsMap &chars_map, - std::string *output) { - CHECK_OR_RETURN(output); - CHECK_OR_RETURN(!chars_map.empty()); -@@ -212,7 +213,7 @@ util::Status Builder::CompileCharsMap(const CharsMap &chars_map, - } - - // static --util::Status Builder::DecompileCharsMap(absl::string_view blob, -+absl::Status Builder::DecompileCharsMap(absl::string_view blob, - Builder::CharsMap *chars_map) { - CHECK_OR_RETURN(chars_map); - chars_map->clear(); -@@ -265,7 +266,7 @@ util::Status Builder::DecompileCharsMap(absl::string_view blob, - } - - // static --util::Status Builder::GetPrecompiledCharsMap(const std::string &name, -+absl::Status Builder::GetPrecompiledCharsMap(const std::string &name, - std::string *output) { - CHECK_OR_RETURN(output); - -@@ -282,12 +283,12 @@ util::Status Builder::GetPrecompiledCharsMap(const std::string &name, - return util::OkStatus(); - } - } -- return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) -+ return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) - << "No precompiled charsmap is found: " << name; - } - - // static --util::Status Builder::BuildNFKCMap(CharsMap *chars_map) { -+absl::Status Builder::BuildNFKCMap(CharsMap *chars_map) { - #ifdef ENABLE_NFKC_COMPILE - LOG(INFO) << "Running BuildNFKCMap"; - -@@ -345,7 +346,7 @@ util::Status Builder::BuildNFKCMap(CharsMap *chars_map) { - return util::OkStatus(); - } - --util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { -+absl::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { - #ifdef ENABLE_NFKC_COMPILE - LOG(INFO) << "Running BuildNmtNFKCMap"; - -@@ -420,7 +421,7 @@ util::Status Builder::BuildNmtNFKCMap(CharsMap *chars_map) { - } - - // static --util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { -+absl::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { - #ifdef ENABLE_NFKC_COMPILE - for (auto &c : *chars_map) { - std::vector trg; -@@ -445,7 +446,7 @@ util::Status Builder::MergeUnicodeCaseFoldMap(Builder::CharsMap *chars_map) { - } - - // static --util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { -+absl::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { - #ifdef ENABLE_NFKC_COMPILE - CharsMap nfkc_map; - RETURN_IF_ERROR(Builder::BuildNFKCMap(&nfkc_map)); -@@ -460,7 +461,7 @@ util::Status Builder::BuildNFKC_CFMap(CharsMap *chars_map) { - } - - // static --util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { -+absl::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { - #ifdef ENABLE_NFKC_COMPILE - CharsMap nfkc_map; - RETURN_IF_ERROR(Builder::BuildNmtNFKCMap(&nfkc_map)); -@@ -475,7 +476,7 @@ util::Status Builder::BuildNmtNFKC_CFMap(CharsMap *chars_map) { - } - - // static --util::Status Builder::LoadCharsMap(absl::string_view filename, -+absl::Status Builder::LoadCharsMap(absl::string_view filename, - CharsMap *chars_map) { - LOG(INFO) << "Loading mapping file: " << filename.data(); - CHECK_OR_RETURN(chars_map); -@@ -510,7 +511,7 @@ util::Status Builder::LoadCharsMap(absl::string_view filename, - } - - // static --util::Status Builder::SaveCharsMap(absl::string_view filename, -+absl::Status Builder::SaveCharsMap(absl::string_view filename, - const Builder::CharsMap &chars_map) { - auto output = filesystem::NewWritableFile(filename); - RETURN_IF_ERROR(output->status()); -@@ -540,7 +541,7 @@ util::Status Builder::SaveCharsMap(absl::string_view filename, - } - - // static --util::Status Builder::RemoveRedundantMap(CharsMap *chars_map) { -+absl::Status Builder::RemoveRedundantMap(CharsMap *chars_map) { - CHECK_OR_RETURN(chars_map); - - CharsMap new_chars_map; diff --git a/src/builder.h b/src/builder.h -index 49d2884..8ad872c 100644 +index 094da72..ea5171e 100644 --- a/src/builder.h +++ b/src/builder.h -@@ -22,7 +22,8 @@ +@@ -22,7 +22,7 @@ #include "common.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" namespace sentencepiece { namespace normalizer { -@@ -43,15 +44,15 @@ class Builder { - // String-to-string mapping. - using CharsMap = std::map; - -- static util::Status CompileCharsMap(const CharsMap &chars_map, -+ static absl::Status CompileCharsMap(const CharsMap &chars_map, - std::string *output); - - // Decompiles `blob` into `chars_map`. -- static util::Status DecompileCharsMap(absl::string_view blob, -+ static absl::Status DecompileCharsMap(absl::string_view blob, - CharsMap *chars_map); - - // Returns a pre-compiled binary index with `name`. -- static util::Status GetPrecompiledCharsMap(const std::string &name, -+ static absl::Status GetPrecompiledCharsMap(const std::string &name, - std::string *output); - - // Makes a normalization mapping based on NFKC. -@@ -89,30 +90,30 @@ class Builder { - // normalizer is the goal of SentencePiece. - // - // TODO(taku): Make NFC, NFD, and NFKD mapping if necessary. -- static util::Status BuildNFKCMap(CharsMap *chars_map); -+ static absl::Status BuildNFKCMap(CharsMap *chars_map); - - // Makes an NFKC-based mapping with NMT specific modifications around - // whitespaces. -- static util::Status BuildNmtNFKCMap(CharsMap *chars_map); -+ static absl::Status BuildNmtNFKCMap(CharsMap *chars_map); - - // Merge Unicode case folding mapping into `chars_map`. -- static util::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map); -+ static absl::Status MergeUnicodeCaseFoldMap(CharsMap *chars_map); - - // Makes NFKC with Unicode case folding. -- static util::Status BuildNFKC_CFMap(CharsMap *chars_map); -+ static absl::Status BuildNFKC_CFMap(CharsMap *chars_map); - - // Makes NMT NFKC with Unicode case folding. -- static util::Status BuildNmtNFKC_CFMap(CharsMap *chars_map); -+ static absl::Status BuildNmtNFKC_CFMap(CharsMap *chars_map); - - // Builds Chars map save in `filename`. - // Format: - // src_uchar1 src_uchar2 ... trg_uchar1 trg_uchar2... - // (src|trg)_ucharX must be a hex of Unicode code point. -- static util::Status LoadCharsMap(absl::string_view filename, -+ static absl::Status LoadCharsMap(absl::string_view filename, - CharsMap *chars_map); - - // Saves Chars map to `filename` as TSV. -- static util::Status SaveCharsMap(absl::string_view filename, -+ static absl::Status SaveCharsMap(absl::string_view filename, - const CharsMap &chars_map); - - private: -@@ -121,7 +122,7 @@ class Builder { - // Removes redundant rules from `chars_map`. - // When char_maps have "aa" => "bb" and "a" => "b", the first - // rule is not necessary since the second rule can cover the first rule. -- static util::Status RemoveRedundantMap(CharsMap *chars_map); -+ static absl::Status RemoveRedundantMap(CharsMap *chars_map); - }; - } // namespace normalizer - } // namespace sentencepiece diff --git a/src/builder_test.cc b/src/builder_test.cc index 4acb7b3..1dee5c7 100644 --- a/src/builder_test.cc @@ -299,46 +112,6 @@ index 4acb7b3..1dee5c7 100644 #include "util.h" namespace sentencepiece { -diff --git a/src/char_model_trainer.cc b/src/char_model_trainer.cc -index f438d78..4f4c603 100644 ---- a/src/char_model_trainer.cc -+++ b/src/char_model_trainer.cc -@@ -16,12 +16,13 @@ - - #include "char_model.h" - #include "char_model_trainer.h" -+#include "absl/status/status.h" - #include "util.h" - - namespace sentencepiece { - namespace character { - --util::Status Trainer::Train() { -+absl::Status Trainer::Train() { - RETURN_IF_ERROR(status()); - - CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); -diff --git a/src/char_model_trainer.h b/src/char_model_trainer.h -index e563819..a5d021c 100644 ---- a/src/char_model_trainer.h -+++ b/src/char_model_trainer.h -@@ -17,6 +17,7 @@ - - #include "sentencepiece_model.pb.h" - #include "trainer_interface.h" -+#include "absl/status/status.h" - - namespace sentencepiece { - namespace character { -@@ -30,7 +31,7 @@ class Trainer : public TrainerInterface { - : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, - denormalizer_spec) {} - -- util::Status Train() override; -+ absl::Status Train() override; - }; - } // namespace character - } // namespace sentencepiece diff --git a/src/char_model_trainer_test.cc b/src/char_model_trainer_test.cc index 8c2e4b7..e8b4979 100644 --- a/src/char_model_trainer_test.cc @@ -355,30 +128,23 @@ index 8c2e4b7..e8b4979 100644 namespace sentencepiece { diff --git a/src/common.h b/src/common.h -index 7595634..339f831 100644 +index d7e6186..bfe369c 100644 --- a/src/common.h +++ b/src/common.h -@@ -146,6 +146,7 @@ inline const char *BaseName(const char *path) { - } // namespace logging - } // namespace sentencepiece - -+#ifndef LOG - #define LOG(severity) \ - (::sentencepiece::logging::GetMinLogLevel() > \ - ::sentencepiece::logging::LOG_##severity) \ -@@ -156,6 +157,7 @@ inline const char *BaseName(const char *path) { - std::cerr << ::sentencepiece::logging::BaseName(__FILE__) << "(" \ - << __LINE__ << ") " \ - << "LOG(" << #severity << ") " -+#endif // LOG - - #define CHECK(condition) \ - (condition) ? 0 \ +@@ -26,7 +26,7 @@ + #include + + #include "config.h" +-#include "third_party/absl/strings/string_view.h" ++#include "absl/strings/string_view.h" + + #if defined(_WIN32) && !defined(__CYGWIN__) + #define OS_WIN diff --git a/src/compile_charsmap_main.cc b/src/compile_charsmap_main.cc -index c5a5188..e5db1d7 100644 +index da15328..b806319 100644 --- a/src/compile_charsmap_main.cc +++ b/src/compile_charsmap_main.cc -@@ -22,8 +22,9 @@ +@@ -22,8 +22,8 @@ #include "filesystem.h" #include "init.h" #include "sentencepiece_processor.h" @@ -386,24 +152,14 @@ index c5a5188..e5db1d7 100644 -#include "third_party/absl/strings/string_view.h" +#include "absl/flags/flag.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" using sentencepiece::normalizer::Builder; -@@ -160,7 +161,7 @@ int main(int argc, char **argv) { - - const std::vector>> -+ std::function>> - kRuleList = {{"nfkc", Builder::BuildNFKCMap}, - {"nmt_nfkc", Builder::BuildNmtNFKCMap}, - {"nfkc_cf", Builder::BuildNFKC_CFMap}, diff --git a/src/error.cc b/src/error.cc -index a226d98..ab4675d 100644 +index d3792dc..19ef6f3 100644 --- a/src/error.cc +++ b/src/error.cc -@@ -20,8 +20,8 @@ +@@ -21,8 +21,8 @@ #ifdef _USE_EXTERNAL_ABSL // Naive workaround to define minloglevel on external absl package. // We want to define them in other cc file. @@ -414,106 +170,19 @@ index a226d98..ab4675d 100644 ABSL_FLAG(int32, minloglevel, 0, "Messages logged at a lower level than this don't actually."); #endif -diff --git a/src/filesystem.cc b/src/filesystem.cc -index 833c8f7..9a1b6c9 100644 ---- a/src/filesystem.cc -+++ b/src/filesystem.cc -@@ -15,7 +15,8 @@ - #include - - #include "filesystem.h" --#include "third_party/absl/memory/memory.h" -+#include "absl/status/status.h" -+#include "absl/memory/memory.h" - #include "util.h" - - #if defined(OS_WIN) && defined(UNICODE) && defined(_UNICODE) -@@ -36,7 +37,7 @@ class PosixReadableFile : public ReadableFile { - is_binary ? std::ios::binary | std::ios::in - : std::ios::in)) { - if (!*is_) -- status_ = util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) -+ status_ = util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) - << "\"" << filename.data() << "\": " << util::StrError(errno); - } - -@@ -44,7 +45,7 @@ class PosixReadableFile : public ReadableFile { - if (is_ != &std::cin) delete is_; - } - -- util::Status status() const { return status_; } -+ absl::Status status() const { return status_; } - - bool ReadLine(std::string *line) { - return static_cast(std::getline(*is_, *line)); -@@ -61,7 +62,7 @@ class PosixReadableFile : public ReadableFile { - } - - private: -- util::Status status_; -+ absl::Status status_; - std::istream *is_; - }; - -@@ -75,7 +76,7 @@ class PosixWritableFile : public WritableFile { - : std::ios::out)) { - if (!*os_) - status_ = -- util::StatusBuilder(util::StatusCode::kPermissionDenied, GTL_LOC) -+ util::StatusBuilder(absl::StatusCode::kPermissionDenied, GTL_LOC) - << "\"" << filename.data() << "\": " << util::StrError(errno); - } - -@@ -83,7 +84,7 @@ class PosixWritableFile : public WritableFile { - if (os_ != &std::cout) delete os_; - } - -- util::Status status() const { return status_; } -+ absl::Status status() const { return status_; } - - bool Write(absl::string_view text) { - os_->write(text.data(), text.size()); -@@ -93,7 +94,7 @@ class PosixWritableFile : public WritableFile { - bool WriteLine(absl::string_view text) { return Write(text) && Write("\n"); } - - private: -- util::Status status_; -+ absl::Status status_; - std::ostream *os_; - }; - diff --git a/src/filesystem.h b/src/filesystem.h -index e572b4b..6e8e305 100644 +index e572b4b..dbcce48 100644 --- a/src/filesystem.h +++ b/src/filesystem.h -@@ -23,7 +23,8 @@ +@@ -23,7 +23,7 @@ #include "common.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" namespace sentencepiece { namespace filesystem { -@@ -33,7 +34,7 @@ class ReadableFile { - explicit ReadableFile(absl::string_view filename, bool is_binary = false) {} - virtual ~ReadableFile() {} - -- virtual util::Status status() const = 0; -+ virtual absl::Status status() const = 0; - virtual bool ReadLine(std::string *line) = 0; - virtual bool ReadAll(std::string *line) = 0; - }; -@@ -44,7 +45,7 @@ class WritableFile { - explicit WritableFile(absl::string_view filename, bool is_binary = false) {} - virtual ~WritableFile() {} - -- virtual util::Status status() const = 0; -+ virtual absl::Status status() const = 0; - virtual bool Write(absl::string_view text) = 0; - virtual bool WriteLine(absl::string_view text) = 0; - }; diff --git a/src/filesystem_test.cc b/src/filesystem_test.cc index 790e756..39ece99 100644 --- a/src/filesystem_test.cc @@ -528,7 +197,7 @@ index 790e756..39ece99 100644 namespace sentencepiece { diff --git a/src/init.h b/src/init.h -index 090a2d9..acfda8a 100644 +index 6ae047e..14edb8c 100644 --- a/src/init.h +++ b/src/init.h @@ -16,8 +16,8 @@ @@ -540,41 +209,26 @@ index 090a2d9..acfda8a 100644 +#include "absl/flags/flag.h" +#include "absl/flags/parse.h" - ABSL_DECLARE_FLAG(int32, minloglevel); - -diff --git a/src/model_factory.cc b/src/model_factory.cc -index be99501..040c00c 100644 ---- a/src/model_factory.cc -+++ b/src/model_factory.cc -@@ -15,7 +15,7 @@ - #include "bpe_model.h" - #include "char_model.h" - #include "model_factory.h" --#include "third_party/absl/memory/memory.h" -+#include "absl/memory/memory.h" - #include "unigram_model.h" - #include "word_model.h" - + #ifdef _USE_EXTERNAL_PROTOBUF + #include "google/protobuf/message_lite.h" diff --git a/src/model_interface.cc b/src/model_interface.cc -index c49be1e..22c6378 100644 +index bb52f9a..7c52398 100644 --- a/src/model_interface.cc +++ b/src/model_interface.cc -@@ -16,8 +16,8 @@ +@@ -17,7 +17,7 @@ + #include - #include "model_interface.h" #include "sentencepiece_model.pb.h" --#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/str_format.h" -+#include "absl/memory/memory.h" +#include "absl/strings/str_format.h" #include "util.h" namespace sentencepiece { diff --git a/src/model_interface.h b/src/model_interface.h -index aef5b53..c7858fb 100644 +index 06e9243..89f0e26 100644 --- a/src/model_interface.h +++ b/src/model_interface.h -@@ -25,9 +25,10 @@ +@@ -25,9 +25,9 @@ #include "normalizer.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" @@ -583,45 +237,17 @@ index aef5b53..c7858fb 100644 -#include "third_party/darts_clone/darts.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" +#include "include/darts.h" #include "util.h" namespace sentencepiece { -@@ -69,7 +70,7 @@ class ModelInterface { - - // Returns Status. - // Encode/Decode functions are valid only when status is OK. -- virtual util::Status status() const { return status_; } -+ virtual absl::Status status() const { return status_; } - - virtual const ModelProto &model_proto() const { return *model_proto_; } - -@@ -82,7 +83,7 @@ class ModelInterface { - // normally users do not need to call this function. This function is provided - // just in case that a user want to manually choose which encoder version to - // use. -- virtual util::Status SetEncoderVersion(EncoderVersion encoder_version) { -+ virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version) { - encoder_version_ = encoder_version; - return util::OkStatus(); - } -@@ -261,7 +262,7 @@ class ModelInterface { - EncoderVersion encoder_version_ = EncoderVersion::kOptimized; - - // status. -- util::Status status_; -+ absl::Status status_; - }; - } // namespace sentencepiece - #endif // MODEL_INTERFACE_H_ diff --git a/src/model_interface_test.cc b/src/model_interface_test.cc -index 69ee4e6..26a1e05 100644 +index 09e41d3..725bfa4 100644 --- a/src/model_interface_test.cc +++ b/src/model_interface_test.cc -@@ -15,7 +15,7 @@ +@@ -16,7 +16,7 @@ + #include "model_factory.h" - #include "model_interface.h" #include "testharness.h" -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" @@ -629,166 +255,90 @@ index 69ee4e6..26a1e05 100644 namespace sentencepiece { diff --git a/src/normalizer.cc b/src/normalizer.cc -index 100b875..c553906 100644 +index b50e867..55db8f5 100644 --- a/src/normalizer.cc +++ b/src/normalizer.cc -@@ -18,11 +18,12 @@ +@@ -18,10 +18,10 @@ #include #include "common.h" --#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/match.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/absl/strings/strip.h" -#include "third_party/darts_clone/darts.h" -+#include "absl/memory/memory.h" +#include "absl/strings/match.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" -+#include "absl/status/status.h" +#include "include/darts.h" #include "util.h" namespace sentencepiece { -@@ -71,7 +72,7 @@ void Normalizer::Init() { - } - } - --util::Status Normalizer::Normalize(absl::string_view input, -+absl::Status Normalizer::Normalize(absl::string_view input, - std::string *normalized, - std::vector *norm_to_orig) const { - norm_to_orig->clear(); -@@ -274,7 +275,7 @@ std::string Normalizer::EncodePrecompiledCharsMap( - } - - // static --util::Status Normalizer::DecodePrecompiledCharsMap( -+absl::Status Normalizer::DecodePrecompiledCharsMap( - absl::string_view blob, absl::string_view *trie_blob, - absl::string_view *normalized, std::string *buffer) { - uint32 trie_blob_size = 0; diff --git a/src/normalizer.h b/src/normalizer.h -index 622bbd2..21d1385 100644 +index c79813c..fa82d58 100644 --- a/src/normalizer.h +++ b/src/normalizer.h -@@ -24,8 +24,9 @@ +@@ -24,8 +24,8 @@ #include "common.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/darts_clone/darts.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" +#include "include/darts.h" - #include "util.h" namespace sentencepiece { -@@ -75,7 +76,7 @@ class Normalizer { - - // Returns Status. - // Normalizes function is valid only when status is OK. -- virtual util::Status status() const { return status_; } -+ virtual absl::Status status() const { return status_; } - - // Normalizes a plain utf8 string into an internal representation for - // Sentencepiece model. |norm_to_orig| stores the byte-alignment from -@@ -86,7 +87,7 @@ class Normalizer { - // - Adds a prefix space. - // - Replaces a space with a meta symbol. - // - Removing heading, tailing and other redundant spaces. -- virtual util::Status Normalize(absl::string_view input, -+ virtual absl::Status Normalize(absl::string_view input, - std::string *normalized, - std::vector *norm_to_orig) const; - -@@ -121,7 +122,7 @@ class Normalizer { - absl::string_view normalized); - - // Decodes blob into trie_blob and normalized string. -- static util::Status DecodePrecompiledCharsMap(absl::string_view blob, -+ static absl::Status DecodePrecompiledCharsMap(absl::string_view blob, - absl::string_view *trie_blob, - absl::string_view *normalized, - std::string *buffer = nullptr); -@@ -153,7 +154,7 @@ class Normalizer { - #endif - - // Normalizer's status. -- util::Status status_; -+ absl::Status status_; - }; - } // namespace normalizer - } // namespace sentencepiece + namespace normalizer { diff --git a/src/pretokenizer_for_training.cc b/src/pretokenizer_for_training.cc -index 049658e..8021511 100644 +index d4f492c..54bdff1 100644 --- a/src/pretokenizer_for_training.cc +++ b/src/pretokenizer_for_training.cc -@@ -14,7 +14,7 @@ +@@ -15,7 +15,7 @@ + #include - #include "pretokenizer_for_training.h" -#include "third_party/absl/strings/str_replace.h" +#include "absl/strings/str_replace.h" namespace sentencepiece { namespace pretokenizer { diff --git a/src/pretokenizer_for_training.h b/src/pretokenizer_for_training.h -index 2d3bc82..b4a6de3 100644 +index fa54f95..b5f9ae9 100644 --- a/src/pretokenizer_for_training.h +++ b/src/pretokenizer_for_training.h -@@ -21,7 +21,8 @@ +@@ -21,7 +21,7 @@ #include "common.h" #include "sentencepiece.pb.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" namespace sentencepiece { namespace pretokenizer { -@@ -30,7 +31,7 @@ class PretokenizerForTrainingInterface { - public: - PretokenizerForTrainingInterface() {} - virtual ~PretokenizerForTrainingInterface() {} -- virtual util::Status status() const = 0; -+ virtual absl::Status status() const = 0; - - // Puts kUPPBoundaryStr before and after the pre-tokenizer's segmentation - // when there are no spaces between these tokens. diff --git a/src/pretokenizer_for_training_test.cc b/src/pretokenizer_for_training_test.cc -index 80f4787..de89fe3 100644 +index 99db0c5..25e8aef 100644 --- a/src/pretokenizer_for_training_test.cc +++ b/src/pretokenizer_for_training_test.cc -@@ -13,8 +13,9 @@ - // limitations under the License.! +@@ -14,9 +14,9 @@ #include "pretokenizer_for_training.h" + #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" +-#include "third_party/absl/strings/str_join.h" +-#include "third_party/absl/strings/str_split.h" +#include "absl/strings/str_cat.h" ++#include "absl/strings/str_join.h" ++#include "absl/strings/str_split.h" #include "trainer_interface.h" -+#include "absl/status/status.h" namespace sentencepiece { - namespace pretokenizer { -@@ -28,7 +29,7 @@ class MockPretokenizer : public PretokenizerForTrainingInterface { - return spt_; - } - -- util::Status status() const override { return util::OkStatus(); } -+ absl::Status status() const override { return util::OkStatus(); } - - void SetOutput(const SentencePieceText &spt) { spt_ = spt; } - diff --git a/src/sentencepiece_processor.cc b/src/sentencepiece_processor.cc -index 1e4e7a0..78ae527 100644 +index 5d2c857..2afa889 100644 --- a/src/sentencepiece_processor.cc +++ b/src/sentencepiece_processor.cc -@@ -23,14 +23,15 @@ +@@ -30,13 +30,13 @@ + #include "model_interface.h" #include "normalizer.h" #include "sentencepiece.pb.h" - #include "sentencepiece_processor.h" --#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/numbers.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_join.h" @@ -796,7 +346,6 @@ index 1e4e7a0..78ae527 100644 -#include "third_party/absl/strings/str_split.h" -#include "third_party/absl/strings/string_view.h" -#include "third_party/absl/strings/strip.h" -+#include "absl/memory/memory.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" @@ -804,536 +353,33 @@ index 1e4e7a0..78ae527 100644 +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" -+#include "absl/status/status.h" #include "unigram_model.h" #include "util.h" -@@ -52,7 +53,7 @@ const char kReplacementCharacter[] = "\xef\xbf\xbd"; - SentencePieceProcessor::SentencePieceProcessor() {} - SentencePieceProcessor::~SentencePieceProcessor() {} - --util::Status SentencePieceProcessor::Load(absl::string_view filename) { -+absl::Status SentencePieceProcessor::Load(absl::string_view filename) { - auto model_proto = absl::make_unique(); - RETURN_IF_ERROR(io::LoadModelProto(filename, model_proto.get())); - return Load(std::move(model_proto)); -@@ -62,13 +63,13 @@ void SentencePieceProcessor::LoadOrDie(absl::string_view filename) { - CHECK_OK(Load(filename)); - } - --util::Status SentencePieceProcessor::Load(const ModelProto &model_proto) { -+absl::Status SentencePieceProcessor::Load(const ModelProto &model_proto) { - auto model_proto_copy = absl::make_unique(); - *model_proto_copy = model_proto; - return Load(std::move(model_proto_copy)); - } - --util::Status SentencePieceProcessor::LoadFromSerializedProto( -+absl::Status SentencePieceProcessor::LoadFromSerializedProto( - absl::string_view serialized) { - auto model_proto = absl::make_unique(); - CHECK_OR_RETURN( -@@ -76,7 +77,7 @@ util::Status SentencePieceProcessor::LoadFromSerializedProto( - return Load(std::move(model_proto)); - } - --util::Status SentencePieceProcessor::Load( -+absl::Status SentencePieceProcessor::Load( - std::unique_ptr model_proto) { - model_proto_ = std::move(model_proto); - model_ = ModelFactory::Create(*model_proto_); -@@ -117,7 +118,7 @@ util::Status SentencePieceProcessor::Load( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::SetEncoderVersion( -+absl::Status SentencePieceProcessor::SetEncoderVersion( - EncoderVersion encoder_version) { - return model_->SetEncoderVersion(encoder_version); - } -@@ -126,17 +127,17 @@ EncoderVersion SentencePieceProcessor::GetEncoderVersion() const { - return model_->GetEncoderVersion(); - } - --util::Status SentencePieceProcessor::SetEncodeExtraOptions( -+absl::Status SentencePieceProcessor::SetEncodeExtraOptions( - absl::string_view extra_options) { - return ParseExtraOptions(extra_options, &encode_extra_options_); - } - --util::Status SentencePieceProcessor::SetDecodeExtraOptions( -+absl::Status SentencePieceProcessor::SetDecodeExtraOptions( - absl::string_view extra_options) { - return ParseExtraOptions(extra_options, &decode_extra_options_); - } - --util::Status SentencePieceProcessor::status() const { -+absl::Status SentencePieceProcessor::status() const { - CHECK_OR_RETURN(model_) << "Model is not initialized."; - CHECK_OR_RETURN(normalizer_) << "Normalizer is not initialized."; - RETURN_IF_ERROR(model_->status()); -@@ -144,7 +145,7 @@ util::Status SentencePieceProcessor::status() const { - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::SetVocabulary( -+absl::Status SentencePieceProcessor::SetVocabulary( - const std::vector &valid_vocab) { - RETURN_IF_ERROR(status()); - -@@ -174,7 +175,7 @@ util::Status SentencePieceProcessor::SetVocabulary( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::ResetVocabulary() { -+absl::Status SentencePieceProcessor::ResetVocabulary() { - RETURN_IF_ERROR(status()); - for (auto &piece : *(model_proto_->mutable_pieces())) { - if (piece.type() == ModelProto::SentencePiece::UNUSED) -@@ -184,7 +185,7 @@ util::Status SentencePieceProcessor::ResetVocabulary() { - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, -+absl::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, - int threshold) { - auto input = filesystem::NewReadableFile(filename); - RETURN_IF_ERROR(input->status()); -@@ -221,7 +222,7 @@ util::Status SentencePieceProcessor::LoadVocabulary(absl::string_view filename, - - ////////////////////////////////////////////////////////////// - // Simple API. --util::Status SentencePieceProcessor::Encode( -+absl::Status SentencePieceProcessor::Encode( - absl::string_view input, std::vector *pieces) const { - CHECK_OR_RETURN_STATUS_STL(pieces); - -@@ -234,7 +235,7 @@ util::Status SentencePieceProcessor::Encode( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::Encode(absl::string_view input, -+absl::Status SentencePieceProcessor::Encode(absl::string_view input, - std::vector *ids) const { - CHECK_OR_RETURN_STATUS_STL(ids); - -@@ -247,7 +248,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input, - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::Decode( -+absl::Status SentencePieceProcessor::Decode( - const std::vector &pieces, std::string *detokenized) const { - CHECK_OR_RETURN_STATUS_STL(detokenized); - -@@ -258,7 +259,7 @@ util::Status SentencePieceProcessor::Decode( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::Decode(const std::vector &ids, -+absl::Status SentencePieceProcessor::Decode(const std::vector &ids, - std::string *detokenized) const { - CHECK_OR_RETURN_STATUS_STL(detokenized); - -@@ -269,7 +270,7 @@ util::Status SentencePieceProcessor::Decode(const std::vector &ids, - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::NBestEncode( -+absl::Status SentencePieceProcessor::NBestEncode( - absl::string_view input, int nbest_size, - std::vector> *pieces) const { - CHECK_OR_RETURN_STATUS_STL(pieces); -@@ -287,7 +288,7 @@ util::Status SentencePieceProcessor::NBestEncode( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::NBestEncode( -+absl::Status SentencePieceProcessor::NBestEncode( - absl::string_view input, int nbest_size, - std::vector> *ids) const { - CHECK_OR_RETURN_STATUS_STL(ids); -@@ -305,7 +306,7 @@ util::Status SentencePieceProcessor::NBestEncode( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::SampleEncode( -+absl::Status SentencePieceProcessor::SampleEncode( - absl::string_view input, int nbest_size, float alpha, - std::vector *pieces) const { - CHECK_OR_RETURN_STATUS_STL(pieces); -@@ -319,7 +320,7 @@ util::Status SentencePieceProcessor::SampleEncode( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::SampleEncode(absl::string_view input, -+absl::Status SentencePieceProcessor::SampleEncode(absl::string_view input, - int nbest_size, float alpha, - std::vector *ids) const { - CHECK_OR_RETURN_STATUS_STL(ids); -@@ -333,7 +334,7 @@ util::Status SentencePieceProcessor::SampleEncode(absl::string_view input, - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::PopulateSentencePieceText( -+absl::Status SentencePieceProcessor::PopulateSentencePieceText( - absl::string_view input, absl::string_view normalized, - const std::vector &norm_to_orig, const EncodeResult &result, - SentencePieceText *spt) const { -@@ -424,7 +425,7 @@ util::Status SentencePieceProcessor::PopulateSentencePieceText( - return util::OkStatus(); - } // namespace sentencepiece - --util::Status SentencePieceProcessor::Encode(absl::string_view input, -+absl::Status SentencePieceProcessor::Encode(absl::string_view input, - SentencePieceText *spt) const { - CHECK_OR_RETURN_STATUS_PROTO(spt); - -@@ -439,7 +440,7 @@ util::Status SentencePieceProcessor::Encode(absl::string_view input, - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::NBestEncode( -+absl::Status SentencePieceProcessor::NBestEncode( - absl::string_view input, int nbest_size, - NBestSentencePieceText *nbest_spt) const { - CHECK_OR_RETURN_STATUS_PROTO(nbest_spt); -@@ -464,7 +465,7 @@ util::Status SentencePieceProcessor::NBestEncode( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::SampleEncode( -+absl::Status SentencePieceProcessor::SampleEncode( - absl::string_view input, int nbest_size, float alpha, - SentencePieceText *spt) const { - CHECK_OR_RETURN_STATUS_PROTO(spt); -@@ -503,7 +504,7 @@ util::Status SentencePieceProcessor::SampleEncode( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::SampleEncodeAndScore( -+absl::Status SentencePieceProcessor::SampleEncodeAndScore( - absl::string_view input, int samples, float theta, bool wor, - bool include_best, NBestSentencePieceText *samples_spt) const { - CHECK_OR_RETURN(model_->IsSampleEncodeAndScoreAvailable()) -@@ -527,7 +528,7 @@ util::Status SentencePieceProcessor::SampleEncodeAndScore( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, -+absl::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, - float theta, - float *entropy) const { - CHECK_OR_RETURN(model_->IsCalculateEntropyAvailable()) -@@ -540,7 +541,7 @@ util::Status SentencePieceProcessor::CalculateEntropy(absl::string_view input, - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::Decode( -+absl::Status SentencePieceProcessor::Decode( - const std::vector &pieces, SentencePieceText *spt) const { - CHECK_OR_RETURN_STATUS_PROTO(spt); - -@@ -591,7 +592,7 @@ util::Status SentencePieceProcessor::Decode( - }; - - auto ProcessBytePieces = [&](int token_index_begin, -- int token_index_end) -> util::Status { -+ int token_index_end) -> absl::Status { - if (token_index_begin >= token_index_end) { - return util::OkStatus(); - } -@@ -661,14 +662,14 @@ util::Status SentencePieceProcessor::Decode( - return util::OkStatus(); - } - --util::Status SentencePieceProcessor::Decode(const std::vector &ids, -+absl::Status SentencePieceProcessor::Decode(const std::vector &ids, - SentencePieceText *spt) const { - std::vector pieces; - const int num_pieces = GetPieceSize(); - pieces.reserve(ids.size()); - for (const int id : ids) { - if (id < 0 || id >= num_pieces) { -- return util::Status(util::StatusCode::kOutOfRange, -+ return absl::Status(absl::StatusCode::kOutOfRange, - absl::StrCat("Invalid id: ", id)); - } - pieces.emplace_back(IdToPiece(id)); -@@ -783,7 +784,7 @@ int SentencePieceProcessor::pad_id() const { - } - - // static --util::Status SentencePieceProcessor::ApplyExtraOptions( -+absl::Status SentencePieceProcessor::ApplyExtraOptions( - const std::vector &extra_options, - SentencePieceText *spt) const { - for (const auto &extra_option : extra_options) { -@@ -818,7 +819,7 @@ util::Status SentencePieceProcessor::ApplyExtraOptions( - } - - // static --util::Status SentencePieceProcessor::ParseExtraOptions( -+absl::Status SentencePieceProcessor::ParseExtraOptions( - absl::string_view _extra_option, - std::vector *extra_options) const { - absl::string_view extra_option(_extra_option.data(), _extra_option.size()); -@@ -877,7 +878,7 @@ void SetRandomGeneratorSeed(unsigned int seed); - - namespace io { - --util::Status LoadModelProto(absl::string_view filename, -+absl::Status LoadModelProto(absl::string_view filename, - ModelProto *model_proto) { - if (filename.empty()) { - return util::NotFoundError("model file path should not be empty."); -@@ -893,7 +894,7 @@ util::Status LoadModelProto(absl::string_view filename, - return util::OkStatus(); - } - --util::Status SaveModelProto(absl::string_view filename, -+absl::Status SaveModelProto(absl::string_view filename, - const ModelProto &model_proto) { - if (filename.empty()) { - return util::NotFoundError("model file path should not be empty."); -diff --git a/src/sentencepiece_processor.h b/src/sentencepiece_processor.h -index e8bd5f5..346fb0e 100644 ---- a/src/sentencepiece_processor.h -+++ b/src/sentencepiece_processor.h -@@ -20,9 +20,10 @@ - #include - #include - #include -+#include "absl/status/status.h" - - #if defined(_USE_INTERNAL_STRING_VIEW) --#include "third_party/absl/strings/string_view.h" -+#include "absl/strings/string_view.h" - #elif defined(_USE_TF_STRING_VIEW) - #include "absl/strings/string_view.h" - #else -@@ -185,7 +186,7 @@ class SentencePieceProcessor { - - // Loads model from `filename`. - // Returns false if `filename` cannot be loaded. -- virtual util::Status Load(absl::string_view filename); -+ virtual absl::Status Load(absl::string_view filename); - - // Loads model from `filename`. - // Crash if `filename` cannot be loaded. -@@ -193,24 +194,24 @@ class SentencePieceProcessor { - - // Loads model from `model_proto`. - // `model_proto` is copied. -- virtual util::Status Load(const ModelProto &model_proto); -+ virtual absl::Status Load(const ModelProto &model_proto); - - // Loads model from `model_proto`. - // `model_proto` is moved. -- virtual util::Status Load(std::unique_ptr model_proto); -+ virtual absl::Status Load(std::unique_ptr model_proto); - - // Loads model from `serialized`, which is a string-serialized model proto. - // Useful to load the model from a platform independent blob object. -- virtual util::Status LoadFromSerializedProto(absl::string_view serialized); -+ virtual absl::Status LoadFromSerializedProto(absl::string_view serialized); - - // Returns the status. Encode/Decode methods are valid when status is OK. -- virtual util::Status status() const; -+ virtual absl::Status status() const; - - // Sets encode extra_option sequence. -- virtual util::Status SetEncodeExtraOptions(absl::string_view extra_option); -+ virtual absl::Status SetEncodeExtraOptions(absl::string_view extra_option); - - // Sets decode extra_option sequence. -- virtual util::Status SetDecodeExtraOptions(absl::string_view extra_option); -+ virtual absl::Status SetDecodeExtraOptions(absl::string_view extra_option); - - ////////////////////////////////////////////////////////////// - // Vocabulary restriction. -@@ -219,41 +220,41 @@ class SentencePieceProcessor { - - // Restricts the vocabulary set. - // The input sentences are encoded into the tokens in `valid_vocab`. -- virtual util::Status SetVocabulary( -+ virtual absl::Status SetVocabulary( - const std::vector &valid_vocab); - - // Reverts the vocabulary restriction. -- virtual util::Status ResetVocabulary(); -+ virtual absl::Status ResetVocabulary(); - - // Loads the valid vocabulary set from `filename` in TSV format. - // Format: . - // Any token with frequency < threshold will be treated as OOV. -- virtual util::Status LoadVocabulary(absl::string_view filename, -+ virtual absl::Status LoadVocabulary(absl::string_view filename, - int threshold); - - ////////////////////////////////////////////////////////////// - // Simple API. - // - // Given a UTF8 input, encodes it into a sequence of sentence pieces. -- virtual util::Status Encode(absl::string_view input, -+ virtual absl::Status Encode(absl::string_view input, - std::vector *pieces) const; - - // Given a UTF8 input, encodes it into a sequence of ids. -- virtual util::Status Encode(absl::string_view input, -+ virtual absl::Status Encode(absl::string_view input, - std::vector *ids) const; - - // Given a sequence of pieces, decodes it into a detokenized output. -- virtual util::Status Decode(const std::vector &pieces, -+ virtual absl::Status Decode(const std::vector &pieces, - std::string *detokenized) const; - - // Given a sequence of ids, decodes it into a detokenized output. -- virtual util::Status Decode(const std::vector &ids, -+ virtual absl::Status Decode(const std::vector &ids, - std::string *detokenized) const; - - // Sets the encoder version. Normally users do not need to call this function. - // But they can call this fucntion just in case if they want to fall back to - // the original encoder. -- virtual util::Status SetEncoderVersion(EncoderVersion encoder_version); -+ virtual absl::Status SetEncoderVersion(EncoderVersion encoder_version); - - // Returns the current encoder version in use. - virtual EncoderVersion GetEncoderVersion() const; -@@ -261,12 +262,12 @@ class SentencePieceProcessor { - ////////////////////////////////////////////////////////////// - // NBest API. - // Same as Encode, but returns nbest results. -- virtual util::Status NBestEncode( -+ virtual absl::Status NBestEncode( - absl::string_view input, int nbest_size, - std::vector> *pieces) const; - - // Same as Encode, but returns nbest results. -- virtual util::Status NBestEncode(absl::string_view input, int nbest_size, -+ virtual absl::Status NBestEncode(absl::string_view input, int nbest_size, - std::vector> *ids) const; - - ////////////////////////////////////////////////////////////// -@@ -289,12 +290,12 @@ class SentencePieceProcessor { - // in https://arxiv.org/abs/1910.13267 - // Nbest-based sampling is not supported so nbest_size parameter is ignored in - // BPE. -- virtual util::Status SampleEncode(absl::string_view input, int nbest_size, -+ virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, - float alpha, - std::vector *pieces) const; - - // Same as above, but returns a sequence of ids. -- virtual util::Status SampleEncode(absl::string_view input, int nbest_size, -+ virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, - float alpha, std::vector *ids) const; - - ////////////////////////////////////////////////////////////// -@@ -303,16 +304,16 @@ class SentencePieceProcessor { - // and internal sentencepiece sequence. - // - // Given a UTF8 input, encodes it into SentencePieceText. -- virtual util::Status Encode(absl::string_view input, -+ virtual absl::Status Encode(absl::string_view input, - SentencePieceText *spt) const; - - // Same as above, but returns NBestSentencePieceText. -- virtual util::Status NBestEncode(absl::string_view input, int nbest_size, -+ virtual absl::Status NBestEncode(absl::string_view input, int nbest_size, - NBestSentencePieceText *nbest_spt) const; - - // Same as above, but samples one segmentation from the hypotheses - // (Lattice). -- virtual util::Status SampleEncode(absl::string_view input, int nbest_size, -+ virtual absl::Status SampleEncode(absl::string_view input, int nbest_size, - float alpha, SentencePieceText *spt) const; - - // Sample `samples` segmentations from the segmentation lattice. -@@ -323,21 +324,21 @@ class SentencePieceProcessor { - // If `include_best` is true, the best tokenization is always included in the - // sample, and the remaining elements are sampled excluding the best. - // This method is only available in Unigram mode. -- virtual util::Status SampleEncodeAndScore( -+ virtual absl::Status SampleEncodeAndScore( - absl::string_view input, int samples, float theta, bool wor, - bool include_best, NBestSentencePieceText *samples_spt) const; - - // Calculate entropy of possible tokenization. - // Only available in unigram mode. -- virtual util::Status CalculateEntropy(absl::string_view input, float theta, -+ virtual absl::Status CalculateEntropy(absl::string_view input, float theta, - float *entropy) const; - - // Given a sequence of pieces, decodes it into SentencePieceText. -- virtual util::Status Decode(const std::vector &pieces, -+ virtual absl::Status Decode(const std::vector &pieces, - SentencePieceText *spt) const; - - // Given a sequence of ids, decodes it into SentencePieceText. -- virtual util::Status Decode(const std::vector &ids, -+ virtual absl::Status Decode(const std::vector &ids, - SentencePieceText *spt) const; - - ////////////////////////////////////////////////////////////// -@@ -487,13 +488,13 @@ class SentencePieceProcessor { - private: - enum ExtraOption { REVERSE, BOS, EOS }; - -- util::Status ParseExtraOptions(absl::string_view extra_option, -+ absl::Status ParseExtraOptions(absl::string_view extra_option, - std::vector *extra_options) const; - -- util::Status ApplyExtraOptions(const std::vector &extra_options, -+ absl::Status ApplyExtraOptions(const std::vector &extra_options, - SentencePieceText *spt) const; - -- util::Status PopulateSentencePieceText( -+ absl::Status PopulateSentencePieceText( - absl::string_view input, absl::string_view normalized, - const std::vector &norm_to_orig, - const std::vector> &result, -@@ -526,10 +527,10 @@ namespace io { - // io::LoadModelProto("//path/spm.model", model_proto.get()); - // SentencePieceProcessor sp; - // CHECK_OK(sp.Load(std::move(model_proto))); --util::Status LoadModelProto(absl::string_view, ModelProto *model_proto); -+absl::Status LoadModelProto(absl::string_view, ModelProto *model_proto); - - // Saves `model_proto` as `filename`. --util::Status SaveModelProto(absl::string_view, const ModelProto &model_proto); -+absl::Status SaveModelProto(absl::string_view, const ModelProto &model_proto); - } // namespace io - #endif // SWIG - } // namespace sentencepiece diff --git a/src/sentencepiece_processor_test.cc b/src/sentencepiece_processor_test.cc -index 373e73e..829c3d4 100644 +index 0f00515..c65baef 100644 --- a/src/sentencepiece_processor_test.cc +++ b/src/sentencepiece_processor_test.cc -@@ -23,10 +23,10 @@ - #include "sentencepiece_processor.h" +@@ -24,9 +24,9 @@ + #include "sentencepiece_model.pb.h" #include "sentencepiece_trainer.h" #include "testharness.h" -#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/container/flat_hash_map.h" -+#include "absl/memory/memory.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "util.h" namespace sentencepiece { diff --git a/src/sentencepiece_trainer.cc b/src/sentencepiece_trainer.cc -index b9fe64f..5b33cd7 100644 +index e08594c..8ba7797 100644 --- a/src/sentencepiece_trainer.cc +++ b/src/sentencepiece_trainer.cc -@@ -22,12 +22,13 @@ +@@ -23,12 +23,12 @@ + #include "sentencepiece.pb.h" #include "sentencepiece_model.pb.h" - #include "sentencepiece_trainer.h" #include "spec_parser.h" -#include "third_party/absl/flags/flag.h" -#include "third_party/absl/strings/numbers.h" @@ -1347,253 +393,27 @@ index b9fe64f..5b33cd7 100644 +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" +#include "absl/strings/strip.h" -+#include "absl/status/status.h" #include "trainer_factory.h" #include "util.h" -@@ -37,7 +38,7 @@ static constexpr char kDefaultNormalizerName[] = "nmt_nfkc"; - } // namespace - - // static --util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, -+absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, - SentenceIterator *sentence_iterator, - std::string *serialized_model_proto) { - NormalizerSpec normalizer_spec; -@@ -45,7 +46,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, - serialized_model_proto); - } - --util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, -+absl::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, - const NormalizerSpec &normalizer_spec, - SentenceIterator *sentence_iterator, - std::string *serialized_model_proto) { -@@ -55,7 +56,7 @@ util::Status SentencePieceTrainer::Train(const TrainerSpec &trainer_spec, - } - - // static --util::Status SentencePieceTrainer::Train( -+absl::Status SentencePieceTrainer::Train( - const TrainerSpec &trainer_spec, const NormalizerSpec &normalizer_spec, - const NormalizerSpec &denormalizer_spec, - SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { -@@ -97,7 +98,7 @@ NormalizerSpec SentencePieceTrainer::GetNormalizerSpec(absl::string_view name) { - } - - // static --util::Status SentencePieceTrainer::MergeSpecsFromArgs( -+absl::Status SentencePieceTrainer::MergeSpecsFromArgs( - absl::string_view args, TrainerSpec *trainer_spec, - NormalizerSpec *normalizer_spec, NormalizerSpec *denormalizer_spec) { - CHECK_OR_RETURN(trainer_spec) << "`trainer_spec` must not be null."; -@@ -125,7 +126,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( - } - - // static --util::Status SentencePieceTrainer::MergeSpecsFromArgs( -+absl::Status SentencePieceTrainer::MergeSpecsFromArgs( - const std::unordered_map &kwargs, - TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec, - NormalizerSpec *denormalizer_spec) { -@@ -171,7 +172,7 @@ util::Status SentencePieceTrainer::MergeSpecsFromArgs( - } - - // static --util::Status SentencePieceTrainer::Train(absl::string_view args, -+absl::Status SentencePieceTrainer::Train(absl::string_view args, - SentenceIterator *sentence_iterator, - std::string *serialized_model_proto) { - LOG(INFO) << "Running command: " << args.data(); -@@ -185,7 +186,7 @@ util::Status SentencePieceTrainer::Train(absl::string_view args, - } - - // static --util::Status SentencePieceTrainer::Train( -+absl::Status SentencePieceTrainer::Train( - const std::unordered_map &kwargs, - SentenceIterator *sentence_iterator, std::string *serialized_model_proto) { - TrainerSpec trainer_spec; -@@ -198,7 +199,7 @@ util::Status SentencePieceTrainer::Train( - } - - // static --util::Status SentencePieceTrainer::PopulateNormalizerSpec( -+absl::Status SentencePieceTrainer::PopulateNormalizerSpec( - NormalizerSpec *normalizer_spec, bool is_denormalizer) { - CHECK_OR_RETURN(normalizer_spec); - -@@ -226,7 +227,7 @@ util::Status SentencePieceTrainer::PopulateNormalizerSpec( - } - - // static --util::Status SentencePieceTrainer::PopulateModelTypeFromString( -+absl::Status SentencePieceTrainer::PopulateModelTypeFromString( - absl::string_view type, TrainerSpec *spec) { - static const std::unordered_map - kModelTypeMap = {{"unigram", TrainerSpec::UNIGRAM}, -@@ -239,7 +240,7 @@ util::Status SentencePieceTrainer::PopulateModelTypeFromString( - return util::OkStatus(); - } - -- return util::StatusBuilder(util::StatusCode::kInternal, GTL_LOC) -+ return util::StatusBuilder(absl::StatusCode::kInternal, GTL_LOC) - << "\"" << type << "\" is not found in TrainerSpec"; - } - -@@ -248,7 +249,7 @@ const pretokenizer::PretokenizerForTrainingInterface *g_pretokenizer = nullptr; - } // namespace - - // static --util::Status SentencePieceTrainer::SetPretokenizerForTraining( -+absl::Status SentencePieceTrainer::SetPretokenizerForTraining( - const pretokenizer::PretokenizerForTrainingInterface *pretokenizer) { - g_pretokenizer = pretokenizer; - return util::OkStatus(); -diff --git a/src/sentencepiece_trainer.h b/src/sentencepiece_trainer.h -index bb74ab9..ec6cf93 100644 ---- a/src/sentencepiece_trainer.h -+++ b/src/sentencepiece_trainer.h -@@ -19,6 +19,7 @@ - #include - - #include "sentencepiece_processor.h" -+#include "absl/status/status.h" - - namespace sentencepiece { - -@@ -46,7 +47,7 @@ class SentenceIterator { - virtual bool done() const = 0; - virtual void Next() = 0; - virtual const std::string &value() const = 0; -- virtual util::Status status() const = 0; -+ virtual absl::Status status() const = 0; - }; - - class SentencePieceTrainer { -@@ -54,14 +55,14 @@ class SentencePieceTrainer { - // Trains SentencePiece model with `trainer_spec`. - // Default `normalizer_spec` is used. - // When `sentence_iterator` is passed, load sentences from the iterator. -- static util::Status Train(const TrainerSpec &trainer_spec, -+ static absl::Status Train(const TrainerSpec &trainer_spec, - SentenceIterator *sentence_iterator = nullptr, - std::string *serialized_model_proto = nullptr); - - // Trains SentencePiece model with `trainer_spec` and - // `normalizer_spec`. - // When `sentence_iterator` is passed, load sentences from the iterator. -- static util::Status Train(const TrainerSpec &trainer_spec, -+ static absl::Status Train(const TrainerSpec &trainer_spec, - const NormalizerSpec &normalizer_spec, - SentenceIterator *sentence_iterator = nullptr, - std::string *serialized_model_proto = nullptr); -@@ -69,7 +70,7 @@ class SentencePieceTrainer { - // Trains SentencePiece model with `trainer_spec`, `normalizer_spec` - // and `denormalizer_spec`. - // When `sentence_iterator` is passed, load sentences from the iterator. -- static util::Status Train(const TrainerSpec &trainer_spec, -+ static absl::Status Train(const TrainerSpec &trainer_spec, - const NormalizerSpec &normalizer_spec, - const NormalizerSpec &denormalizer_spec, - SentenceIterator *sentence_iterator = nullptr, -@@ -78,13 +79,13 @@ class SentencePieceTrainer { - // e.g., - // '--input=data --model_prefix=m --vocab_size=8192 model_type=unigram' - // When `sentence_iterator` is passed, load sentences from the iterator. -- static util::Status Train(absl::string_view args, -+ static absl::Status Train(absl::string_view args, - SentenceIterator *sentence_iterator = nullptr, - std::string *serialized_model_proto = nullptr); - - // Trains SentencePiece model with mapin `kwargs`. - // e.g., {{"input", "data"}, {"model_prefix, "m"}, {"vocab_size", "8192"}...} -- static util::Status Train( -+ static absl::Status Train( - const std::unordered_map &kwargs, - SentenceIterator *sentence_iterator = nullptr, - std::string *serialized_model_proto = nullptr); -@@ -96,19 +97,19 @@ class SentencePieceTrainer { - - // Populates necessary fields (precompiled_charmap) from - // `NormalizerSpec::name` or `NormalizerSpec::normalization_rule_tsv`. -- static util::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec, -+ static absl::Status PopulateNormalizerSpec(NormalizerSpec *normalizer_spec, - bool is_denormalizer = false); - - // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the - // std::unordered_map in `kargs`. -- static util::Status MergeSpecsFromArgs( -+ static absl::Status MergeSpecsFromArgs( - const std::unordered_map &kwargs, - TrainerSpec *trainer_spec, NormalizerSpec *normalizer_spec, - NormalizerSpec *denormalizer_spec); - - // Overrides `trainer_spec`, `normalizer_spec`, `denormalizer_spec` with the - // command line flags in `args`. -- static util::Status MergeSpecsFromArgs(absl::string_view args, -+ static absl::Status MergeSpecsFromArgs(absl::string_view args, - TrainerSpec *trainer_spec, - NormalizerSpec *normalizer_spec, - NormalizerSpec *denormalizer_spec); -@@ -116,7 +117,7 @@ class SentencePieceTrainer { - // Injects global pre-tokenizer that are applied in training time. - // Pretokenizer is only used for extracting pieces. - // TODO(taku): It would be better to inject per `trainer_spec`. -- static util::Status SetPretokenizerForTraining( -+ static absl::Status SetPretokenizerForTraining( - const pretokenizer::PretokenizerForTrainingInterface *pretokenizer); - - // Returns the current pretokenizer. if no pretokenizer is defined, returns -@@ -129,17 +130,17 @@ class SentencePieceTrainer { - // with comma-separated values. `field_name` must not be a nested message. - // The body of these functions are automatically generated with - // data/gen_spec_parser.pl -- static util::Status SetProtoField(const std::string &name, -+ static absl::Status SetProtoField(const std::string &name, - const std::string &value, - TrainerSpec *message); - -- static util::Status SetProtoField(const std::string &name, -+ static absl::Status SetProtoField(const std::string &name, - const std::string &value, - NormalizerSpec *message); - - // Populates model type from string representation, e.g., "bpe". - // Supported model: "unigram", "bpe", "word", "char". -- static util::Status PopulateModelTypeFromString(absl::string_view type, -+ static absl::Status PopulateModelTypeFromString(absl::string_view type, - TrainerSpec *trainer_spec); - - private: diff --git a/src/sentencepiece_trainer_test.cc b/src/sentencepiece_trainer_test.cc -index e44e66b..00c8d08 100644 +index 80f92c0..092f9c9 100644 --- a/src/sentencepiece_trainer_test.cc +++ b/src/sentencepiece_trainer_test.cc -@@ -16,7 +16,8 @@ +@@ -17,7 +17,7 @@ + #include "filesystem.h" #include "sentencepiece_model.pb.h" - #include "sentencepiece_trainer.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" +#include "absl/strings/str_cat.h" -+#include "absl/status/status.h" #include "util.h" namespace sentencepiece { -@@ -109,7 +110,7 @@ TEST(SentencePieceTrainerTest, TrainFromIterator) { - bool done() const override { return idx_ == vec_.size(); } - void Next() override { ++idx_; } - const std::string &value() const override { return vec_[idx_]; } -- util::Status status() const override { return util::OkStatus(); } -+ absl::Status status() const override { return util::OkStatus(); } - - private: - std::vector vec_; diff --git a/src/spec_parser.h b/src/spec_parser.h -index 2c5a95b..259c45d 100644 +index 724d11b..ea0b5ee 100644 --- a/src/spec_parser.h +++ b/src/spec_parser.h -@@ -19,8 +19,9 @@ +@@ -19,8 +19,8 @@ #include #include "sentencepiece_processor.h" @@ -1601,89 +421,11 @@ index 2c5a95b..259c45d 100644 -#include "third_party/absl/strings/str_split.h" +#include "absl/strings/ascii.h" +#include "absl/strings/str_split.h" -+#include "absl/status/status.h" #include "util.h" namespace sentencepiece { -@@ -49,7 +50,7 @@ namespace sentencepiece { - if (name == #param_name) { \ - int32 v; \ - if (!string_util::lexical_cast(value, &v)) \ -- return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ -+ return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ - << "cannot parse \"" << value << "\" as int."; \ - message->set_##param_name(v); \ - return util::OkStatus(); \ -@@ -59,7 +60,7 @@ namespace sentencepiece { - if (name == #param_name) { \ - uint64 v; \ - if (!string_util::lexical_cast(value, &v)) \ -- return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ -+ return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ - << "cannot parse \"" << value << "\" as int."; \ - message->set_##param_name(v); \ - return util::OkStatus(); \ -@@ -69,7 +70,7 @@ namespace sentencepiece { - if (name == #param_name) { \ - double v; \ - if (!string_util::lexical_cast(value, &v)) \ -- return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ -+ return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ - << "cannot parse \"" << value << "\" as int."; \ - message->set_##param_name(v); \ - return util::OkStatus(); \ -@@ -79,7 +80,7 @@ namespace sentencepiece { - if (name == #param_name) { \ - bool v; \ - if (!string_util::lexical_cast(value.empty() ? "true" : value, &v)) \ -- return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ -+ return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ - << "cannot parse \"" << value << "\" as bool."; \ - message->set_##param_name(v); \ - return util::OkStatus(); \ -@@ -89,7 +90,7 @@ namespace sentencepiece { - if (name == #param_name) { \ - const auto it = map_name.find(absl::AsciiStrToUpper(value)); \ - if (it == map_name.end()) \ -- return util::StatusBuilder(util::StatusCode::kInvalidArgument, GTL_LOC) \ -+ return util::StatusBuilder(absl::StatusCode::kInvalidArgument, GTL_LOC) \ - << "unknown enumeration value of \"" << value << "\" as " \ - << #map_name; \ - message->set_##param_name(it->second); \ -@@ -186,7 +187,7 @@ inline std::string PrintProto(const NormalizerSpec &message, - return os.str(); - } - --util::Status SentencePieceTrainer::SetProtoField(const std::string &name, -+absl::Status SentencePieceTrainer::SetProtoField(const std::string &name, - const std::string &value, - TrainerSpec *message) { - CHECK_OR_RETURN(message); -@@ -239,11 +240,11 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, - PARSE_STRING(pad_piece); - PARSE_STRING(unk_surface); - -- return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) -+ return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) - << "unknown field name \"" << name << "\" in TrainerSpec."; - } - --util::Status SentencePieceTrainer::SetProtoField(const std::string &name, -+absl::Status SentencePieceTrainer::SetProtoField(const std::string &name, - const std::string &value, - NormalizerSpec *message) { - CHECK_OR_RETURN(message); -@@ -255,7 +256,7 @@ util::Status SentencePieceTrainer::SetProtoField(const std::string &name, - PARSE_BOOL(escape_whitespaces); - PARSE_STRING(normalization_rule_tsv); - -- return util::StatusBuilder(util::StatusCode::kNotFound, GTL_LOC) -+ return util::StatusBuilder(absl::StatusCode::kNotFound, GTL_LOC) - << "unknown field name \"" << name << "\" in NormalizerSpec."; - } - diff --git a/src/spm_decode_main.cc b/src/spm_decode_main.cc -index 3382ddc..9dda65c 100644 +index bc49bd3..ed80939 100644 --- a/src/spm_decode_main.cc +++ b/src/spm_decode_main.cc @@ -21,8 +21,8 @@ @@ -1698,7 +440,7 @@ index 3382ddc..9dda65c 100644 ABSL_FLAG(std::string, model, "", "model file name"); diff --git a/src/spm_encode_main.cc b/src/spm_encode_main.cc -index 4d12a38..29b7458 100644 +index 2fbb850..7f93be0 100644 --- a/src/spm_encode_main.cc +++ b/src/spm_encode_main.cc @@ -21,10 +21,10 @@ @@ -1717,10 +459,10 @@ index 4d12a38..29b7458 100644 ABSL_FLAG(std::string, model, "", "model file name"); diff --git a/src/spm_export_vocab_main.cc b/src/spm_export_vocab_main.cc -index b5d93cb..70a65c1 100644 +index e5b97df..480c639 100644 --- a/src/spm_export_vocab_main.cc +++ b/src/spm_export_vocab_main.cc -@@ -20,7 +20,7 @@ +@@ -19,7 +19,7 @@ #include "init.h" #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" @@ -1730,7 +472,7 @@ index b5d93cb..70a65c1 100644 ABSL_FLAG(std::string, output, "", "Output filename"); ABSL_FLAG(std::string, model, "", "input model file name"); diff --git a/src/spm_normalize_main.cc b/src/spm_normalize_main.cc -index 96da360..8c541b8 100644 +index 39f3ef9..b2b7562 100644 --- a/src/spm_normalize_main.cc +++ b/src/spm_normalize_main.cc @@ -21,7 +21,7 @@ @@ -1743,7 +485,7 @@ index 96da360..8c541b8 100644 ABSL_FLAG(std::string, model, "", "Model file name"); ABSL_FLAG(bool, use_internal_normalization, false, diff --git a/src/spm_train_main.cc b/src/spm_train_main.cc -index baf8dbf..ba1e811 100644 +index a8d4b20..3ca6f69 100644 --- a/src/spm_train_main.cc +++ b/src/spm_train_main.cc @@ -18,10 +18,10 @@ @@ -1775,7 +517,7 @@ index f6b1efe..daf2d14 100644 namespace sentencepiece { diff --git a/src/testharness.h b/src/testharness.h -index 9879b06..98317ad 100644 +index 4e2fa88..327b040 100644 --- a/src/testharness.h +++ b/src/testharness.h @@ -21,9 +21,9 @@ @@ -1791,217 +533,47 @@ index 9879b06..98317ad 100644 ABSL_DECLARE_FLAG(std::string, test_tmpdir); ABSL_DECLARE_FLAG(std::string, test_srcdir); -diff --git a/src/trainer_factory.cc b/src/trainer_factory.cc -index d1d2541..ff594d0 100644 ---- a/src/trainer_factory.cc -+++ b/src/trainer_factory.cc -@@ -14,7 +14,7 @@ - - #include "bpe_model_trainer.h" - #include "char_model_trainer.h" --#include "third_party/absl/memory/memory.h" -+#include "absl/memory/memory.h" - #include "trainer_factory.h" - #include "unigram_model_trainer.h" - #include "word_model_trainer.h" diff --git a/src/trainer_interface.cc b/src/trainer_interface.cc -index a3a4b74..e6a2587 100644 +index 45b3ce0..43ad436 100644 --- a/src/trainer_interface.cc +++ b/src/trainer_interface.cc -@@ -26,13 +26,14 @@ +@@ -28,12 +28,12 @@ #include "normalizer.h" #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" -#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/memory/memory.h" -#include "third_party/absl/strings/numbers.h" -#include "third_party/absl/strings/str_cat.h" -#include "third_party/absl/strings/str_format.h" -#include "third_party/absl/strings/str_join.h" -#include "third_party/absl/strings/str_split.h" +#include "absl/container/flat_hash_map.h" -+#include "absl/memory/memory.h" +#include "absl/strings/numbers.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" +#include "absl/strings/str_join.h" +#include "absl/strings/str_split.h" -+#include "absl/status/status.h" - #include "trainer_interface.h" #include "unicode_script.h" #include "util.h" -@@ -49,7 +50,7 @@ const char32 TrainerInterface::kUPPBoundaryChar = L'\u0009'; - const char TrainerInterface::kUPPBoundaryStr[] = "\t"; - - namespace { --util::Status VerifySpec(const TrainerSpec &trainer_spec) { -+absl::Status VerifySpec(const TrainerSpec &trainer_spec) { - CHECK_GT_OR_RETURN(trainer_spec.vocab_size(), 0); - - if (trainer_spec.model_type() == TrainerSpec::UNIGRAM || -@@ -164,7 +165,7 @@ bool MultiFileSentenceIterator::done() const { - return (!read_done_ && file_index_ == files_.size()); - } - --util::Status MultiFileSentenceIterator::status() const { -+absl::Status MultiFileSentenceIterator::status() const { - CHECK_OR_RETURN(fp_); - return fp_->status(); - } -@@ -296,7 +297,7 @@ bool TrainerInterface::IsValidSentencePiece( - return true; - } - --util::Status TrainerInterface::LoadSentences() { -+absl::Status TrainerInterface::LoadSentences() { - RETURN_IF_ERROR(status()); - CHECK_OR_RETURN(sentences_.empty()); - CHECK_OR_RETURN(required_chars_.empty()); -@@ -537,7 +538,7 @@ void TrainerInterface::SplitSentencesByWhitespace() { - LOG(INFO) << "Done! " << sentences_.size(); - } - --util::Status TrainerInterface::Serialize(ModelProto *model_proto) const { -+absl::Status TrainerInterface::Serialize(ModelProto *model_proto) const { - RETURN_IF_ERROR(status()); - - // Duplicated sentencepiece is not allowed. -@@ -611,7 +612,7 @@ util::Status TrainerInterface::Serialize(ModelProto *model_proto) const { - return util::OkStatus(); - } - --util::Status TrainerInterface::SaveModel(absl::string_view filename) const { -+absl::Status TrainerInterface::SaveModel(absl::string_view filename) const { - LOG(INFO) << "Saving model: " << filename; - ModelProto model_proto; - RETURN_IF_ERROR(Serialize(&model_proto)); -@@ -622,7 +623,7 @@ util::Status TrainerInterface::SaveModel(absl::string_view filename) const { - return util::OkStatus(); - } - --util::Status TrainerInterface::SaveVocab(absl::string_view filename) const { -+absl::Status TrainerInterface::SaveVocab(absl::string_view filename) const { - LOG(INFO) << "Saving vocabs: " << filename; - ModelProto model_proto; - RETURN_IF_ERROR(Serialize(&model_proto)); -@@ -644,7 +645,7 @@ util::Status TrainerInterface::SaveVocab(absl::string_view filename) const { - return util::OkStatus(); - } - --util::Status TrainerInterface::Save() const { -+absl::Status TrainerInterface::Save() const { - if (output_model_proto_) { - RETURN_IF_ERROR(Serialize(output_model_proto_)); - } else { -@@ -654,7 +655,7 @@ util::Status TrainerInterface::Save() const { - return util::OkStatus(); - } - --util::Status TrainerInterface::InitMetaPieces() { -+absl::Status TrainerInterface::InitMetaPieces() { - CHECK_OR_RETURN(meta_pieces_.empty()); - bool has_unk = false; diff --git a/src/trainer_interface.h b/src/trainer_interface.h -index f66d59a..b4fbc7b 100644 +index 8d625a9..0c003b6 100644 --- a/src/trainer_interface.h +++ b/src/trainer_interface.h -@@ -27,7 +27,8 @@ +@@ -27,7 +27,7 @@ #include "sentencepiece_model.pb.h" #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" -#include "third_party/absl/container/flat_hash_map.h" +#include "absl/container/flat_hash_map.h" -+#include "absl/status/status.h" #include "util.h" namespace sentencepiece { -@@ -57,7 +58,7 @@ class MultiFileSentenceIterator : public SentenceIterator { - bool done() const override; - void Next() override; - const std::string &value() const override { return value_; } -- util::Status status() const override; -+ absl::Status status() const override; - - private: - void TryRead(); -@@ -90,16 +91,16 @@ class TrainerInterface { - - // Loads sentence from `sentence_iterator` and stores the model - // to `output_model_proto`. -- virtual util::Status Train(SentenceIterator *sentence_iterator, -+ virtual absl::Status Train(SentenceIterator *sentence_iterator, - ModelProto *output_model_proto) { - sentence_iterator_ = sentence_iterator; - output_model_proto_ = output_model_proto; - return Train(); - } - -- virtual util::Status Train() { return status(); } -+ virtual absl::Status Train() { return status(); } - -- virtual util::Status status() const { return status_; } -+ virtual absl::Status status() const { return status_; } - - FRIEND_TEST(TrainerInterfaceTest, IsValidSentencePieceTest); - FRIEND_TEST(TrainerInterfaceTest, OverrideSpecialPiecesTest); -@@ -115,7 +116,7 @@ class TrainerInterface { - - // Loads all sentences from spec.input() or SentenceIterator. - // It loads at most input_sentence_size sentences. -- util::Status LoadSentences(); -+ absl::Status LoadSentences(); - - // Splits all sentencecs by whitespaces and - // replace the |sentences_| with tokenized string. -@@ -125,7 +126,7 @@ class TrainerInterface { - void SplitSentencesByWhitespace(); - - // Save model files into spec.model_prefix(). -- util::Status Save() const; -+ absl::Status Save() const; - - // Set of characters which must be included in the final vocab. - // The value of this map stores the frequency. -@@ -152,7 +153,7 @@ class TrainerInterface { - meta_pieces_; - - // Detect errors on initialization. -- util::Status status_; -+ absl::Status status_; - - // Loads sentences from SentenceIterator if not null. - SentenceIterator *sentence_iterator_ = nullptr; -@@ -162,19 +163,19 @@ class TrainerInterface { - - private: - // Serialize final_pieces_ to |model_proto|. -- util::Status Serialize(ModelProto *model_proto) const; -+ absl::Status Serialize(ModelProto *model_proto) const; - - // Saves the best sentence split with the current model for debugging. -- util::Status SaveSplits(absl::string_view filename) const; -+ absl::Status SaveSplits(absl::string_view filename) const; - - // Saves model file. -- util::Status SaveModel(absl::string_view filename) const; -+ absl::Status SaveModel(absl::string_view filename) const; - - // Saves vocabulary file for NMT. -- util::Status SaveVocab(absl::string_view filename) const; -+ absl::Status SaveVocab(absl::string_view filename) const; - - // Initializes `meta_pieces_` from TrainerSpec. -- util::Status InitMetaPieces(); -+ absl::Status InitMetaPieces(); - - // Randomly sampled raw sentences for self-testing. - std::vector self_test_samples_; diff --git a/src/trainer_interface_test.cc b/src/trainer_interface_test.cc -index 70a51ad..d7f3f0c 100644 +index feb970f..ce22bac 100644 --- a/src/trainer_interface_test.cc +++ b/src/trainer_interface_test.cc -@@ -16,8 +16,8 @@ +@@ -18,8 +18,8 @@ #include "filesystem.h" #include "testharness.h" @@ -2009,9 +581,9 @@ index 70a51ad..d7f3f0c 100644 -#include "third_party/absl/strings/str_format.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" - #include "trainer_interface.h" #include "util.h" + namespace sentencepiece { diff --git a/src/unicode_script.cc b/src/unicode_script.cc index 583dc30..11b24dc 100644 --- a/src/unicode_script.cc @@ -2052,24 +624,24 @@ index ab33565..e0b1c4d 100644 #include "util.h" diff --git a/src/unigram_model.cc b/src/unigram_model.cc -index 3b99060..9c72fb9 100644 +index 13f15c8..8863c7c 100644 --- a/src/unigram_model.cc +++ b/src/unigram_model.cc -@@ -22,9 +22,9 @@ +@@ -24,9 +24,9 @@ #include #include --#include "third_party/absl/memory/memory.h" +-#include "third_party/absl/container/flat_hash_map.h" -#include "third_party/absl/strings/str_split.h" -#include "third_party/absl/strings/string_view.h" -+#include "absl/memory/memory.h" ++#include "absl/container/flat_hash_map.h" +#include "absl/strings/str_split.h" +#include "absl/strings/string_view.h" - #include "unigram_model.h" #include "util.h" + namespace sentencepiece { diff --git a/src/unigram_model.h b/src/unigram_model.h -index 448e489..9062f12 100644 +index aa4f28f..655c8c3 100644 --- a/src/unigram_model.h +++ b/src/unigram_model.h @@ -24,7 +24,7 @@ @@ -2082,7 +654,7 @@ index 448e489..9062f12 100644 namespace sentencepiece { namespace unigram { diff --git a/src/unigram_model_test.cc b/src/unigram_model_test.cc -index f93b21c..808e907 100644 +index bf22da3..21cbec3 100644 --- a/src/unigram_model_test.cc +++ b/src/unigram_model_test.cc @@ -22,8 +22,8 @@ @@ -2097,58 +669,42 @@ index f93b21c..808e907 100644 namespace sentencepiece { diff --git a/src/unigram_model_trainer.cc b/src/unigram_model_trainer.cc -index 9615040..7d16bd2 100644 +index 6badade..079a2e2 100644 --- a/src/unigram_model_trainer.cc +++ b/src/unigram_model_trainer.cc -@@ -25,8 +25,9 @@ +@@ -28,10 +28,10 @@ #include "normalizer.h" #include "pretokenizer_for_training.h" #include "sentencepiece_trainer.h" -#include "third_party/absl/container/flat_hash_map.h" --#include "third_party/absl/memory/memory.h" +-#include "third_party/absl/strings/numbers.h" +-#include "third_party/absl/strings/str_replace.h" +-#include "third_party/absl/strings/str_split.h" +#include "absl/container/flat_hash_map.h" -+#include "absl/memory/memory.h" -+#include "absl/status/status.h" ++#include "absl/strings/numbers.h" ++#include "absl/strings/str_replace.h" ++#include "absl/strings/str_split.h" #include "third_party/esaxx/esa.hxx" // Suffix array library. + #include "trainer_interface.h" #include "unicode_script.h" - #include "unigram_model_trainer.h" -@@ -463,7 +464,7 @@ TrainerModel::SentencePieces Trainer::FinalizeSentencePieces( - return Sorted(final_sentencepieces); - } - --util::Status Trainer::Train() { -+absl::Status Trainer::Train() { - RETURN_IF_ERROR(status()); - - CHECK_EQ_OR_RETURN(TrainerSpec::UNIGRAM, trainer_spec_.model_type()); diff --git a/src/unigram_model_trainer.h b/src/unigram_model_trainer.h -index 91fbeb4..d41967d 100644 +index c6562e6..2d24eeb 100644 --- a/src/unigram_model_trainer.h +++ b/src/unigram_model_trainer.h -@@ -21,7 +21,8 @@ +@@ -21,7 +21,7 @@ #include #include "sentencepiece_model.pb.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" #include "trainer_interface.h" #include "unigram_model.h" #include "util.h" -@@ -68,7 +69,7 @@ class Trainer : public TrainerInterface { - : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, - denormalizer_spec) {} - -- util::Status Train() override; -+ absl::Status Train() override; - - private: - FRIEND_TEST(TrainerTest, IsValidSentencePieceTest); diff --git a/src/unigram_model_trainer_test.cc b/src/unigram_model_trainer_test.cc -index ffe515e..fdb25f6 100644 +index 31da90b..d7198da 100644 --- a/src/unigram_model_trainer_test.cc +++ b/src/unigram_model_trainer_test.cc -@@ -16,8 +16,8 @@ +@@ -22,8 +22,8 @@ #include "sentencepiece_processor.h" #include "sentencepiece_trainer.h" #include "testharness.h" @@ -2156,131 +712,40 @@ index ffe515e..fdb25f6 100644 -#include "third_party/absl/strings/str_join.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_join.h" - #include "unigram_model_trainer.h" #include "util.h" + namespace sentencepiece { diff --git a/src/util.h b/src/util.h -index 0d15863..7122c7c 100644 +index cd84327..2e989e0 100644 --- a/src/util.h +++ b/src/util.h -@@ -30,7 +30,8 @@ +@@ -30,7 +30,7 @@ #include "common.h" #include "sentencepiece_processor.h" -#include "third_party/absl/strings/string_view.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" #ifdef SPM_NO_THREADLOCAL #include -@@ -359,14 +360,14 @@ std::string StrError(int errnum); - - std::vector StrSplitAsCSV(absl::string_view text); - --inline Status OkStatus() { return Status(); } -+inline absl::Status OkStatus() { return absl::Status(); } - - #define DECLARE_ERROR(FUNC) \ -- inline util::Status FUNC##Error(absl::string_view str) { \ -- return util::Status(StatusCode::k##FUNC, str.data()); \ -+ inline absl::Status FUNC##Error(absl::string_view str) { \ -+ return absl::Status(absl::StatusCode::k##FUNC, str.data()); \ - } \ -- inline bool Is##FUNC(const util::Status &status) { \ -- return status.code() == StatusCode::k##FUNC; \ -+ inline bool Is##FUNC(const absl::Status &status) { \ -+ return status.code() ==absl::StatusCode::k##FUNC; \ - } - - DECLARE_ERROR(Cancelled) -@@ -390,8 +391,8 @@ DECLARE_ERROR(Unauthenticated) - - class StatusBuilder { - public: -- explicit StatusBuilder(StatusCode code) : code_(code) {} -- explicit StatusBuilder(StatusCode code, int loc) : code_(code) {} -+ explicit StatusBuilder(absl::StatusCode code) : code_(code) {} -+ explicit StatusBuilder(absl::StatusCode code, int loc) : code_(code) {} - - template - StatusBuilder &operator<<(const T &value) { -@@ -399,10 +400,10 @@ class StatusBuilder { - return *this; - } - -- operator Status() const { return Status(code_, os_.str()); } -+ operator absl::Status() const { return absl::Status(code_, os_.str()); } - - private: -- StatusCode code_; -+ absl::StatusCode code_; - std::ostringstream os_; - }; - -@@ -410,7 +411,7 @@ class StatusBuilder { - if (condition) { \ - } else /* NOLINT */ \ - return ::sentencepiece::util::StatusBuilder( \ -- ::sentencepiece::util::StatusCode::kInternal) \ -+ ::absl::StatusCode::kInternal) \ - << __FILE__ << "(" << __LINE__ << ") [" << #condition << "] " - - #define CHECK_EQ_OR_RETURN(a, b) CHECK_OR_RETURN((a) == (b)) diff --git a/src/util_test.cc b/src/util_test.cc -index 71d006f..67290dc 100644 +index 71d006f..231fc96 100644 --- a/src/util_test.cc +++ b/src/util_test.cc -@@ -16,7 +16,8 @@ +@@ -16,7 +16,7 @@ #include "filesystem.h" #include "testharness.h" -#include "third_party/absl/strings/str_cat.h" +#include "absl/strings/str_cat.h" -+#include "absl/status/status.h" #include "util.h" namespace sentencepiece { -@@ -376,27 +377,27 @@ TEST(UtilTest, STLDeleteELementsTest) { - } - - TEST(UtilTest, StatusTest) { -- const util::Status ok; -+ const absl::Status ok; - EXPECT_TRUE(ok.ok()); -- EXPECT_EQ(util::StatusCode::kOk, ok.code()); -+ EXPECT_EQ(absl::StatusCode::kOk, ok.code()); - EXPECT_EQ(std::string(""), ok.message()); - -- const util::Status s1(util::StatusCode::kUnknown, "unknown"); -- const util::Status s2(util::StatusCode::kUnknown, std::string("unknown")); -+ const absl::Status s1(absl::StatusCode::kUnknown, "unknown"); -+ const absl::Status s2(absl::StatusCode::kUnknown, std::string("unknown")); - -- EXPECT_EQ(util::StatusCode::kUnknown, s1.code()); -- EXPECT_EQ(util::StatusCode::kUnknown, s2.code()); -+ EXPECT_EQ(absl::StatusCode::kUnknown, s1.code()); -+ EXPECT_EQ(absl::StatusCode::kUnknown, s2.code()); - EXPECT_EQ(std::string("unknown"), s1.message()); - EXPECT_EQ(std::string("unknown"), s2.message()); - - auto ok2 = util::OkStatus(); - EXPECT_TRUE(ok2.ok()); -- EXPECT_EQ(util::StatusCode::kOk, ok2.code()); -+ EXPECT_EQ(absl::StatusCode::kOk, ok2.code()); - EXPECT_EQ(std::string(""), ok2.message()); - - util::OkStatus().IgnoreError(); - for (int i = 1; i <= 16; ++i) { -- util::Status s(static_cast(i), "message"); -+ absl::Status s(static_cast(i), "message"); - EXPECT_TRUE(s.ToString().find("message") != std::string::npos) - << s.ToString(); - } diff --git a/src/word_model_trainer.cc b/src/word_model_trainer.cc -index 0b8b062..bc1f86b 100644 +index 0b8b062..b057843 100644 --- a/src/word_model_trainer.cc +++ b/src/word_model_trainer.cc -@@ -15,8 +15,9 @@ +@@ -15,8 +15,8 @@ #include #include @@ -2288,40 +753,9 @@ index 0b8b062..bc1f86b 100644 -#include "third_party/absl/strings/string_view.h" +#include "absl/container/flat_hash_map.h" +#include "absl/strings/string_view.h" -+#include "absl/status/status.h" #include "util.h" #include "word_model.h" #include "word_model_trainer.h" -@@ -24,7 +25,7 @@ - namespace sentencepiece { - namespace word { - --util::Status Trainer::Train() { -+absl::Status Trainer::Train() { - RETURN_IF_ERROR(status()); - - CHECK_OR_RETURN(normalizer_spec_.escape_whitespaces()); -diff --git a/src/word_model_trainer.h b/src/word_model_trainer.h -index 76f8f32..436e595 100644 ---- a/src/word_model_trainer.h -+++ b/src/word_model_trainer.h -@@ -17,6 +17,7 @@ - - #include "sentencepiece_model.pb.h" - #include "trainer_interface.h" -+#include "absl/status/status.h" - - namespace sentencepiece { - namespace word { -@@ -34,7 +35,7 @@ class Trainer : public TrainerInterface { - : TrainerInterface::TrainerInterface(trainer_spec, normalizer_spec, - denormalizer_spec) {} - -- util::Status Train() override; -+ absl::Status Train() override; - }; - } // namespace word - } // namespace sentencepiece diff --git a/src/word_model_trainer_test.cc b/src/word_model_trainer_test.cc index c4a8bc6..366810f 100644 --- a/src/word_model_trainer_test.cc @@ -2336,4 +770,4 @@ index c4a8bc6..366810f 100644 +#include "absl/strings/str_join.h" #include "util.h" #include "word_model_trainer.h" - \ No newline at end of file +