From 0eefedf8cc3d01e5608ff42b414b2eb127d64fab Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Wed, 24 Mar 2021 09:13:01 +0000 Subject: [PATCH 001/135] Dynamic swap working, as long as the vocabularies are the same --- src/CMakeLists.txt | 7 +- src/command/marian_swapper.cpp | 140 +++++++++++++++++++++++++++++++++ src/common/config_parser.cpp | 3 +- src/graph/expression_graph.h | 12 +++ src/graph/parameters.h | 3 +- src/tensors/gpu/swap.cu | 10 +++ src/tensors/gpu/swap.h | 6 ++ 7 files changed, 178 insertions(+), 3 deletions(-) create mode 100644 src/command/marian_swapper.cpp create mode 100644 src/tensors/gpu/swap.cu create mode 100644 src/tensors/gpu/swap.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 64b86a695..397a0330f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -182,6 +182,7 @@ if(CUDA_FOUND) tensors/gpu/add_all.cu tensors/gpu/tensor_operators.cu tensors/gpu/cudnn_wrappers.cu + tensors/gpu/swap.cu translator/nth_element.cu translator/helpers.cu STATIC) @@ -213,6 +214,10 @@ if (NOT COMPILE_LIBRARY_ONLY) set_target_properties(marian_decoder PROPERTIES OUTPUT_NAME marian-decoder) target_compile_options(marian_decoder PRIVATE ${ALL_WARNINGS}) + add_executable(marian_swapper command/marian_swapper.cpp) + set_target_properties(marian_swapper PROPERTIES OUTPUT_NAME marian_swapper) + target_compile_options(marian_swapper PRIVATE ${ALL_WARNINGS}) + add_executable(marian_scorer command/marian_scorer.cpp) set_target_properties(marian_scorer PROPERTIES OUTPUT_NAME marian-scorer) target_compile_options(marian_scorer PRIVATE ${ALL_WARNINGS}) @@ -225,7 +230,7 @@ if (NOT COMPILE_LIBRARY_ONLY) set_target_properties(marian_conv PROPERTIES OUTPUT_NAME marian-conv) target_compile_options(marian_conv PRIVATE ${ALL_WARNINGS}) - set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_scorer marian_vocab marian_conv) + set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_swapper marian_scorer marian_vocab marian_conv) # 
marian.zip and marian.tgz # This combines marian, marian_decoder in a single ZIP or TAR file for diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp new file mode 100644 index 000000000..3b9a4f75c --- /dev/null +++ b/src/command/marian_swapper.cpp @@ -0,0 +1,140 @@ +#include "marian.h" +#include "common/logging.h" +#include "data/corpus.h" +#include "data/text_input.h" +#include "translator/beam_search.h" +#include "translator/translator.h" +#include "common/io.h" +#include "common/timer.h" +#include +#include "tensors/gpu/swap.h" +namespace marian { +class SwapperTranslator { + private: + Ptr opts_; + Ptr graph_; + Ptr scorer_; + + std::vector> srcVocabs_; + Ptr trgVocab_; + + // Models to store model; + bool primary_ = true; + std::vector primaryModel_; + std::vector secondaryModel_; + + std::vector prepareItem(std::string path){ + std::vector ret = io::loadItems(path); + // Find the special element and remove it: + size_t special_idx = 0; + for (size_t i = 0; i < ret.size(); i++) { + if (ret[i].name == "special:model.yml") { + special_idx = i; + break; + } + } + ret.erase(ret.begin() + special_idx); + // Prepare the name so that it matches the named map + for (auto&& item : ret) { + item.name = "F0::" + item.name; + } + return ret; + } + + public: + SwapperTranslator(Ptr opt) : opts_(opt), + primaryModel_(prepareItem(opt->get>("models")[0])), + secondaryModel_(prepareItem(opt->get("swap-model"))) { + opts_->set("inference", true); + opts_->set("shuffle", "none"); + + // Get vocabs + auto vocabPaths = opts_->get>("vocabs"); + std::vector maxVocabs = opts_->get>("dim-vocabs"); + + for(size_t i = 0; i < vocabPaths.size() - 1; ++i) { + Ptr vocab = New(opts_, i); + vocab->load(vocabPaths[i], maxVocabs[i]); + srcVocabs_.emplace_back(vocab); + } + + trgVocab_ = New(opts_, vocabPaths.size() - 1); + trgVocab_->load(vocabPaths.back()); + + // get device IDs + auto devices = Config::getDevices(opts_); + auto numDevices = devices.size(); + std::cerr 
<< "Num devices: " << numDevices << std::endl; + + // Create graph + graph_ = New(); + auto prec = opts_->get>("precision", {"float32"}); + graph_->setDefaultElementType(typeFromString(prec[0])); + graph_->setDevice(devices[0]); + graph_->reserveWorkspaceMB(opts_->get("workspace")); + scorer_ = createScorers(opts_)[0]; + scorer_->init(graph_); + graph_->forward(); + } + + void translateTxt(std::string txt) { + std::vector instr(1, txt); + auto corpus_ = New(instr, srcVocabs_, opts_); + data::BatchGenerator batchGenerator(corpus_, opts_, nullptr, false); + + static const std::vector > scorers(1, scorer_); + auto search = New(opts_, scorers, trgVocab_); + auto printer = New(opts_, trgVocab_); + static int i = 0; + for (auto&& batch : batchGenerator) { + auto histories = search->search(graph_, batch); + for(auto history : histories) { + std::stringstream best1; + std::stringstream bestn; + printer->print(history, best1, bestn); + LOG(info, "Translation {} : {}", i, best1.str()); + i++; + } + } + } + + void swapActual(std::vector& from) { + auto namedMap = graph_->getParamsNamedMap(); + for (auto&& item : from) { + auto to = reinterpret_cast(namedMap[item.name]->val()->memory()->data()); + swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size()); + } + } + + void swap() { + timer::Timer timer; + if (primary_) { + swapActual(secondaryModel_); + primary_ = false; + } else { + swapActual(primaryModel_); + primary_ = true; + } + LOG(info, "Swap took: {:.8f}s wall", timer.elapsed()); + } +}; +} // namespace marian + +int main(int argc, char** argv) { + using namespace marian; + auto options = parseOptions(argc, argv, cli::mode::translation); + SwapperTranslator swapper(options); + + std::string line; + while (std::getline(std::cin, line)) { + if (line == "quit") { + break; + } else if (line == "swap") { + swapper.swap(); + } else { + swapper.translateTxt(line); + } + } + + return 0; +} diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 
602509c59..530fad69a 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -689,7 +689,8 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { cli.add>("--output-approx-knn", "Use approximate knn search in output layer (currently only in transformer)") ->implicit_val("100 1024"); - + cli.add("--swap-model", + "Path to model to swap to."); #if 0 // @TODO: Ask Hany if there are any decoding-time options // add ULR settings addSuboptionsULR(cli); diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index adc0aeae9..2fa28f67b 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -232,6 +232,18 @@ class ExpressionGraph : public std::enable_shared_from_this { namespace_ = newNamespace; } + const std::unordered_map & getParamsNamedMap() const { + if (paramsByElementType_.size() != 1) { + ABORT("Expected exactly one parameter datatype, got", paramsByElementType_.size()); + } + for(auto&& kvParams : paramsByElementType_) { + auto cur_param = kvParams.second; + return cur_param->getMap(); + } + ABORT("We should never get here"); // Just to satisfy compiler warnings; + return paramsByElementType_.find(Type::float32)->second->getMap(); + } + /** * Copy all parameter objects from one graph to current graph. * @param graph a pointer to a graph object diff --git a/src/graph/parameters.h b/src/graph/parameters.h index 8b4af9dd5..d5ede0b4e 100644 --- a/src/graph/parameters.h +++ b/src/graph/parameters.h @@ -2,6 +2,7 @@ #include #include +#include #include #include "common/definitions.h" @@ -22,7 +23,7 @@ class Parameters { /** @brief List of all parameter nodes of this expression graph. 
*/ std::vector params_; - std::map named_; + std::unordered_map named_; Ptr vals_; Ptr grads_; diff --git a/src/tensors/gpu/swap.cu b/src/tensors/gpu/swap.cu new file mode 100644 index 000000000..3eb2c0df8 --- /dev/null +++ b/src/tensors/gpu/swap.cu @@ -0,0 +1,10 @@ +#include "cuda_helpers.h" +void copyCpuToGpu(const char * in, char * gpuOut); + +namespace marian { + namespace swapper { + void copyCpuToGpu(char * gpuOut, const char * in, size_t count) { + CUDA_CHECK(cudaMemcpy(gpuOut, in, count, cudaMemcpyHostToDevice)); + } + } +} diff --git a/src/tensors/gpu/swap.h b/src/tensors/gpu/swap.h new file mode 100644 index 000000000..86b3094d7 --- /dev/null +++ b/src/tensors/gpu/swap.h @@ -0,0 +1,6 @@ +#include +namespace marian { + namespace swapper { + void copyCpuToGpu(char * gpuOut, const char * in, size_t count); + } +} From 521f6343cb9ea86549e15cd11d0d670dd1ddbc7c Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 28 Mar 2021 18:09:20 +0000 Subject: [PATCH 002/135] Model and GPUSlot separation, add vocab support --- src/command/marian_swapper.cpp | 218 +++++++++++++++++---------------- 1 file changed, 115 insertions(+), 103 deletions(-) diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp index 3b9a4f75c..7c5e92abe 100644 --- a/src/command/marian_swapper.cpp +++ b/src/command/marian_swapper.cpp @@ -9,130 +9,142 @@ #include #include "tensors/gpu/swap.h" namespace marian { -class SwapperTranslator { - private: - Ptr opts_; - Ptr graph_; - Ptr scorer_; - - std::vector> srcVocabs_; - Ptr trgVocab_; - - // Models to store model; - bool primary_ = true; - std::vector primaryModel_; - std::vector secondaryModel_; - - std::vector prepareItem(std::string path){ - std::vector ret = io::loadItems(path); - // Find the special element and remove it: - size_t special_idx = 0; - for (size_t i = 0; i < ret.size(); i++) { - if (ret[i].name == "special:model.yml") { - special_idx = i; - break; - } - } - ret.erase(ret.begin() + special_idx); - // 
Prepare the name so that it matches the named map - for (auto&& item : ret) { - item.name = "F0::" + item.name; - } - return ret; - } - public: - SwapperTranslator(Ptr opt) : opts_(opt), - primaryModel_(prepareItem(opt->get>("models")[0])), - secondaryModel_(prepareItem(opt->get("swap-model"))) { - opts_->set("inference", true); - opts_->set("shuffle", "none"); - - // Get vocabs - auto vocabPaths = opts_->get>("vocabs"); - std::vector maxVocabs = opts_->get>("dim-vocabs"); - - for(size_t i = 0; i < vocabPaths.size() - 1; ++i) { - Ptr vocab = New(opts_, i); - vocab->load(vocabPaths[i], maxVocabs[i]); - srcVocabs_.emplace_back(vocab); +/* A model loaded on the CPU and possibly on a GPU */ +class Model { + private: + std::vector parameters_; + std::vector> srcVocabs_; + Ptr trgVocab_; + + public: + Model(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) + : parameters_(io::loadItems(parameters)) { + // Load parameters. + // Find the special element and remove it: + size_t special_idx = 0; + for (size_t i = 0; i < parameters_.size(); i++) { + if (parameters_[i].name == "special:model.yml") { + special_idx = i; + break; } + } + parameters_.erase(parameters_.begin() + special_idx); + // Prepare the name so that it matches the named map + for (auto&& item : parameters_) { + item.name = "F0::" + item.name; + } - trgVocab_ = New(opts_, vocabPaths.size() - 1); - trgVocab_->load(vocabPaths.back()); + // Load source vocabs. 
+ const std::vector &maxVocabs = options->get>("dim-vocabs"); + for(size_t i = 0; i < sourceVocabPaths.size(); ++i) { + Ptr vocab = New(options, i); + vocab->load(sourceVocabPaths[i], maxVocabs[i]); + srcVocabs_.emplace_back(vocab); + } - // get device IDs - auto devices = Config::getDevices(opts_); - auto numDevices = devices.size(); - std::cerr << "Num devices: " << numDevices << std::endl; - - // Create graph - graph_ = New(); - auto prec = opts_->get>("precision", {"float32"}); - graph_->setDefaultElementType(typeFromString(prec[0])); - graph_->setDevice(devices[0]); - graph_->reserveWorkspaceMB(opts_->get("workspace")); - scorer_ = createScorers(opts_)[0]; - scorer_->init(graph_); - graph_->forward(); + // Load target vocab. + trgVocab_ = New(options, sourceVocabPaths.size()); + trgVocab_->load(targetVocabPath); } - void translateTxt(std::string txt) { - std::vector instr(1, txt); - auto corpus_ = New(instr, srcVocabs_, opts_); - data::BatchGenerator batchGenerator(corpus_, opts_, nullptr, false); - - static const std::vector > scorers(1, scorer_); - auto search = New(opts_, scorers, trgVocab_); - auto printer = New(opts_, trgVocab_); - static int i = 0; - for (auto&& batch : batchGenerator) { - auto histories = search->search(graph_, batch); - for(auto history : histories) { - std::stringstream best1; - std::stringstream bestn; - printer->print(history, best1, bestn); - LOG(info, "Translation {} : {}", i, best1.str()); - i++; - } - } + const std::vector &Parameters() const { return parameters_; } + + const std::vector> &SrcVocabs() const { return srcVocabs_; } + + Ptr TrgVocab() const { return trgVocab_; } +}; + +/* Reserved space on a GPU with which to translate */ +class GPUSlot { + private: + Ptr options_; + Ptr graph_; + std::vector > scorers_; + + // Last model used for translation. Used to skip loading. 
+ const Model *loadedModel_; + + void Load(const std::vector ¶meters) { + timer::Timer timer; + auto namedMap = graph_->getParamsNamedMap(); + for (auto&& item : parameters) { + auto to = reinterpret_cast(namedMap[item.name]->val()->memory()->data()); + swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size()); + } + LOG(info, "Load took: {:.8f}s wall", timer.elapsed()); } - void swapActual(std::vector& from) { - auto namedMap = graph_->getParamsNamedMap(); - for (auto&& item : from) { - auto to = reinterpret_cast(namedMap[item.name]->val()->memory()->data()); - swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size()); - } + public: + explicit GPUSlot(Ptr options) : options_(options), loadedModel_(nullptr) { + options_->set("inference", true); + options_->set("shuffle", "none"); + // get device IDs + auto devices = Config::getDevices(options_); + auto numDevices = devices.size(); + std::cerr << "Num devices: " << numDevices << std::endl; + + // Create graph + graph_ = New(); + auto prec = options_->get>("precision", {"float32"}); + graph_->setDefaultElementType(typeFromString(prec[0])); + graph_->setDevice(devices[0]); + graph_->reserveWorkspaceMB(options_->get("workspace")); + // TODO: multiple scorers. 
+ Ptr scorer = createScorers(options_)[0]; + scorer->init(graph_); + scorers_.push_back(scorer); + graph_->forward(); } - void swap() { - timer::Timer timer; - if (primary_) { - swapActual(secondaryModel_); - primary_ = false; - } else { - swapActual(primaryModel_); - primary_ = true; + void Translate(const Model &model, const std::vector &input) { + if (loadedModel_ != &model) { + Load(model.Parameters()); + loadedModel_ = &model; + } + auto corpus = New(input, model.SrcVocabs(), options_); + data::BatchGenerator batchGenerator(corpus, options_, nullptr, false); + + auto search = New(options_, scorers_, model.TrgVocab()); + auto printer = New(options_, model.TrgVocab()); + for (auto&& batch : batchGenerator) { + auto histories = search->search(graph_, batch); + for(auto history : histories) { + std::stringstream best1; + std::stringstream bestn; + printer->print(history, best1, bestn); + LOG(info, "Translation {}", best1.str()); } - LOG(info, "Swap took: {:.8f}s wall", timer.elapsed()); + } } }; + } // namespace marian +/* Demo program */ int main(int argc, char** argv) { using namespace marian; - auto options = parseOptions(argc, argv, cli::mode::translation); - SwapperTranslator swapper(options); + Ptr options = parseOptions(argc, argv, cli::mode::translation); + GPUSlot slot(options); + Model pten(options, + "/home/ubuntu/consistent-big-models/padded/pten.npz", + {"/home/ubuntu/consistent-big-models/padded/pten.vocab"}, + "/home/ubuntu/consistent-big-models/padded/pten.vocab"); + + Model enit(options, + "/home/ubuntu/consistent-big-models/padded/enit.npz", + {"/home/ubuntu/consistent-big-models/padded/enit.vocab"}, + "/home/ubuntu/consistent-big-models/padded/enit.vocab"); + const Model *model = &pten; std::string line; while (std::getline(std::cin, line)) { - if (line == "quit") { - break; - } else if (line == "swap") { - swapper.swap(); + if (line == " TRANSLATE PTEN") { + model = &pten; + } else if (line == " TRANSLATE ENIT") { + model = &enit; } else { - 
swapper.translateTxt(line); + slot.Translate(*model, {line}); } } From 67190dba9a6bbd1539d5a47c3439cd442058c84e Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 28 Mar 2021 18:34:46 +0000 Subject: [PATCH 003/135] Add vocabulary padding script --- scripts/contrib/pad_model_vocabulary.py | 52 +++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100755 scripts/contrib/pad_model_vocabulary.py diff --git a/scripts/contrib/pad_model_vocabulary.py b/scripts/contrib/pad_model_vocabulary.py new file mode 100755 index 000000000..eca73e34a --- /dev/null +++ b/scripts/contrib/pad_model_vocabulary.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# Pads a Marian model's vocabulary to have greater size. The added tokens have +# zero probability. +# ./pad_model_vocabulary.py input.npz output.npz desired_vocab_size +# +# You'll also need to separately pad your vocabulary file like so: +# old=$(wc -l input.vocab |cut -d " " -f 1) +# (cat input.vocab; seq -f "" $((desired_vocab_size-old))) >output.vocab +# +# Warning: probably only works with shared vocabulary models. +import math +import numpy as np +import sys +import yaml + +# Amend the vocab size in a raw ["special:model.yml"] data from a Marian npz. +# Returns the raw data to use for ["special:model.yml"] +def substitute_vocab_config(raw, new_size): + print("Old yml: ", raw.tostring()) + raw_yaml = raw.tostring().decode("utf-8") + #Python yaml doesn't like null bytes. 
+ if raw_yaml.endswith("\x00"): + raw_yaml = raw_yaml[:-1] + config = yaml.load(raw_yaml) + config['dim-vocabs'] = [new_size] * len(config['dim-vocabs']) + raw_yaml = yaml.dump(config) + if raw_yaml.endswith("\n"): + raw_yaml = raw_yaml[:-1] + raw_yaml += "\x00" + return np.array(bytearray(raw_yaml, 'utf-8')) + +if len(sys.argv) != 4: + print("Usage: " + sys.argv[0] + " input.npz output.npz desired_vocab_size") + sys.exit(1) + +resized_path = sys.argv[2] +new_size = int(sys.argv[3]) +old_model = np.load(sys.argv[1]) + +new_model = dict(old_model) +old_size = len(old_model["Wemb"]) +if old_size > new_size: + sys.stderr.write("New size is smaller than original. Cowardly refusing to clip vocab.\n") + sys.exit(2) +print("Before: ", new_model["decoder_ff_logit_out_b"].shape, new_model["Wemb"].shape) +bias = new_model["decoder_ff_logit_out_b"] +new_model["decoder_ff_logit_out_b"] = np.pad(bias, [(0,0),(0,new_size - bias.shape[1])], mode='constant', constant_values = -math.inf) +new_model["Wemb"] = np.pad(new_model["Wemb"], [(0,new_size - bias.shape[1]), (0,0)], mode='constant', constant_values = 0) +print("After: ", new_model["decoder_ff_logit_out_b"].shape, new_model["Wemb"].shape) +new_model["special:model.yml"] = substitute_vocab_config(new_model["special:model.yml"], new_size) +print("New yml: ", new_model["special:model.yml"].tostring()) +np.savez(resized_path, **new_model) From b165af8d6a6f1d8642c630f3fda7ff3ccfd7bab0 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 28 Mar 2021 19:31:44 +0000 Subject: [PATCH 004/135] Split code into main and library h/cpp --- src/CMakeLists.txt | 1 + src/command/marian_swapper.cpp | 134 ++------------------------------- src/translator/swappable.cpp | 96 +++++++++++++++++++++++ src/translator/swappable.h | 55 ++++++++++++++ 4 files changed, 159 insertions(+), 127 deletions(-) create mode 100644 src/translator/swappable.cpp create mode 100644 src/translator/swappable.h diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt 
index 397a0330f..98d5c4e98 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -100,6 +100,7 @@ set(MARIAN_SOURCES translator/nth_element.cpp translator/helpers.cpp translator/scorers.cpp + translator/swappable.cpp training/graph_group_async.cpp training/graph_group_sync.cpp diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp index 7c5e92abe..15bdecf63 100644 --- a/src/command/marian_swapper.cpp +++ b/src/command/marian_swapper.cpp @@ -1,142 +1,22 @@ -#include "marian.h" -#include "common/logging.h" -#include "data/corpus.h" -#include "data/text_input.h" -#include "translator/beam_search.h" -#include "translator/translator.h" -#include "common/io.h" -#include "common/timer.h" -#include -#include "tensors/gpu/swap.h" -namespace marian { +#include "translator/swappable.h" +#include -/* A model loaded on the CPU and possibly on a GPU */ -class Model { - private: - std::vector parameters_; - std::vector> srcVocabs_; - Ptr trgVocab_; - - public: - Model(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) - : parameters_(io::loadItems(parameters)) { - // Load parameters. - // Find the special element and remove it: - size_t special_idx = 0; - for (size_t i = 0; i < parameters_.size(); i++) { - if (parameters_[i].name == "special:model.yml") { - special_idx = i; - break; - } - } - parameters_.erase(parameters_.begin() + special_idx); - // Prepare the name so that it matches the named map - for (auto&& item : parameters_) { - item.name = "F0::" + item.name; - } - - // Load source vocabs. - const std::vector &maxVocabs = options->get>("dim-vocabs"); - for(size_t i = 0; i < sourceVocabPaths.size(); ++i) { - Ptr vocab = New(options, i); - vocab->load(sourceVocabPaths[i], maxVocabs[i]); - srcVocabs_.emplace_back(vocab); - } - - // Load target vocab. 
- trgVocab_ = New(options, sourceVocabPaths.size()); - trgVocab_->load(targetVocabPath); - } - - const std::vector &Parameters() const { return parameters_; } - - const std::vector> &SrcVocabs() const { return srcVocabs_; } - - Ptr TrgVocab() const { return trgVocab_; } -}; - -/* Reserved space on a GPU with which to translate */ -class GPUSlot { - private: - Ptr options_; - Ptr graph_; - std::vector > scorers_; - - // Last model used for translation. Used to skip loading. - const Model *loadedModel_; - - void Load(const std::vector ¶meters) { - timer::Timer timer; - auto namedMap = graph_->getParamsNamedMap(); - for (auto&& item : parameters) { - auto to = reinterpret_cast(namedMap[item.name]->val()->memory()->data()); - swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size()); - } - LOG(info, "Load took: {:.8f}s wall", timer.elapsed()); - } - - public: - explicit GPUSlot(Ptr options) : options_(options), loadedModel_(nullptr) { - options_->set("inference", true); - options_->set("shuffle", "none"); - // get device IDs - auto devices = Config::getDevices(options_); - auto numDevices = devices.size(); - std::cerr << "Num devices: " << numDevices << std::endl; - - // Create graph - graph_ = New(); - auto prec = options_->get>("precision", {"float32"}); - graph_->setDefaultElementType(typeFromString(prec[0])); - graph_->setDevice(devices[0]); - graph_->reserveWorkspaceMB(options_->get("workspace")); - // TODO: multiple scorers. 
- Ptr scorer = createScorers(options_)[0]; - scorer->init(graph_); - scorers_.push_back(scorer); - graph_->forward(); - } - - void Translate(const Model &model, const std::vector &input) { - if (loadedModel_ != &model) { - Load(model.Parameters()); - loadedModel_ = &model; - } - auto corpus = New(input, model.SrcVocabs(), options_); - data::BatchGenerator batchGenerator(corpus, options_, nullptr, false); - - auto search = New(options_, scorers_, model.TrgVocab()); - auto printer = New(options_, model.TrgVocab()); - for (auto&& batch : batchGenerator) { - auto histories = search->search(graph_, batch); - for(auto history : histories) { - std::stringstream best1; - std::stringstream bestn; - printer->print(history, best1, bestn); - LOG(info, "Translation {}", best1.str()); - } - } - } -}; - -} // namespace marian - -/* Demo program */ +/* Demo program: run with options for any of the models */ int main(int argc, char** argv) { using namespace marian; Ptr options = parseOptions(argc, argv, cli::mode::translation); - GPUSlot slot(options); - Model pten(options, + SwappableSlot slot(options); + SwappableModel pten(options, "/home/ubuntu/consistent-big-models/padded/pten.npz", {"/home/ubuntu/consistent-big-models/padded/pten.vocab"}, "/home/ubuntu/consistent-big-models/padded/pten.vocab"); - Model enit(options, + SwappableModel enit(options, "/home/ubuntu/consistent-big-models/padded/enit.npz", {"/home/ubuntu/consistent-big-models/padded/enit.vocab"}, "/home/ubuntu/consistent-big-models/padded/enit.vocab"); - const Model *model = &pten; + const SwappableModel *model = &pten; std::string line; while (std::getline(std::cin, line)) { if (line == " TRANSLATE PTEN") { diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp new file mode 100644 index 000000000..5f42f0748 --- /dev/null +++ b/src/translator/swappable.cpp @@ -0,0 +1,96 @@ +#include "marian.h" +#include "translator/swappable.h" +#include "common/logging.h" +#include "data/corpus.h" +#include 
"data/text_input.h" +#include "translator/beam_search.h" +#include "translator/translator.h" +#include "common/io.h" +#include "common/timer.h" +#include +#include "tensors/gpu/swap.h" +namespace marian { + +SwappableModel::SwappableModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) + : parameters_(io::loadItems(parameters)) { + // Load parameters. + // Find the special element and remove it: + size_t special_idx = 0; + for (size_t i = 0; i < parameters_.size(); i++) { + if (parameters_[i].name == "special:model.yml") { + special_idx = i; + break; + } + } + parameters_.erase(parameters_.begin() + special_idx); + // Prepare the name so that it matches the named map + for (auto&& item : parameters_) { + item.name = "F0::" + item.name; + } + + // Load source vocabs. + const std::vector &maxVocabs = options->get>("dim-vocabs"); + for(size_t i = 0; i < sourceVocabPaths.size(); ++i) { + Ptr vocab = New(options, i); + vocab->load(sourceVocabPaths[i], maxVocabs[i]); + srcVocabs_.emplace_back(vocab); + } + + // Load target vocab. 
+ trgVocab_ = New(options, sourceVocabPaths.size()); + trgVocab_->load(targetVocabPath); +} + +void SwappableSlot::Load(const std::vector ¶meters) { + timer::Timer timer; + auto namedMap = graph_->getParamsNamedMap(); + for (auto&& item : parameters) { + auto to = reinterpret_cast(namedMap[item.name]->val()->memory()->data()); + swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size()); + } + LOG(info, "Load took: {:.8f}s wall", timer.elapsed()); +} + +SwappableSlot::SwappableSlot(Ptr options) : options_(options), loadedModel_(nullptr) { + options_->set("inference", true); + options_->set("shuffle", "none"); + // get device IDs + auto devices = Config::getDevices(options_); + auto numDevices = devices.size(); + std::cerr << "Num devices: " << numDevices << std::endl; + + // Create graph + graph_ = New(); + auto prec = options_->get>("precision", {"float32"}); + graph_->setDefaultElementType(typeFromString(prec[0])); + graph_->setDevice(devices[0]); + graph_->reserveWorkspaceMB(options_->get("workspace")); + // TODO: multiple scorers. 
+ Ptr scorer = createScorers(options_)[0]; + scorer->init(graph_); + scorers_.push_back(scorer); + graph_->forward(); +} + +void SwappableSlot::Translate(const SwappableModel &model, const std::vector &input) { + if (loadedModel_ != &model) { + Load(model.Parameters()); + loadedModel_ = &model; + } + auto corpus = New(input, model.SrcVocabs(), options_); + data::BatchGenerator batchGenerator(corpus, options_, nullptr, false); + + auto search = New(options_, scorers_, model.TrgVocab()); + auto printer = New(options_, model.TrgVocab()); + for (auto&& batch : batchGenerator) { + auto histories = search->search(graph_, batch); + for(auto history : histories) { + std::stringstream best1; + std::stringstream bestn; + printer->print(history, best1, bestn); + LOG(info, "Translation {}", best1.str()); + } + } +} + +} // namespace marian diff --git a/src/translator/swappable.h b/src/translator/swappable.h new file mode 100644 index 000000000..9ef0c871e --- /dev/null +++ b/src/translator/swappable.h @@ -0,0 +1,55 @@ +#pragma once +/* Support for swapping models in and out of a GPU, when you have more models + * than fit in the GPU's RAM. The models must have identical graphs, including + * size. They can have different parameters and different vocabularies but the + * vocabularies must have the same size. To make vocabulary the same size, pad + * using scripts/contrib/pad_model_vocabulary.py offline. + */ +#include "marian.h" +#include "common/io.h" +#include "data/corpus.h" +#include "data/text_input.h" +#include "translator/translator.h" +#include +namespace marian { + +/* A model loaded on the CPU and possibly on a GPU. + */ +class SwappableModel { + private: + std::vector parameters_; + std::vector> srcVocabs_; + Ptr trgVocab_; + + public: + // The parts of Options that relate to model and vocab are ignored. The files provided will be loaded. 
+ SwappableModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath); + + const std::vector &Parameters() const { return parameters_; } + + const std::vector> &SrcVocabs() const { return srcVocabs_; } + + Ptr TrgVocab() const { return trgVocab_; } +}; + +/* Reserved space on a GPU with which to translate. If you can afford to fit + * multiple models on 1 GPU, then each one that fits is a GPUSlot + */ +class SwappableSlot { + private: + Ptr options_; + Ptr graph_; + std::vector > scorers_; + + // Last model used for translation. Used to skip loading. + const SwappableModel *loadedModel_; + + void Load(const std::vector ¶meters); + + public: + explicit SwappableSlot(Ptr options); + + void Translate(const SwappableModel &model, const std::vector &input); +}; + +} // namespace marian From 4d8e3271f9b07d666ba933f5604448557aacf20e Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 28 Mar 2021 19:53:34 +0000 Subject: [PATCH 005/135] Restore ensemble support --- src/translator/swappable.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 5f42f0748..d09cd85d0 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -65,10 +65,12 @@ SwappableSlot::SwappableSlot(Ptr options) : options_(options), loadedMo graph_->setDefaultElementType(typeFromString(prec[0])); graph_->setDevice(devices[0]); graph_->reserveWorkspaceMB(options_->get("workspace")); - // TODO: multiple scorers. - Ptr scorer = createScorers(options_)[0]; - scorer->init(graph_); - scorers_.push_back(scorer); + + scorers_ = createScorers(options_); + for (auto scorer : scorers_) { + scorer->init(graph_); + // TODO lexical shortlists are not supported yet. 
+ } graph_->forward(); } From 203a9bb87d5de281d377df7b6925f90118543d21 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 28 Mar 2021 19:56:53 +0000 Subject: [PATCH 006/135] Minor logging improvements --- src/translator/swappable.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index d09cd85d0..0f0912085 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -48,7 +48,7 @@ void SwappableSlot::Load(const std::vector ¶meters) { auto to = reinterpret_cast(namedMap[item.name]->val()->memory()->data()); swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size()); } - LOG(info, "Load took: {:.8f}s wall", timer.elapsed()); + LOG(info, "Swapping model from CPU to GPU took {:.8f}s wall", timer.elapsed()); } SwappableSlot::SwappableSlot(Ptr options) : options_(options), loadedModel_(nullptr) { @@ -57,7 +57,6 @@ SwappableSlot::SwappableSlot(Ptr options) : options_(options), loadedMo // get device IDs auto devices = Config::getDevices(options_); auto numDevices = devices.size(); - std::cerr << "Num devices: " << numDevices << std::endl; // Create graph graph_ = New(); From c71d48838752240297102f9c182a599cadd88811 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 28 Mar 2021 20:17:13 +0000 Subject: [PATCH 007/135] Return Histories --- src/command/marian_swapper.cpp | 15 +++++++++++++-- src/translator/swappable.cpp | 16 ++++++---------- src/translator/swappable.h | 11 +++++++---- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp index 15bdecf63..d0d52d78b 100644 --- a/src/command/marian_swapper.cpp +++ b/src/command/marian_swapper.cpp @@ -1,5 +1,8 @@ #include "translator/swappable.h" +#include "translator/output_printer.h" + #include +#include /* Demo program: run with options for any of the models */ int main(int argc, char** argv) { @@ -21,10 +24,18 @@ int main(int argc, 
char** argv) { while (std::getline(std::cin, line)) { if (line == " TRANSLATE PTEN") { model = &pten; + continue; } else if (line == " TRANSLATE ENIT") { model = &enit; - } else { - slot.Translate(*model, {line}); + continue; + } + marian::OutputPrinter printer(options, model->TrgVocab()); + marian::Histories histories = slot.Translate(*model, {line}); + for(auto history : histories) { + std::stringstream best1; + std::stringstream bestn; + printer.print(history, best1, bestn); + std::cout << best1.str() << '\n'; } } diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 0f0912085..475e9bb5b 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -56,7 +56,6 @@ SwappableSlot::SwappableSlot(Ptr options) : options_(options), loadedMo options_->set("shuffle", "none"); // get device IDs auto devices = Config::getDevices(options_); - auto numDevices = devices.size(); // Create graph graph_ = New(); @@ -73,7 +72,7 @@ SwappableSlot::SwappableSlot(Ptr options) : options_(options), loadedMo graph_->forward(); } -void SwappableSlot::Translate(const SwappableModel &model, const std::vector &input) { +Histories SwappableSlot::Translate(const SwappableModel &model, const std::vector &input) { if (loadedModel_ != &model) { Load(model.Parameters()); loadedModel_ = &model; @@ -82,16 +81,13 @@ void SwappableSlot::Translate(const SwappableModel &model, const std::vector batchGenerator(corpus, options_, nullptr, false); auto search = New(options_, scorers_, model.TrgVocab()); - auto printer = New(options_, model.TrgVocab()); + Histories ret; + ret.reserve(input.size()); for (auto&& batch : batchGenerator) { - auto histories = search->search(graph_, batch); - for(auto history : histories) { - std::stringstream best1; - std::stringstream bestn; - printer->print(history, best1, bestn); - LOG(info, "Translation {}", best1.str()); - } + auto result = search->search(graph_, batch); + ret.insert(ret.end(), result.begin(), result.end()); } + 
return ret; } } // namespace marian diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 9ef0c871e..f6431b04c 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -7,12 +7,15 @@ */ #include "marian.h" #include "common/io.h" -#include "data/corpus.h" -#include "data/text_input.h" -#include "translator/translator.h" +#include "data/vocab.h" +#include "translator/history.h" + +#include #include namespace marian { +class Scorer; + /* A model loaded on the CPU and possibly on a GPU. */ class SwappableModel { @@ -49,7 +52,7 @@ class SwappableSlot { public: explicit SwappableSlot(Ptr options); - void Translate(const SwappableModel &model, const std::vector &input); + Histories Translate(const SwappableModel &model, const std::vector &input); }; } // namespace marian From 47feb2b2d103ab857985b3c05bff010b0beeddcc Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 28 Mar 2021 21:52:33 +0000 Subject: [PATCH 008/135] Alignments --- src/command/marian_swapper.cpp | 91 +++++++++++++++++++++++++++------- 1 file changed, 72 insertions(+), 19 deletions(-) diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp index d0d52d78b..0d8198846 100644 --- a/src/command/marian_swapper.cpp +++ b/src/command/marian_swapper.cpp @@ -1,41 +1,94 @@ -#include "translator/swappable.h" +#include "translator/history.h" #include "translator/output_printer.h" +#include "translator/swappable.h" #include #include +#include + +namespace marian { +void LoadBig(Ptr options, std::unordered_map &to) { + to.emplace("pten", SwappableModel(options, + "/home/ubuntu/consistent-big-models/padded/pten.npz", + {"/home/ubuntu/consistent-big-models/padded/pten.vocab"}, + "/home/ubuntu/consistent-big-models/padded/pten.vocab")); + + to.emplace("iten", SwappableModel(options, + "/home/ubuntu/consistent-big-models/padded/enit.npz", + {"/home/ubuntu/consistent-big-models/padded/enit.vocab"}, + "/home/ubuntu/consistent-big-models/padded/enit.vocab")); 
+} + +void LoadTiny(Ptr options, std::unordered_map &to) { + std::vector models = {"csen", "encs", "enet", "eten", "esen", "enes"}; + for (const std::string m : models) { + std::string base = "/home/ubuntu/consistent-bergamot-students/padded/"; + base += m + "."; + to.emplace(m, SwappableModel(options, base + "npz", {base + "spm"}, base + "spm")); + } +} + +} // namespace /* Demo program: run with options for any of the models */ int main(int argc, char** argv) { using namespace marian; Ptr options = parseOptions(argc, argv, cli::mode::translation); + // You can have multiple slots. In principle these can even have different sizes, just use separate options. SwappableSlot slot(options); - SwappableModel pten(options, - "/home/ubuntu/consistent-big-models/padded/pten.npz", - {"/home/ubuntu/consistent-big-models/padded/pten.vocab"}, - "/home/ubuntu/consistent-big-models/padded/pten.vocab"); + + std::unordered_map models; +// LoadBig(options, models); + LoadTiny(options, models); - SwappableModel enit(options, - "/home/ubuntu/consistent-big-models/padded/enit.npz", - {"/home/ubuntu/consistent-big-models/padded/enit.vocab"}, - "/home/ubuntu/consistent-big-models/padded/enit.vocab"); + // begin with a space to avoid conflict with a real sentence. + const std::string kSwitchPrefix(" CHANGE "); + + bool alignments = !options->get("alignment").empty(); - const SwappableModel *model = &pten; + const SwappableModel *model = nullptr; std::string line; while (std::getline(std::cin, line)) { - if (line == " TRANSLATE PTEN") { - model = &pten; + // Switch out which model is used. + if (line.substr(0, kSwitchPrefix.size()) == kSwitchPrefix) { + std::string key = line.substr(kSwitchPrefix.size()); + auto found = models.find(key); + if (found == models.end()) { + std::cerr << "Model for " << key << " not loaded." 
<< std::endl; + return 1; + } + model = &found->second; continue; - } else if (line == " TRANSLATE ENIT") { - model = &enit; + } + if (!model) { + std::cerr << "Select a model first." << std::endl; continue; } - marian::OutputPrinter printer(options, model->TrgVocab()); + + // Actually translating with a model. marian::Histories histories = slot.Translate(*model, {line}); + // In practice there is one history because we provided one line. for(auto history : histories) { - std::stringstream best1; - std::stringstream bestn; - printer.print(history, best1, bestn); - std::cout << best1.str() << '\n'; + Result result(history->top()); + Words words = std::get<0>(result); + std::cout << model->TrgVocab()->decode(words) << std::endl; + + /* Print alignments */ + if (alignments) { + Hypothesis &hypo = *std::get<1>(result); + // [t][s] -> P(s|t) + marian::data::SoftAlignment alignment(hypo.tracebackAlignment()); + // An easier call for this is: + // std:cout << data::SoftAlignToString(alignment); + // The below is just there to show how access them programatically. 
+ // NB you can convert to hard with data::ConvertSoftAlignToHardAlign(alignment, threshold) + for (auto target : alignment) { + for (float source : target) { + std::cout << source << ' '; + } + std::cout << '\n'; + } + } } } From 8fc8d02b8080fb661ca2c84d8be26320767d11b6 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Sun, 28 Mar 2021 22:01:56 +0000 Subject: [PATCH 009/135] Fix enit --- src/command/marian_swapper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp index 0d8198846..8cc422ee7 100644 --- a/src/command/marian_swapper.cpp +++ b/src/command/marian_swapper.cpp @@ -13,7 +13,7 @@ void LoadBig(Ptr options, std::unordered_map Date: Tue, 30 Mar 2021 13:37:06 +0100 Subject: [PATCH 010/135] Add an option to force loading --- src/translator/swappable.cpp | 5 +++++ src/translator/swappable.h | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 475e9bb5b..3b77ed3ec 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -72,6 +72,11 @@ SwappableSlot::SwappableSlot(Ptr options) : options_(options), loadedMo graph_->forward(); } +void SwappableSlot::ForceLoad(const SwappableModel &model) { + Load(model.Parameters()); + loadedModel_ = &model; +} + Histories SwappableSlot::Translate(const SwappableModel &model, const std::vector &input) { if (loadedModel_ != &model) { Load(model.Parameters()); diff --git a/src/translator/swappable.h b/src/translator/swappable.h index f6431b04c..915824a33 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -52,6 +52,10 @@ class SwappableSlot { public: explicit SwappableSlot(Ptr options); + // Load this model even if it's already loaded. Mostly useful for timing. + void ForceLoad(const SwappableModel &model); + + // Translate using this model, loading if necessary. 
Histories Translate(const SwappableModel &model, const std::vector &input); }; From cf12178df736f25ba256ba4cd347f5afd542234e Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Tue, 30 Mar 2021 14:25:41 +0100 Subject: [PATCH 011/135] Allow CPU only compilation --- src/tensors/gpu/swap.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/tensors/gpu/swap.h b/src/tensors/gpu/swap.h index 86b3094d7..872810b0a 100644 --- a/src/tensors/gpu/swap.h +++ b/src/tensors/gpu/swap.h @@ -1,6 +1,14 @@ +#pragma once #include +#include "common/logging.h" namespace marian { namespace swapper { +#ifdef CUDA_FOUND void copyCpuToGpu(char * gpuOut, const char * in, size_t count); +#else + inline void copyCpuToGpu(char * gpuOut, const char * in, size_t count) { + ABORT("Copy from CPU to GPU memory is only available with CUDA."); + } +#endif } } From 7e06801a6e9fc86eb70429ecbfe1dc22e553ad70 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Tue, 30 Mar 2021 18:36:44 +0000 Subject: [PATCH 012/135] Add explicit gpu device index when creating the object --- src/tensors/gpu/swap.cu | 4 +++- src/tensors/gpu/swap.h | 5 +++-- src/translator/swappable.cpp | 11 +++++------ src/translator/swappable.h | 8 +++++++- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/tensors/gpu/swap.cu b/src/tensors/gpu/swap.cu index 3eb2c0df8..c0bd73a9a 100644 --- a/src/tensors/gpu/swap.cu +++ b/src/tensors/gpu/swap.cu @@ -1,9 +1,11 @@ #include "cuda_helpers.h" +#include "swap.h" void copyCpuToGpu(const char * in, char * gpuOut); namespace marian { namespace swapper { - void copyCpuToGpu(char * gpuOut, const char * in, size_t count) { + void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { + CUDA_CHECK(cudaSetDevice(deviceId.no)); CUDA_CHECK(cudaMemcpy(gpuOut, in, count, cudaMemcpyHostToDevice)); } } diff --git a/src/tensors/gpu/swap.h b/src/tensors/gpu/swap.h index 872810b0a..a020c8827 100644 --- a/src/tensors/gpu/swap.h +++ 
b/src/tensors/gpu/swap.h @@ -1,12 +1,13 @@ #pragma once #include +#include "common/definitions.h" #include "common/logging.h" namespace marian { namespace swapper { #ifdef CUDA_FOUND - void copyCpuToGpu(char * gpuOut, const char * in, size_t count); + void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId); #else - inline void copyCpuToGpu(char * gpuOut, const char * in, size_t count) { + inline void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { ABORT("Copy from CPU to GPU memory is only available with CUDA."); } #endif diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 3b77ed3ec..f9bf98bbc 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -46,22 +46,21 @@ void SwappableSlot::Load(const std::vector ¶meters) { auto namedMap = graph_->getParamsNamedMap(); for (auto&& item : parameters) { auto to = reinterpret_cast(namedMap[item.name]->val()->memory()->data()); - swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size()); + swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size(), myDeviceId_); } LOG(info, "Swapping model from CPU to GPU took {:.8f}s wall", timer.elapsed()); } -SwappableSlot::SwappableSlot(Ptr options) : options_(options), loadedModel_(nullptr) { +SwappableSlot::SwappableSlot(Ptr options, size_t deviceIdx /*=0*/) : options_(options), myDeviceId_(Config::getDevices(options)[deviceIdx]), loadedModel_(nullptr) { + ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); options_->set("inference", true); options_->set("shuffle", "none"); - // get device IDs - auto devices = Config::getDevices(options_); // Create graph graph_ = New(); auto prec = options_->get>("precision", {"float32"}); graph_->setDefaultElementType(typeFromString(prec[0])); - graph_->setDevice(devices[0]); + graph_->setDevice(myDeviceId_); graph_->reserveWorkspaceMB(options_->get("workspace")); 
scorers_ = createScorers(options_); @@ -83,7 +82,7 @@ Histories SwappableSlot::Translate(const SwappableModel &model, const std::vecto loadedModel_ = &model; } auto corpus = New(input, model.SrcVocabs(), options_); - data::BatchGenerator batchGenerator(corpus, options_, nullptr, false); + data::BatchGenerator batchGenerator(corpus, options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash auto search = New(options_, scorers_, model.TrgVocab()); Histories ret; diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 915824a33..00e5027c3 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -43,6 +43,7 @@ class SwappableSlot { Ptr options_; Ptr graph_; std::vector > scorers_; + const marian::DeviceId myDeviceId_; // Last model used for translation. Used to skip loading. const SwappableModel *loadedModel_; @@ -50,7 +51,12 @@ class SwappableSlot { void Load(const std::vector ¶meters); public: - explicit SwappableSlot(Ptr options); + /** + * @param options The marian options object + * @param deviceNum The index of the device you want to use for this slot. Note that this is not the deviceID but the index of the device in the + * array of supplied devices. Eg if you provide -d 0 3 5 and you want the Slot to run on GPU 3, you provide deviceNum=1. + */ + explicit SwappableSlot(Ptr options, size_t deviceIdx=0); // Load this model even if it's already loaded. Mostly useful for timing. 
void ForceLoad(const SwappableModel &model); From 635cfb06460c06bf1058d131815f699673519d52 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Tue, 30 Mar 2021 18:59:54 +0000 Subject: [PATCH 013/135] Allow multiple mini-batches --- src/translator/swappable.cpp | 14 +++++++++++++- src/translator/swappable.h | 2 ++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index f9bf98bbc..d1c282012 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -51,6 +51,18 @@ void SwappableSlot::Load(const std::vector ¶meters) { LOG(info, "Swapping model from CPU to GPU took {:.8f}s wall", timer.elapsed()); } +std::string SwappableSlot::MultilineInputHack(const std::vector &input) { + if (input.size() == 1) { + return input[0]; + } else { + std::stringstream ss; + for (auto&& line : input) { + ss << line << '\n'; + } + return ss.str(); + } +} + SwappableSlot::SwappableSlot(Ptr options, size_t deviceIdx /*=0*/) : options_(options), myDeviceId_(Config::getDevices(options)[deviceIdx]), loadedModel_(nullptr) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); options_->set("inference", true); @@ -81,7 +93,7 @@ Histories SwappableSlot::Translate(const SwappableModel &model, const std::vecto Load(model.Parameters()); loadedModel_ = &model; } - auto corpus = New(input, model.SrcVocabs(), options_); + auto corpus = New(std::vector(1,MultilineInputHack(input)), model.SrcVocabs(), options_); // @TODO dirty hack data::BatchGenerator batchGenerator(corpus, options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash auto search = New(options_, scorers_, model.TrgVocab()); diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 00e5027c3..8d6e207de 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -50,6 +50,8 @@ class SwappableSlot { 
void Load(const std::vector ¶meters); + std::string MultilineInputHack(const std::vector &input); + public: /** * @param options The marian options object From ee6ff754471c4b56e7a64a196bda71bd11f04454 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Tue, 30 Mar 2021 19:24:55 +0000 Subject: [PATCH 014/135] No stringstreams --- src/translator/swappable.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index d1c282012..2517baec5 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -55,11 +55,13 @@ std::string SwappableSlot::MultilineInputHack(const std::vector &in if (input.size() == 1) { return input[0]; } else { - std::stringstream ss; + std::string ret; + ret.reserve(10000); for (auto&& line : input) { - ss << line << '\n'; + ret.append(line); + ret.append("\n"); } - return ss.str(); + return ret; } } From 57ddebacbe057c519cc04840dd2f0d23b25d1e1b Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Thu, 1 Apr 2021 00:02:21 +0000 Subject: [PATCH 015/135] Sort the histories before returning them --- src/translator/swappable.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 2517baec5..eec74a72d 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -105,6 +105,7 @@ Histories SwappableSlot::Translate(const SwappableModel &model, const std::vecto auto result = search->search(graph_, batch); ret.insert(ret.end(), result.begin(), result.end()); } + std::sort(ret.begin(), ret.end(),[](marian::Ptr a, marian::Ptr b){return a->getLineNum() < b->getLineNum();}); return ret; } From 4f2b218ba62f95b4af98f6681c22730a59cd7c0b Mon Sep 17 00:00:00 2001 From: Davide Caroselli Date: Thu, 1 Apr 2021 11:11:40 +0000 Subject: [PATCH 016/135] SwappableSlot: add GPU-to-GPU reset feature --- src/tensors/gpu/swap.cu | 5 +++++ src/tensors/gpu/swap.h | 5 +++++ 
src/translator/swappable.cpp | 21 +++++++++++++++++++++ src/translator/swappable.h | 4 ++++ 4 files changed, 35 insertions(+) diff --git a/src/tensors/gpu/swap.cu b/src/tensors/gpu/swap.cu index c0bd73a9a..16210e0c5 100644 --- a/src/tensors/gpu/swap.cu +++ b/src/tensors/gpu/swap.cu @@ -1,6 +1,7 @@ #include "cuda_helpers.h" #include "swap.h" void copyCpuToGpu(const char * in, char * gpuOut); +void copyGpuToGpu(const char * in, char * gpuOut); namespace marian { namespace swapper { @@ -8,5 +9,9 @@ namespace marian { CUDA_CHECK(cudaSetDevice(deviceId.no)); CUDA_CHECK(cudaMemcpy(gpuOut, in, count, cudaMemcpyHostToDevice)); } + void copyGpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { + CUDA_CHECK(cudaSetDevice(deviceId.no)); + CUDA_CHECK(cudaMemcpy(gpuOut, in, count, cudaMemcpyDeviceToDevice)); + } } } diff --git a/src/tensors/gpu/swap.h b/src/tensors/gpu/swap.h index a020c8827..7d8784266 100644 --- a/src/tensors/gpu/swap.h +++ b/src/tensors/gpu/swap.h @@ -6,10 +6,15 @@ namespace marian { namespace swapper { #ifdef CUDA_FOUND void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId); + void copyGpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId); #else inline void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { ABORT("Copy from CPU to GPU memory is only available with CUDA."); } + + inline void copyGpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { + ABORT("Copy from GPU to GPU memory is only available with CUDA."); + } #endif } } diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index eec74a72d..9f9ce3b14 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -51,6 +51,22 @@ void SwappableSlot::Load(const std::vector ¶meters) { LOG(info, "Swapping model from CPU to GPU took {:.8f}s wall", timer.elapsed()); } +void 
SwappableSlot::Load(const SwappableSlot &slot) { + timer::Timer timer; + auto toMap = graph_->getParamsNamedMap(); + auto fromMap = slot.graph_->getParamsNamedMap(); + + for (auto &it : fromMap) { + size_t size = it.second->val()->memory()->size(); + auto from = reinterpret_cast(it.second->val()->memory()->data()); + auto to = reinterpret_cast(toMap[it.first]->val()->memory()->data()); + + swapper::copyGpuToGpu(to, from, size, myDeviceId_); + } + + LOG(info, "Swapping model from GPU to GPU took {:.8f}s wall", timer.elapsed()); +} + std::string SwappableSlot::MultilineInputHack(const std::vector &input) { if (input.size() == 1) { return input[0]; @@ -90,6 +106,11 @@ void SwappableSlot::ForceLoad(const SwappableModel &model) { loadedModel_ = &model; } +void SwappableSlot::ForceLoad(const SwappableModel &model, const SwappableSlot &slot) { + Load(slot); + loadedModel_ = &model; +} + Histories SwappableSlot::Translate(const SwappableModel &model, const std::vector &input) { if (loadedModel_ != &model) { Load(model.Parameters()); diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 8d6e207de..c9b728cf4 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -50,6 +50,8 @@ class SwappableSlot { void Load(const std::vector ¶meters); + void Load(const SwappableSlot &slot); + std::string MultilineInputHack(const std::vector &input); public: @@ -63,6 +65,8 @@ class SwappableSlot { // Load this model even if it's already loaded. Mostly useful for timing. void ForceLoad(const SwappableModel &model); + void ForceLoad(const SwappableModel &model, const SwappableSlot &slot); + // Translate using this model, loading if necessary. 
Histories Translate(const SwappableModel &model, const std::vector &input); }; From e3f53884a06fa91db12299d78c530d6c8d68bd09 Mon Sep 17 00:00:00 2001 From: Nikolay Bogoychev Date: Fri, 2 Apr 2021 21:56:50 +0000 Subject: [PATCH 017/135] Separate graph from loading to GPU --- src/command/marian_swapper.cpp | 30 +++--- src/translator/swappable.cpp | 176 ++++++++++++++++++--------------- src/translator/swappable.h | 84 +++++++++------- 3 files changed, 159 insertions(+), 131 deletions(-) diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp index 8cc422ee7..ce3c160f0 100644 --- a/src/command/marian_swapper.cpp +++ b/src/command/marian_swapper.cpp @@ -7,24 +7,24 @@ #include namespace marian { -void LoadBig(Ptr options, std::unordered_map &to) { - to.emplace("pten", SwappableModel(options, +void LoadBig(Ptr options, std::unordered_map &to) { + to.emplace("pten", CPULoadedModel(options, "/home/ubuntu/consistent-big-models/padded/pten.npz", {"/home/ubuntu/consistent-big-models/padded/pten.vocab"}, "/home/ubuntu/consistent-big-models/padded/pten.vocab")); - to.emplace("enit", SwappableModel(options, + to.emplace("enit", CPULoadedModel(options, "/home/ubuntu/consistent-big-models/padded/enit.npz", {"/home/ubuntu/consistent-big-models/padded/enit.vocab"}, "/home/ubuntu/consistent-big-models/padded/enit.vocab")); } -void LoadTiny(Ptr options, std::unordered_map &to) { +void LoadTiny(Ptr options, std::unordered_map &to) { std::vector models = {"csen", "encs", "enet", "eten", "esen", "enes"}; for (const std::string m : models) { std::string base = "/home/ubuntu/consistent-bergamot-students/padded/"; base += m + "."; - to.emplace(m, SwappableModel(options, base + "npz", {base + "spm"}, base + "spm")); + to.emplace(m, CPULoadedModel(options, base + "npz", {base + "spm"}, base + "spm")); } } @@ -34,10 +34,11 @@ void LoadTiny(Ptr options, std::unordered_map options = parseOptions(argc, argv, cli::mode::translation); - // You can have multiple slots. 
In principle these can even have different sizes, just use separate options. - SwappableSlot slot(options); - - std::unordered_map models; + + Ptr engine = New(options, 0); + GPULoadedModel slot(engine); + + std::unordered_map models; // LoadBig(options, models); LoadTiny(options, models); @@ -46,7 +47,7 @@ int main(int argc, char** argv) { bool alignments = !options->get("alignment").empty(); - const SwappableModel *model = nullptr; + bool loaded = false; std::string line; while (std::getline(std::cin, line)) { // Switch out which model is used. @@ -57,21 +58,22 @@ int main(int argc, char** argv) { std::cerr << "Model for " << key << " not loaded." << std::endl; return 1; } - model = &found->second; + slot.OverwriteFrom(found->second); + loaded = true; continue; } - if (!model) { + if (!loaded) { std::cerr << "Select a model first." << std::endl; continue; } // Actually translating with a model. - marian::Histories histories = slot.Translate(*model, {line}); + marian::Histories histories = slot.Translate({line}); // In practice there is one history because we provided one line. for(auto history : histories) { Result result(history->top()); Words words = std::get<0>(result); - std::cout << model->TrgVocab()->decode(words) << std::endl; + std::cout << slot.TrgVocab()->decode(words) << std::endl; /* Print alignments */ if (alignments) { diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 9f9ce3b14..21c9413bc 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -9,70 +9,81 @@ #include "common/timer.h" #include #include "tensors/gpu/swap.h" + namespace marian { -SwappableModel::SwappableModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) - : parameters_(io::loadItems(parameters)) { - // Load parameters. 
- // Find the special element and remove it: - size_t special_idx = 0; - for (size_t i = 0; i < parameters_.size(); i++) { - if (parameters_[i].name == "special:model.yml") { - special_idx = i; - break; - } - } - parameters_.erase(parameters_.begin() + special_idx); - // Prepare the name so that it matches the named map - for (auto&& item : parameters_) { - item.name = "F0::" + item.name; +void GPUEngine::SwapPointers(std::vector &with) { + auto write_it = graph_->params()->begin(); + auto read_it = with.begin(); + for (; read_it != with.end(); ++write_it, ++read_it) { + std::swap(*(*write_it)->val()->memory(), **read_it); } +} - // Load source vocabs. - const std::vector &maxVocabs = options->get>("dim-vocabs"); - for(size_t i = 0; i < sourceVocabPaths.size(); ++i) { - Ptr vocab = New(options, i); - vocab->load(sourceVocabPaths[i], maxVocabs[i]); - srcVocabs_.emplace_back(vocab); +GPUEngine::GPUEngine(Ptr options, size_t deviceIdx) + : options_(options), graph_(New()), myDeviceId_(Config::getDevices(options)[deviceIdx]), allocator_(myDeviceId_, 0, 128 * 1048576) { + ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); + options_->set("inference", true); + options_->set("shuffle", "none"); + + // Create graph + auto prec = options_->get>("precision", {"float32"}); + graph_->setDefaultElementType(typeFromString(prec[0])); + graph_->setDevice(myDeviceId_); + graph_->reserveWorkspaceMB(options_->get("workspace")); + + scorers_ = createScorers(options_); + for (auto scorer : scorers_) { + scorer->init(graph_); + // TODO lexical shortlists are not supported yet. } + graph_->forward(); + // TODO: reach into graph_->params() private members and free the parameter memory. +} - // Load target vocab. 
- trgVocab_ = New(options, sourceVocabPaths.size()); - trgVocab_->load(targetVocabPath); +GPUEngine::~GPUEngine() {} + +GPULoadedModel::GPULoadedModel(Ptr gpu) : engine_(gpu) { + for (auto ¶m : *engine_->graph_->params()) { + parameters_.push_back(engine_->allocator_.alloc(param->val()->memory()->size())); + } } -void SwappableSlot::Load(const std::vector ¶meters) { - timer::Timer timer; - auto namedMap = graph_->getParamsNamedMap(); - for (auto&& item : parameters) { - auto to = reinterpret_cast(namedMap[item.name]->val()->memory()->data()); - swapper::copyCpuToGpu(to, &item.bytes[0], item.bytes.size(), myDeviceId_); +GPULoadedModel::~GPULoadedModel() { + for (MemoryPiece::PtrType &p : parameters_) { + engine_->allocator_.free(p); } - LOG(info, "Swapping model from CPU to GPU took {:.8f}s wall", timer.elapsed()); } -void SwappableSlot::Load(const SwappableSlot &slot) { - timer::Timer timer; - auto toMap = graph_->getParamsNamedMap(); - auto fromMap = slot.graph_->getParamsNamedMap(); +void GPULoadedModel::OverwriteFrom(const GPULoadedModel &from) { + srcVocabs_ = from.srcVocabs_; + trgVocab_ = from.trgVocab_; - for (auto &it : fromMap) { - size_t size = it.second->val()->memory()->size(); - auto from = reinterpret_cast(it.second->val()->memory()->data()); - auto to = reinterpret_cast(toMap[it.first]->val()->memory()->data()); + ABORT_IF(engine_ != from.engine_, "TODO: copy across GPUs."); - swapper::copyGpuToGpu(to, from, size, myDeviceId_); - } + for (size_t i = 0; i < parameters_.size(); ++i) { + swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), reinterpret_cast(from.parameters_[i]->data()), parameters_[i]->size(), engine_->myDeviceId_); + } +} - LOG(info, "Swapping model from GPU to GPU took {:.8f}s wall", timer.elapsed()); +void GPULoadedModel::OverwriteFrom(const CPULoadedModel &from) { + srcVocabs_ = from.SrcVocabs(); + trgVocab_ = from.TrgVocab(); + for (size_t i = 0; i < parameters_.size(); ++i) { + 
swapper::copyCpuToGpu(reinterpret_cast(parameters_[i]->data()), from.Parameters()[i].data(), from.Parameters()[i].size(), engine_->myDeviceId_); + } } -std::string SwappableSlot::MultilineInputHack(const std::vector &input) { +std::string MultilineInputHack(const std::vector &input) { if (input.size() == 1) { return input[0]; } else { std::string ret; - ret.reserve(10000); + std::size_t size = 0; + for (auto&& line : input) { + size += line.size() + 1; + } + ret.reserve(size); for (auto&& line : input) { ret.append(line); ret.append("\n"); @@ -81,53 +92,54 @@ std::string SwappableSlot::MultilineInputHack(const std::vector &in } } -SwappableSlot::SwappableSlot(Ptr options, size_t deviceIdx /*=0*/) : options_(options), myDeviceId_(Config::getDevices(options)[deviceIdx]), loadedModel_(nullptr) { - ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); - options_->set("inference", true); - options_->set("shuffle", "none"); - - // Create graph - graph_ = New(); - auto prec = options_->get>("precision", {"float32"}); - graph_->setDefaultElementType(typeFromString(prec[0])); - graph_->setDevice(myDeviceId_); - graph_->reserveWorkspaceMB(options_->get("workspace")); - - scorers_ = createScorers(options_); - for (auto scorer : scorers_) { - scorer->init(graph_); - // TODO lexical shortlists are not supported yet. 
- } - graph_->forward(); -} - -void SwappableSlot::ForceLoad(const SwappableModel &model) { - Load(model.Parameters()); - loadedModel_ = &model; -} - -void SwappableSlot::ForceLoad(const SwappableModel &model, const SwappableSlot &slot) { - Load(slot); - loadedModel_ = &model; -} +Histories GPULoadedModel::Translate(const std::vector &input) { + engine_->SwapPointers(parameters_); -Histories SwappableSlot::Translate(const SwappableModel &model, const std::vector &input) { - if (loadedModel_ != &model) { - Load(model.Parameters()); - loadedModel_ = &model; - } - auto corpus = New(std::vector(1,MultilineInputHack(input)), model.SrcVocabs(), options_); // @TODO dirty hack - data::BatchGenerator batchGenerator(corpus, options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash + auto corpus = New(std::vector(1, MultilineInputHack(input)), srcVocabs_, engine_->options_); // @TODO dirty hack + data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash - auto search = New(options_, scorers_, model.TrgVocab()); + BeamSearch search(engine_->options_, engine_->scorers_, trgVocab_); Histories ret; ret.reserve(input.size()); for (auto&& batch : batchGenerator) { - auto result = search->search(graph_, batch); + auto result = search.search(engine_->graph_, batch); ret.insert(ret.end(), result.begin(), result.end()); } std::sort(ret.begin(), ret.end(),[](marian::Ptr a, marian::Ptr b){return a->getLineNum() < b->getLineNum();}); + engine_->SwapPointers(parameters_); return ret; } +CPULoadedModel::CPULoadedModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) + : parameters_(io::loadItems(parameters)) { + // Load parameters. 
+ // Find the special element and remove it: + size_t special_idx = 0; + for (size_t i = 0; i < parameters_.size(); i++) { + if (parameters_[i].name == "special:model.yml") { + special_idx = i; + break; + } + } + parameters_.erase(parameters_.begin() + special_idx); + // Prepare the name so that it matches the named map + for (auto&& item : parameters_) { + item.name = "F0::" + item.name; + } + // Sort by name to match params order. + std::sort(parameters_.begin(), parameters_.end(), [](const io::Item &a, const io::Item &b){return a.name < b.name;}); + + // Load source vocabs. + const std::vector &maxVocabs = options->get>("dim-vocabs"); + for(size_t i = 0; i < sourceVocabPaths.size(); ++i) { + Ptr vocab = New(options, i); + vocab->load(sourceVocabPaths[i], maxVocabs[i]); + srcVocabs_.emplace_back(vocab); + } + + // Load target vocab. + trgVocab_ = New(options, sourceVocabPaths.size()); + trgVocab_->load(targetVocabPath); +} + } // namespace marian diff --git a/src/translator/swappable.h b/src/translator/swappable.h index c9b728cf4..f9a276e46 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -16,59 +16,73 @@ namespace marian { class Scorer; -/* A model loaded on the CPU and possibly on a GPU. - */ -class SwappableModel { +class GPULoadedModel; +class CPULoadedModel; + +/* Execute on a particular device */ +class GPUEngine { + private: + friend class GPULoadedModel; + Ptr options_; + Ptr graph_; + std::vector > scorers_; + const marian::DeviceId myDeviceId_; + Allocator allocator_; + + void SwapPointers(std::vector &with); + + public: + /** + * @param options The marian options object + * @param deviceNum The index of the device you want to use for this slot. Note that this is not the deviceID but the index of the device in the + * array of supplied devices. Eg if you provide -d 0 3 5 and you want the Slot to run on GPU 3, you provide deviceNum=1. 
+ */ + explicit GPUEngine(Ptr options, size_t deviceNum); + + ~GPUEngine(); +}; + +/* A model loaded on the GPU that can be overwritten from CPU or GPU. */ +class GPULoadedModel { private: - std::vector parameters_; + Ptr engine_; + + std::vector parameters_; std::vector> srcVocabs_; Ptr trgVocab_; public: - // The parts of Options that relate to model and vocab are ignored. The files provided will be loaded. - SwappableModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath); + GPULoadedModel(Ptr gpu); - const std::vector &Parameters() const { return parameters_; } + ~GPULoadedModel(); const std::vector> &SrcVocabs() const { return srcVocabs_; } Ptr TrgVocab() const { return trgVocab_; } -}; -/* Reserved space on a GPU with which to translate. If you can afford to fit - * multiple models on 1 GPU, then each one that fits is a GPUSlot - */ -class SwappableSlot { - private: - Ptr options_; - Ptr graph_; - std::vector > scorers_; - const marian::DeviceId myDeviceId_; + // Overwrite this model with parameters from a different one. + void OverwriteFrom(const CPULoadedModel &from); + void OverwriteFrom(const GPULoadedModel &from); - // Last model used for translation. Used to skip loading. - const SwappableModel *loadedModel_; - - void Load(const std::vector ¶meters); - - void Load(const SwappableSlot &slot); + Histories Translate(const std::vector &input); +}; - std::string MultilineInputHack(const std::vector &input); +/* A model loaded on the CPU. */ +class CPULoadedModel { + private: + std::vector parameters_; + std::vector> srcVocabs_; + Ptr trgVocab_; public: - /** - * @param options The marian options object - * @param deviceNum The index of the device you want to use for this slot. Note that this is not the deviceID but the index of the device in the - * array of supplied devices. Eg if you provide -d 0 3 5 and you want the Slot to run on GPU 3, you provide deviceNum=1. 
- */ - explicit SwappableSlot(Ptr options, size_t deviceIdx=0); + // The parts of Options that relate to model and vocab are ignored. The files provided will be loaded. + CPULoadedModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath); - // Load this model even if it's already loaded. Mostly useful for timing. - void ForceLoad(const SwappableModel &model); + const std::vector &Parameters() const { return parameters_; } - void ForceLoad(const SwappableModel &model, const SwappableSlot &slot); + const std::vector> &SrcVocabs() const { return srcVocabs_; } - // Translate using this model, loading if necessary. - Histories Translate(const SwappableModel &model, const std::vector &input); + Ptr TrgVocab() const { return trgVocab_; } }; } // namespace marian From ba4d166bca752ed5565b49279f55d605ac9522d0 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 2 Apr 2021 22:14:59 +0000 Subject: [PATCH 018/135] Abort if not initialized --- src/translator/swappable.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 21c9413bc..441608390 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -93,6 +93,7 @@ std::string MultilineInputHack(const std::vector &input) { } Histories GPULoadedModel::Translate(const std::vector &input) { + ABORT_IF(!trgVocab_, "GPULoadedModel needs to be overwritten by a CPU model first."); engine_->SwapPointers(parameters_); auto corpus = New(std::vector(1, MultilineInputHack(input)), srcVocabs_, engine_->options_); // @TODO dirty hack From f8523b70fae5d235f4bbfb49422e2168de8c4890 Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 2 Apr 2021 22:32:37 +0000 Subject: [PATCH 019/135] Go back to Load instead of OverwriteFrom --- src/command/marian_swapper.cpp | 2 +- src/translator/swappable.cpp | 4 ++-- src/translator/swappable.h | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) 
diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp index ce3c160f0..758501d1e 100644 --- a/src/command/marian_swapper.cpp +++ b/src/command/marian_swapper.cpp @@ -58,7 +58,7 @@ int main(int argc, char** argv) { std::cerr << "Model for " << key << " not loaded." << std::endl; return 1; } - slot.OverwriteFrom(found->second); + slot.Load(found->second); loaded = true; continue; } diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 441608390..fbaac7bba 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -55,7 +55,7 @@ GPULoadedModel::~GPULoadedModel() { } } -void GPULoadedModel::OverwriteFrom(const GPULoadedModel &from) { +void GPULoadedModel::Load(const GPULoadedModel &from) { srcVocabs_ = from.srcVocabs_; trgVocab_ = from.trgVocab_; @@ -66,7 +66,7 @@ void GPULoadedModel::OverwriteFrom(const GPULoadedModel &from) { } } -void GPULoadedModel::OverwriteFrom(const CPULoadedModel &from) { +void GPULoadedModel::Load(const CPULoadedModel &from) { srcVocabs_ = from.SrcVocabs(); trgVocab_ = from.TrgVocab(); for (size_t i = 0; i < parameters_.size(); ++i) { diff --git a/src/translator/swappable.h b/src/translator/swappable.h index f9a276e46..91f92adad 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -61,8 +61,8 @@ class GPULoadedModel { Ptr TrgVocab() const { return trgVocab_; } // Overwrite this model with parameters from a different one. 
- void OverwriteFrom(const CPULoadedModel &from); - void OverwriteFrom(const GPULoadedModel &from); + void Load(const CPULoadedModel &from); + void Load(const GPULoadedModel &from); Histories Translate(const std::vector &input); }; From 8bcfdcc2aef76a5fc86a9015a594af56fa4930fc Mon Sep 17 00:00:00 2001 From: Kenneth Heafield Date: Fri, 2 Apr 2021 22:54:24 +0000 Subject: [PATCH 020/135] Check device index --- src/translator/swappable.cpp | 10 +++++++++- src/translator/swappable.h | 2 +- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index fbaac7bba..c5c2bae05 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -20,8 +20,16 @@ void GPUEngine::SwapPointers(std::vector &with) { } } +namespace { +DeviceId LookupGPU(const Ptr options, size_t deviceIdx) { + auto devices = Config::getDevices(options); + ABORT_IF(deviceIdx >= devices.size(), "GPU device index higher than configured."); + return devices[deviceIdx]; +} +} // namespace + GPUEngine::GPUEngine(Ptr options, size_t deviceIdx) - : options_(options), graph_(New()), myDeviceId_(Config::getDevices(options)[deviceIdx]), allocator_(myDeviceId_, 0, 128 * 1048576) { + : options_(options), graph_(New()), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); options_->set("inference", true); options_->set("shuffle", "none"); diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 91f92adad..b3cb5f82f 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -26,7 +26,7 @@ class GPUEngine { Ptr options_; Ptr graph_; std::vector > scorers_; - const marian::DeviceId myDeviceId_; + const DeviceId myDeviceId_; Allocator allocator_; void SwapPointers(std::vector &with); From a893f19fab2d6d57018d4b8b76958290b56da1c3 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 12 Feb 2021 09:26:57 +0200 Subject: [PATCH 021/135] Start working on code to reproduce a bug i encountered --- src/CMakeLists.txt | 5 +++++ src/command/bug_repro.cpp | 27 +++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) create mode 100644 src/command/bug_repro.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 98d5c4e98..a3a8008e1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -261,6 +261,11 @@ if (NOT COMPILE_LIBRARY_ONLY) add_custom_target(marian_tgz DEPENDS "${CMAKE_BINARY_DIR}/marian.tgz") add_custom_target(philly DEPENDS marian_tgz marian_zip) + add_executable(bug_repro command/bug_repro.cpp) + set_target_properties(bug_repro PROPERTIES OUTPUT_NAME bug_repro) + target_compile_options(bug_repro PRIVATE ${ALL_WARNINGS} -Wno-suggest-override) + set(EXECUTABLES ${EXECUTABLES} bug_repro) + if(COMPILE_SERVER) add_executable(marian_server command/marian_server.cpp) set_target_properties(marian_server PROPERTIES OUTPUT_NAME marian-server) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp new file mode 100644 index 000000000..2025249e0 --- /dev/null +++ b/src/command/bug_repro.cpp @@ -0,0 +1,27 @@ +#include "../models/model_factory.h" +#include "../models/model_task.h" +#include "marian.h" + +namespace marian { + +class ReproTask : public marian::ModelServiceTask { +private: + Ptr graph_; + Ptr builder_; // Training model + +public: + ReproTask() { + graph_ = New(); + graph_->setDevice({0, DeviceType::cpu}); + graph_->reserveWorkspaceMB(128); + // builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); + } + std::string run(const std::string& json) override { + return ""; + } +}; + +int main(int argc, char **argv) { + return 0; +} +} From 7f6d01e835e95f096ae147c3167a2f1ba1e83496 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 17 Feb 2021 15:16:58 +0200 Subject: [PATCH 022/135] Build the model implement a 
simplistic training loop This doesn't work though because we're missing a lot of options because we initialize them manually instead of using the config parser. --- src/command/bug_repro.cpp | 89 +++++++++++++++++++++++++++++++++------ 1 file changed, 77 insertions(+), 12 deletions(-) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp index 2025249e0..20abfec28 100644 --- a/src/command/bug_repro.cpp +++ b/src/command/bug_repro.cpp @@ -1,27 +1,92 @@ +#include "../common/options.h" +#include "../data/text_input.h" #include "../models/model_factory.h" #include "../models/model_task.h" +#include "../training/scheduler.h" #include "marian.h" namespace marian { -class ReproTask : public marian::ModelServiceTask { -private: - Ptr graph_; - Ptr builder_; // Training model - +class ReproTask : public marian::ModelTask { public: ReproTask() { - graph_ = New(); - graph_->setDevice({0, DeviceType::cpu}); - graph_->reserveWorkspaceMB(128); - // builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); } - std::string run(const std::string& json) override { - return ""; + void run() override { + io::InputFileStream strm("/home/rihards/exp/marian-adaptive-crash-repro/models/model.npz.repro.yml"); + YAML::Node optionsNode = YAML::Load(strm); + auto optionsBig = New(optionsNode); + auto options = New("after", "0e"); + options->merge(optionsBig); + auto builder = models::createCriterionFunctionFromOptions(options, models::usage::training); + auto optimizer = Optimizer(0.01); + + std::vector vocabPaths + = {"/home/rihards/exp/marian-adaptive-crash-repro/models/train.1-to-1.bpe.en-lv.yml", + "/home/rihards/exp/marian-adaptive-crash-repro/models/train.1-to-1.bpe.en-lv.yml"}; + std::vector maxVocabs = {500, 500}; + + std::vector> vocabs; + for(size_t i = 0; i < vocabPaths.size(); i++) { + Ptr vocab = New(options, i); + vocab->load(vocabPaths[i], maxVocabs[i]); + vocabs.emplace_back(vocab); + } + std::string sources = "del@@ e@@ tions 
affecting 13 q 14 are also the most frequent structural genetic ab@@ " + "err@@ ations in chronic lym@@ pho@@ cy@@ tic leu@@ ka@@ emia ( C@@ ll ) 6,@@ 7 , 8 " + ".\nthis region is found to be heter@@ oz@@ y@@ g@@ ously deleted in 30 ¬ 60 % and hom@@ " + "oz@@ y@@ g@@ ously deleted in 10 ¬ 20 % of C@@ ll patien@@ ts@@ 9 ."; + std::string targets + = "del@@ ē@@ cijas , kas ietekmē 13 q 14 , arī ir visbiežāk sastopa@@ mās strukturālās " + "ģenē@@ tiskās ab@@ er@@ ācijas hron@@ iskā lim@@ foc@@ ī@@ tiskajā leik@@ ēm@@ ijā ( " + "H@@ LL ) 6,@@ 7 , 8 .\n30 –@@ 60 % H@@ LL pacientu ir konstatēta šī reģiona heter@@ " + "oz@@ ig@@ ota del@@ ē@@ cija , savukārt 10 –@@ 20 % H@@ LL pacientu ir konstatēta šī " + "reģiona hom@@ oz@@ ig@@ ota del@@ ē@@ c@@ ij@@ a@@ 9 ."; + auto inputs = New(std::vector({sources, targets}), vocabs, options); + auto batches = New>(inputs, options); + + auto state = New(options->get("learn-rate")); + auto scheduler = New(options, state); + scheduler->registerTrainingObserver(scheduler); + scheduler->registerTrainingObserver(optimizer); + + Ptr graph; + + bool first = true; + scheduler->started(); + while(scheduler->keepGoing()) { + batches->prepare(); + + for(auto batch : *batches) { + if(!scheduler->keepGoing()) { + break; + } + + if(first) { + graph = New(); + graph->setDevice({0, DeviceType::cpu}); + graph->reserveWorkspaceMB(128); + first = false; + } + + auto lossNode = builder->build(graph, batch); + graph->forward(); + StaticLoss loss = *lossNode; + graph->backward(); + + optimizer->update(graph); + scheduler->update(loss, batch); + } + + if(scheduler->keepGoing()) + scheduler->increaseEpoch(); + } + scheduler->finished(); } }; +} int main(int argc, char **argv) { + auto task = marian::ReproTask(); + task.run(); return 0; } -} From f4e227e019c4b79d565971f787e8d51a53a9b05c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 17 Feb 2021 17:14:35 +0200 Subject: [PATCH 023/135] Load config using the cli parser so that 
we can have default values for options --- src/command/bug_repro.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp index 20abfec28..f69f5a74f 100644 --- a/src/command/bug_repro.cpp +++ b/src/command/bug_repro.cpp @@ -1,3 +1,4 @@ +#include "../common/config_parser.h" #include "../common/options.h" #include "../data/text_input.h" #include "../models/model_factory.h" @@ -12,11 +13,20 @@ class ReproTask : public marian::ModelTask { ReproTask() { } void run() override { - io::InputFileStream strm("/home/rihards/exp/marian-adaptive-crash-repro/models/model.npz.repro.yml"); - YAML::Node optionsNode = YAML::Load(strm); - auto optionsBig = New(optionsNode); - auto options = New("after", "0e"); - options->merge(optionsBig); + auto parser = ConfigParser(cli::mode::training); + // i'm prob leaking memory at the end of run() but i don't care + const char* argseasy[] + = {"marian", + "-c", + "/home/rihards/exp/marian-adaptive-crash-repro/models/model.npz.repro.yml"}; + int argc = sizeof(argseasy) / sizeof(char*); + // this is as close as i could get to initializing a char** in a sane manner + char** args = new char*[argc]; + for (int i = 0; i < argc; i++) { + args[i] = strdup(argseasy[i]); + } + auto options = parser.parseOptions(argc, args, false); + auto builder = models::createCriterionFunctionFromOptions(options, models::usage::training); auto optimizer = Optimizer(0.01); From dcb71223a180a0fd4cb0176fd9dd2b10b2a640af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 24 Feb 2021 13:11:16 +0200 Subject: [PATCH 024/135] Add dummy values for training sets in the config --- src/command/bug_repro.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp index f69f5a74f..6d9c48b7e 100644 --- a/src/command/bug_repro.cpp +++ b/src/command/bug_repro.cpp @@ -18,7 +18,8 @@ class ReproTask : public 
marian::ModelTask { const char* argseasy[] = {"marian", "-c", - "/home/rihards/exp/marian-adaptive-crash-repro/models/model.npz.repro.yml"}; + "/home/rihards/exp/marian-adaptive-crash-repro/models/model.npz.repro.yml", + "-t", "dummy-value", "-t", "dummy-value"}; int argc = sizeof(argseasy) / sizeof(char*); // this is as close as i could get to initializing a char** in a sane manner char** args = new char*[argc]; From fcb9a61bee28b983b505c4bb7e7531d66eacfb3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 1 Mar 2021 11:06:47 +0200 Subject: [PATCH 025/135] Repeat the graph initialization in a cycle --- src/command/bug_repro.cpp | 66 +++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp index 6d9c48b7e..10ac3cadb 100644 --- a/src/command/bug_repro.cpp +++ b/src/command/bug_repro.cpp @@ -19,7 +19,11 @@ class ReproTask : public marian::ModelTask { = {"marian", "-c", "/home/rihards/exp/marian-adaptive-crash-repro/models/model.npz.repro.yml", - "-t", "dummy-value", "-t", "dummy-value"}; + "-t", "dummy-value", "-t", "dummy-value", + "--after-batches", "20", + "--after-epochs", "20", + "--learn-rate", "0.1", + "--mini-batch", "1"}; int argc = sizeof(argseasy) / sizeof(char*); // this is as close as i could get to initializing a char** in a sane manner char** args = new char*[argc]; @@ -55,43 +59,45 @@ class ReproTask : public marian::ModelTask { auto inputs = New(std::vector({sources, targets}), vocabs, options); auto batches = New>(inputs, options); - auto state = New(options->get("learn-rate")); - auto scheduler = New(options, state); - scheduler->registerTrainingObserver(scheduler); - scheduler->registerTrainingObserver(optimizer); + for(size_t i = 0; i < 10; i++) { + auto state = New(options->get("learn-rate")); + auto scheduler = New(options, state); + scheduler->registerTrainingObserver(scheduler); + 
scheduler->registerTrainingObserver(optimizer); - Ptr graph; + Ptr graph; - bool first = true; - scheduler->started(); - while(scheduler->keepGoing()) { - batches->prepare(); + bool first = true; + scheduler->started(); + while(scheduler->keepGoing()) { + batches->prepare(); - for(auto batch : *batches) { - if(!scheduler->keepGoing()) { - break; - } + for(auto batch : *batches) { + if(!scheduler->keepGoing()) { + break; + } - if(first) { - graph = New(); - graph->setDevice({0, DeviceType::cpu}); - graph->reserveWorkspaceMB(128); - first = false; - } + if(first) { + graph = New(); + graph->setDevice({0, DeviceType::cpu}); + graph->reserveWorkspaceMB(128); + first = false; + } - auto lossNode = builder->build(graph, batch); - graph->forward(); - StaticLoss loss = *lossNode; - graph->backward(); + auto lossNode = builder->build(graph, batch); + graph->forward(); + StaticLoss loss = *lossNode; + graph->backward(); - optimizer->update(graph); - scheduler->update(loss, batch); - } + optimizer->update(graph); + scheduler->update(loss, batch); + } - if(scheduler->keepGoing()) - scheduler->increaseEpoch(); + if(scheduler->keepGoing()) + scheduler->increaseEpoch(); + } + scheduler->finished(); } - scheduler->finished(); } }; } From 6560067f96be5bb31140093adefdb521a1f2fbdc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 2 Mar 2021 15:52:20 +0200 Subject: [PATCH 026/135] Add a part of the self adaptive marian's implementation --- CMakeLists.txt | 8 +- src/CMakeLists.txt | 8 + src/command/marian_adaptive.cpp | 66 +++++++ src/common/config_parser.cpp | 98 +++++---- src/common/config_parser.h | 2 +- src/translator/output_collector.cpp | 4 + src/translator/output_collector.h | 15 +- src/translator/self_adaptive.h | 296 ++++++++++++++++++++++++++++ 8 files changed, 453 insertions(+), 44 deletions(-) create mode 100644 src/command/marian_adaptive.cpp create mode 100644 src/translator/self_adaptive.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 
dffbd1ff2..a1ebded05 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,7 @@ option(COMPILE_CPU "Compile CPU version" ON) option(COMPILE_CUDA "Compile GPU version" ON) option(COMPILE_EXAMPLES "Compile examples" OFF) option(COMPILE_SERVER "Compile marian-server" OFF) +option(COMPILE_ADAPTIVE "Compile marian-ADAPTIVE" OFF) option(COMPILE_TESTS "Compile tests" OFF) option(USE_APPLE_ACCELERATE "Compile with Apple Accelerate" OFF) option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF) @@ -464,7 +465,7 @@ endif(COMPILE_CPU) ############################################################################### # Find OpenSSL set(BOOST_COMPONENTS "") -if(COMPILE_SERVER) +if(COMPILE_SERVER OR COMPILE_ADAPTIVE) find_package(OpenSSL) if(OpenSSL_FOUND) message(STATUS "Found OpenSSL") @@ -479,10 +480,11 @@ if(COMPILE_SERVER) endif() set(BOOST_COMPONENTS ${BOOST_COMPONENTS} system) else(OpenSSL_FOUND) - message(WARNING "Cannot find OpenSSL library. Not compiling server.") + message(WARNING "Cannot find OpenSSL library. 
Not compiling server or marian-adaptive.") set(COMPILE_SERVER "off") + set(COMPILE_ADAPTIVE "off") endif(OpenSSL_FOUND) -endif(COMPILE_SERVER) +endif(COMPILE_SERVER OR COMPILE_ADAPTIVE) ############################################################################### # Undo static lib search and put non-static searches here: diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index a3a8008e1..6084f091e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -246,6 +246,7 @@ if (NOT COMPILE_LIBRARY_ONLY) "${CMAKE_BINARY_DIR}/marian-scorer" "${CMAKE_BINARY_DIR}/marian-vocab" "${CMAKE_BINARY_DIR}/marian-conv" + "${CMAKE_BINARY_DIR}/marian-adaptive" DEPENDS marian_train marian_decoder marian_scorer marian_vocab marian_conv) add_custom_target(marian_zip DEPENDS "${CMAKE_BINARY_DIR}/marian.zip") @@ -257,6 +258,7 @@ if (NOT COMPILE_LIBRARY_ONLY) "marian-scorer" "marian-vocab" "marian-conv" + "marian-adaptive" DEPENDS marian_train marian_decoder marian_scorer marian_vocab marian_conv) add_custom_target(marian_tgz DEPENDS "${CMAKE_BINARY_DIR}/marian.tgz") add_custom_target(philly DEPENDS marian_tgz marian_zip) @@ -279,6 +281,12 @@ if (NOT COMPILE_LIBRARY_ONLY) set(EXECUTABLES ${EXECUTABLES} marian_server) endif(COMPILE_SERVER) + if(COMPILE_ADAPTIVE) + add_executable(marian_adaptive command/marian_adaptive.cpp) + set_target_properties(marian_adaptive PROPERTIES OUTPUT_NAME marian-adaptive) + set(EXECUTABLES ${EXECUTABLES} marian_adaptive) + endif(COMPILE_ADAPTIVE) + foreach(exec ${EXECUTABLES}) target_link_libraries(${exec} marian) if(CUDA_FOUND) diff --git a/src/command/marian_adaptive.cpp b/src/command/marian_adaptive.cpp new file mode 100644 index 000000000..0f64a84ca --- /dev/null +++ b/src/command/marian_adaptive.cpp @@ -0,0 +1,66 @@ +#include "marian.h" + +#include "3rd_party/simple-websocket-server/server_ws.hpp" +#include "common/file_stream.h" +#include "common/timer.h" +#include "common/utils.h" +#include "training/training.h" +#include 
"translator/self_adaptive.h" + +using namespace marian; + +typedef SimpleWeb::SocketServer WSServer; + +int main(int argc, char **argv) { + auto options = parseOptions(argc, argv, cli::mode::selfadaptive); + auto task = New(options); + + if(options->has("port") && options->get("port") != 0) { + // Initialize web server + WSServer server; + server.config.port = options->get("port", 8080); + + auto &translate = server.endpoint["^/translate/?$"]; + + translate.on_message = [&task](Ptr connection, + Ptr message) { + auto sendStream = std::make_shared(); + + // Get input text + auto inputText = message->string(); + + // Translate + timer::Timer timer; + auto outputText = task->run(inputText); + LOG(info, "Best translation: {}", outputText); + *sendStream << outputText << std::endl; + LOG(info, "Translation took: {:.5f}s", timer.elapsed()); + + // Send translation back + connection->send(sendStream, [](const SimpleWeb::error_code &ec) { + if(ec) + LOG(error, "Error sending message: ({}) {}", ec.value(), ec.message()); + }); + }; + + // Error Codes for error code meanings + // http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html + translate.on_error = [](Ptr connection, const SimpleWeb::error_code &ec) { + LOG(error, "Connection error: ({}) {}", ec.value(), ec.message()); + }; + + // Start server thread + std::thread serverThread([&server]() { + LOG(info, "Server is listening on port {}", server.config.port); + server.start(); + }); + + serverThread.join(); + } else { + timer::Timer timer; + task->run(); + LOG(info, "Total time: {:.5f}s", timer.elapsed()); + } + + return 0; +} diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index b2c73b2b7..f611f9246 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -94,6 +94,11 @@ ConfigParser::ConfigParser(cli::mode mode) case cli::mode::embedding: addOptionsEmbedding(cli_); break; + case cli::mode::selfadaptive: + addOptionsTraining(cli_); + 
addOptionsValidation(cli_); + addOptionsServer(cli_); + break; default: ABORT("wrong CLI mode"); break; @@ -159,9 +164,11 @@ void ConfigParser::addOptionsGeneral(cli::CLIWrapper& cli) { void ConfigParser::addOptionsServer(cli::CLIWrapper& cli) { // clang-format off auto previous_group = cli.switchGroup("Server options"); + // TODO why is this needed? + size_t defaultPort = mode_ == cli::mode::selfadaptive ? 0 : 8080; cli.add("--port,-p", "Port number for web socket server", - 8080); + defaultPort); cli.switchGroup(previous_group); // clang-format on } @@ -318,7 +325,7 @@ void ConfigParser::addOptionsModel(cli::CLIWrapper& cli) { {1, 2, 3, 4, 5, 6, 7, 8}); #endif - if(mode_ == cli::mode::training) { + if(mode_ == cli::mode::training || mode_ == cli::mode::selfadaptive) { // TODO: add ->range(0,1); cli.add("--dropout-rnn", "Scaling dropout along rnn layers and time (0 = no dropout)"); @@ -370,9 +377,13 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { #endif // scheduling options + // In self-adaptive mode users would typically want less updates to happen than in regular training + size_t defaultAfterEpochs = (mode_ == cli::mode::selfadaptive) ? 2 : 0; + std::string defaultDispFreq = (mode_ == cli::mode::selfadaptive) ? "1" : "1000u"; + // @TODO: these should be re-defined as aliases for `--after` but the current frame work matches on value, so not doable. 
cli.add("--after-epochs,-e", - "Finish after this many epochs, 0 is infinity (deprecated, '--after-epochs N' corresponds to '--after Ne')"); // @TODO: replace with alias + "Finish after this many epochs, 0 is infinity (deprecated, '--after-epochs N' corresponds to '--after Ne')", defaultAfterEpochs); // @TODO: replace with alias cli.add("--after-batches", "Finish after this many batch updates, 0 is infinity (deprecated, '--after-batches N' corresponds to '--after Nu')"); // @TODO: replace with alias @@ -381,7 +392,7 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { "0e"); cli.add("--disp-freq", "Display information every arg updates (append 't' for every arg target labels)", - "1000u"); + defaultDispFreq); cli.add("--disp-first", "Display information for the first arg updates"); cli.add("--disp-label-counts", @@ -401,31 +412,34 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { addSuboptionsTSV(cli); // data management options - cli.add("--shuffle", - "How to shuffle input data (data: shuffles data and sorted batches; batches: " - "data is read in order into batches, but batches are shuffled; none: no shuffling). 
" - "Use with '--maxi-batch-sort none' in order to achieve exact reading order", "data"); - cli.add("--no-shuffle", - "Shortcut for backwards compatiblity, equivalent to --shuffle none (deprecated)"); - cli.add("--no-restore-corpus", - "Skip restoring corpus state after training is restarted"); - cli.add("--tempdir,-T", - "Directory for temporary (shuffled) files and database", - "/tmp"); - cli.add("--sqlite", - "Use disk-based sqlite3 database for training corpus storage, default" - " is temporary with path creates persistent storage") - ->implicit_val("temporary"); - cli.add("--sqlite-drop", - "Drop existing tables in sqlite3 database"); + if (mode_ != cli::mode::selfadaptive) { + cli.add("--shuffle", + "How to shuffle input data (data: shuffles data and sorted batches; batches: " + "data is read in order into batches, but batches are shuffled; none: no shuffling). " + "Use with '--maxi-batch-sort none' in order to achieve exact reading order", "data"); + cli.add("--no-shuffle", + "Shortcut for backwards compatiblity, equivalent to --shuffle none (deprecated)"); + cli.add("--no-restore-corpus", + "Skip restoring corpus state after training is restarted"); + cli.add("--tempdir,-T", + "Directory for temporary (shuffled) files and database", + "/tmp"); + cli.add("--sqlite", + "Use disk-based sqlite3 database for training corpus storage, default" + " is temporary with path creates persistent storage") + ->implicit_val("temporary"); + cli.add("--sqlite-drop", + "Drop existing tables in sqlite3 database"); + } addSuboptionsDevices(cli); addSuboptionsBatching(cli); // optimizer options + auto defaultOptimizer = (mode_ == cli::mode::selfadaptive) ? "sgd" : "adam"; cli.add("--optimizer,-o", "Optimization algorithm: sgd, adagrad, adam", - "adam"); + defaultOptimizer); cli.add>("--optimizer-params", "Parameters for optimization algorithm, e.g. betas for Adam. 
" "Auto-adjusted to --mini-batch-words-ref if given"); @@ -635,8 +649,11 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { cli.add("--output,-o", "Path to output file, stdout by default", "stdout"); - cli.add>("--vocabs,-v", - "Paths to vocabulary files have to correspond to --input"); + // for self-adaptive mode vocabs are already added via the training options + if(mode_ != cli::mode::selfadaptive) { + cli.add>("--vocabs,-v", + "Paths to vocabulary files have to correspond to --input"); + } // decoding options cli.add("--beam-size,-b", "Beam size used during search with validating translator", @@ -668,16 +685,21 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { "Keep the output segmented into SentencePiece subwords"); #endif - addSuboptionsInputLength(cli); - addSuboptionsTSV(cli); - addSuboptionsDevices(cli); - addSuboptionsBatching(cli); + if(mode_ != cli::mode::selfadaptive) { + addSuboptionsInputLength(cli); + addSuboptionsTSV(cli); + addSuboptionsDevices(cli); + addSuboptionsBatching(cli); + } - cli.add("--fp16", - "Shortcut for mixed precision inference with float16, corresponds to: --precision float16"); - cli.add>("--precision", - "Mixed precision for inference, set parameter type in expression graph", - {"float32"}); + // for self-adaptive mode vocabs are already added via the training options + if(mode_ != cli::mode::selfadaptive) { + cli.add("--fp16", + "Shortcut for mixed precision inference with float16, corresponds to: --precision float16"); + cli.add>("--precision", + "Mixed precision for inference, set parameter type in expression graph", + {"float32"}); + } cli.add("--skip-cost", "Ignore model cost during translation, not recommended for beam-size > 1"); @@ -695,7 +717,8 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { "Path to model to swap to."); #if 0 // @TODO: Ask Hany if there are any decoding-time options // add ULR settings - addSuboptionsULR(cli); + if(mode_ != 
cli::mode::selfadaptive) + addSuboptionsULR(cli); #endif cli.switchGroup(previous_group); @@ -819,8 +842,9 @@ void ConfigParser::addSuboptionsDevices(cli::CLIWrapper& cli) { } void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) { - int defaultMiniBatch = (mode_ == cli::mode::translation) ? 1 : 64; - int defaultMaxiBatch = (mode_ == cli::mode::translation) ? 1 : 100; + bool transMode = mode_ == cli::mode::translation || mode_ == cli::mode::selfadaptive; + int defaultMiniBatch = transMode ? 1 : 64; + int defaultMaxiBatch = transMode ? 1 : 100; std::string defaultMaxiBatchSort = (mode_ == cli::mode::translation) ? "none" : "trg"; // clang-format off @@ -852,7 +876,7 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) { "Sorting strategy for maxi-batch: none, src, trg (not available for decoder)", defaultMaxiBatchSort); - if(mode_ == cli::mode::training) { + if(mode_ == cli::mode::training || mode_ == cli::mode::selfadaptive) { cli.add("--shuffle-in-ram", "Keep shuffled corpus in RAM, do not write to temp file"); // @TODO: Consider making the next two options options of the vocab instead, to make it more local in scope. 
diff --git a/src/common/config_parser.h b/src/common/config_parser.h index 18b6eccb7..b6b825d7d 100644 --- a/src/common/config_parser.h +++ b/src/common/config_parser.h @@ -14,7 +14,7 @@ namespace marian { namespace cli { -enum struct mode { training, translation, scoring, server, embedding }; +enum struct mode { training, translation, scoring, server, embedding, selfadaptive }; } // namespace cli /** diff --git a/src/translator/output_collector.cpp b/src/translator/output_collector.cpp index 078be232b..b74a5a54c 100644 --- a/src/translator/output_collector.cpp +++ b/src/translator/output_collector.cpp @@ -81,6 +81,10 @@ void OutputCollector::Write(long sourceId, StringCollector::StringCollector(bool quiet /*=false*/) : maxId_(-1), quiet_(quiet) {} +void StringCollector::Write(long sourceId, const std::string &best1, const std::string &bestn, bool) { + StringCollector::add(sourceId, best1, bestn); +} + void StringCollector::add(long sourceId, const std::string& best1, const std::string& bestn) { diff --git a/src/translator/output_collector.h b/src/translator/output_collector.h index 0e6bfc9f8..4b0c48f13 100644 --- a/src/translator/output_collector.h +++ b/src/translator/output_collector.h @@ -44,7 +44,12 @@ class GeometricPrinting : public PrintingStrategy { long next_{10}; }; -class OutputCollector { +struct CollectorBase { + virtual void Write(long sourceId, const std::string& best1, const std::string& bestn, bool nbest) + = 0; +}; + +class OutputCollector : public CollectorBase { public: OutputCollector(); OutputCollector(std::string outFile); @@ -57,7 +62,7 @@ class OutputCollector { void Write(long sourceId, const std::string& best1, const std::string& bestn, - bool nbest); + bool nbest) override; void setPrintingStrategy(Ptr strategy) { printing_ = strategy; @@ -72,11 +77,15 @@ class OutputCollector { std::mutex mutex_; }; -class StringCollector { +class StringCollector : public CollectorBase { public: StringCollector(bool quiet = false); 
StringCollector(const StringCollector&) = delete; + void Write(long sourceId, + const std::string& best1, + const std::string& bestn, + bool nbest) override; void add(long sourceId, const std::string& best1, const std::string& bestn); std::vector collect(bool nbest); diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h new file mode 100644 index 000000000..c03a85ea3 --- /dev/null +++ b/src/translator/self_adaptive.h @@ -0,0 +1,296 @@ +#pragma once + +#include "common/config.h" +#include "common/file_stream.h" +#include "data/batch_generator.h" +#include "data/text_input.h" +#include "models/model_task.h" +#include "training/scheduler.h" +#include "training/validator.h" + +namespace marian { + +using namespace data; + +class TrainSetReader { + std::vector> files_; + +public: + TrainSetReader(std::vector paths) { + for(auto& path : paths) + files_.emplace_back(new io::InputFileStream(path)); + } + + std::vector getSamples() { + // extracted lines for source and target corpora + std::vector samples; + // counters of number of lines extracted for source and target + std::vector counts; + + for(auto const& file : files_) { + size_t currCount = 0; + std::string lines; + std::string line; + while(io::getline(*file, line)) { + if(line.empty()) + break; + + if(currCount) + lines += "\n"; + lines += line; + currCount += 1; + } + + if(!lines.empty()) + samples.emplace_back(lines); + counts.push_back(currCount); + + // check if the same number of lines is extracted for source and target + size_t prevCount = counts[0]; + for(size_t i = 1; i < counts.size(); ++i) { + ABORT_IF(prevCount != counts[i], + "An empty source or target sentence has been encountered!"); + prevCount = counts[i]; + } + } + + return samples; + } +}; + +class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { +public: + TrainSelfAdaptive(Ptr options) : options_(options) { + + // @TODO: should probably better re-enable the shuffling related options + // in config for 
marian-adaptive + options_->set("shuffle", "none"); + // Set up translator options + optionsTrans_ = New(options_->clone()); + optionsTrans_->set("mini-batch", 1); + optionsTrans_->set("maxi-batch", 1); + optionsTrans_->set("max-length", 1000); + optionsTrans_->set("shuffle", "none"); + + // auto deviceId = Config::getDevices(options_)[0]; + + // Initialize model for training + // graph_ = New(); + // graph_->setDevice(deviceId); + // graph_->reserveWorkspaceMB(options_->get("workspace")); + builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); + + optimizer_ = Optimizer(options_); + + // Initialize model for translation + Ptr opts = New(); + opts->merge(options_); + opts->set("inference", true); + // builderTrans_ = models::createModelFromOptions(opts, models::usage::translation); + + // Initialize a scorer for translation + auto model = options_->get("model"); + // Ptr scorer = New(builderTrans_, "", 1.0f, model); + // scorers_.push_back(scorer); + + // Read vocabularies + auto vocabPaths = options_->get>("vocabs"); + std::vector maxVocabs = options_->get>("dim-vocabs"); + for(size_t i = 0; i < vocabPaths.size(); ++i) { + Ptr vocab = New(options_, i); + vocab->load(vocabPaths[i], maxVocabs[i]); + vocabs_.emplace_back(vocab); + } + + // Load model + // builder_->load(graph_, model); + } + + std::string run(const std::string& json) override { + //LOG(warn, "REMOVEME Received Json:\n{}", json); + + // Check if input is in JSON + YAML::Node yaml = YAML::Load(json); + if(!yaml["input"]) { + LOG(warn, "No 'input' node found in the request"); + return ""; + } + + // Get input sentences + auto input = yaml["input"].as(); + std::vector> srcVocabs(vocabs_.begin(), vocabs_.end() - 1); + auto testSet = New(std::vector({input}), srcVocabs, optionsTrans_); + + // Prepare batches + auto testBatches = New>(testSet, optionsTrans_); + testBatches->prepare(); + + // Initialize output printing + auto collector = New(); + auto printer = 
New(options_, vocabs_.back()); + + // Get training sentences + std::vector> contexts; + if(yaml["context"]) + contexts = yaml["context"].as>>(); + + LOG(info, "Running..."); + + size_t id = 0; + for(auto testBatch : *testBatches) { + if(contexts.size() > id && !contexts[id].empty()) { + // train(contexts[id]); + translate(testBatch, collector, printer, graphAdapt_); + } else { + LOG(info, "No context provided for sentence {}", id); + // translate(testBatch, collector, printer, graph_); + } + + // iterating by 1 is quite safe because the mini-batch size for + // translation is always 1 + ++id; + } + + auto translations = collector->collect(options_->get("n-best")); + YAML::Emitter output; + output << YAML::DoubleQuoted << YAML::Flow << utils::join(translations, "\\n"); + return "{\"output\":" + std::string(output.c_str()) + "}"; + } + + void run() override { + // Initialize input data + auto srcPaths = options_->get>("input"); + std::vector> srcVocabs(vocabs_.begin(), vocabs_.end() - 1); + auto testSet = New(srcPaths, srcVocabs, optionsTrans_); + + // Prepare batches + auto testBatches = New>(testSet, optionsTrans_); + testBatches->prepare(); + + // Initialize output printing + auto collector = New(options_->get("output")); + if(options_->get("quiet-translation")) + collector->setPrintingStrategy(New()); + auto printer = New(options_, vocabs_.back()); + + // Initialize train data + auto trainPaths = options_->get>("train-sets"); + auto trainSets = New(trainPaths); + + LOG(info, "Running..."); + + // auto state = New(options_->get("learn-rate")); + // auto scheduler = New(options_, state); + // scheduler->registerTrainingObserver(scheduler); + // scheduler->registerTrainingObserver(optimizer_); + + for(auto testBatch : *testBatches) { + auto trainSet = trainSets->getSamples(); + + if(!trainSet.empty()) { + LOG(info, "### NEW TEST BATCH"); + train(trainSet, nullptr); + translate(testBatch, collector, printer, graphAdapt_); + } else { + LOG(info, "### EMPTY TEST 
BATCH"); + // translate(testBatch, collector, printer, graph_); + } + } + } + +private: + Ptr options_; // Options for training + Ptr optionsTrans_; // Options for translator + + Ptr builder_; // Training model + // Ptr builderTrans_; // Translation model + // Ptr graph_; // A graph with original parameters + Ptr graphAdapt_; // A graph on which training is performed + + std::vector> vocabs_; + // std::vector> scorers_; + Ptr optimizer_; + + void train(std::vector trainSents, std::shared_ptr _scheduler) { + auto state = New(options_->get("learn-rate")); + auto scheduler = New(options_, state); + scheduler->registerTrainingObserver(scheduler); + scheduler->registerTrainingObserver(optimizer_); + + auto trainSet = New(trainSents, vocabs_, options_); + auto trainBatches = New>(trainSet, options_); + + bool first = true; + + scheduler->started(); + while(scheduler->keepGoing()) { + trainBatches->prepare(); + + LOG(info, "### NEW BATCHES"); + for(auto batch : *trainBatches) { + if(!scheduler->keepGoing()) + break; + + LOG(info, "### NEW BATCH"); + // Copy params from the original model + if(first) { + auto deviceId = Config::getDevices(options_)[0]; + graphAdapt_ = New(); + graphAdapt_->setDevice(deviceId); + graphAdapt_->reserveWorkspaceMB(options_->get("workspace")); + + // builder_->build(graph_, batch); + // graph_->forward(); + + // graphAdapt_ = New(); + // graphAdapt_->setDevice(graph_->getDeviceId()); + // graphAdapt_->reuseWorkspace(graph_); + + // graphAdapt_->copyParams(graph_); + first = false; + } + + // Make an update step on the copy of the model + auto lossNode = builder_->build(graphAdapt_, batch); + graphAdapt_->forward(); + StaticLoss loss = *lossNode; + graphAdapt_->backward(); + + // Notify optimizer and scheduler + optimizer_->update(graphAdapt_); + scheduler->update(loss, batch); + } + if(scheduler->keepGoing()) + scheduler->increaseEpoch(); + } + scheduler->finished(); + } + + void translate(Ptr batch, + Ptr collector, + Ptr printer, + Ptr graph) 
{ + // graph->setInference(true); + // graph->clear(); + + // { + // auto search = New(options_, + // scorers_, + // vocabs_.back()); + // auto histories = search->search(graph, batch); + + // for(auto history : histories) { + // std::stringstream best1; + // std::stringstream bestn; + // printer->print(history, best1, bestn); + // collector->Write(history->getLineNum(), + // best1.str(), + // bestn.str(), + // options_->get("n-best")); + // } + // } + + // graph->setInference(false); + } +}; +} From 7130800012b689a4806efdf4659ee76d67d6baa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 3 Mar 2021 15:29:56 +0200 Subject: [PATCH 027/135] Fix compatability issues with some new refactors in master --- src/command/bug_repro.cpp | 4 ++-- src/common/config_validator.cpp | 5 +++++ src/translator/self_adaptive.h | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp index 10ac3cadb..f37c47add 100644 --- a/src/command/bug_repro.cpp +++ b/src/command/bug_repro.cpp @@ -33,7 +33,7 @@ class ReproTask : public marian::ModelTask { auto options = parser.parseOptions(argc, args, false); auto builder = models::createCriterionFunctionFromOptions(options, models::usage::training); - auto optimizer = Optimizer(0.01); + auto optimizer = Optimizer(New("optimizer", "adam", "learn-rate", 0.01)); std::vector vocabPaths = {"/home/rihards/exp/marian-adaptive-crash-repro/models/train.1-to-1.bpe.en-lv.yml", @@ -89,7 +89,7 @@ class ReproTask : public marian::ModelTask { StaticLoss loss = *lossNode; graph->backward(); - optimizer->update(graph); + optimizer->update(graph, 1); scheduler->update(loss, batch); } diff --git a/src/common/config_validator.cpp b/src/common/config_validator.cpp index b24001450..92c86c553 100644 --- a/src/common/config_validator.cpp +++ b/src/common/config_validator.cpp @@ -35,6 +35,11 @@ void ConfigValidator::validateOptions(cli::mode mode) const { 
validateOptionsParallelData(); validateOptionsTraining(); break; + case cli::mode::selfadaptive: + validateOptionsTranslation(); + validateOptionsParallelData(); + validateOptionsTraining(); + break; default: ABORT("wrong CLI mode"); break; diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index c03a85ea3..318acff6a 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -257,7 +257,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { graphAdapt_->backward(); // Notify optimizer and scheduler - optimizer_->update(graphAdapt_); + optimizer_->update(graphAdapt_, 1); scheduler->update(loss, batch); } if(scheduler->keepGoing()) From 10cdffab9cd891eb69aada7df8df6870119df240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 4 Mar 2021 17:12:29 +0200 Subject: [PATCH 028/135] Fix options parsing issues --- src/common/config_parser.cpp | 12 ++++++------ src/common/config_validator.cpp | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index f611f9246..79d0b7d1e 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -96,7 +96,7 @@ ConfigParser::ConfigParser(cli::mode mode) break; case cli::mode::selfadaptive: addOptionsTraining(cli_); - addOptionsValidation(cli_); + addOptionsTranslation(cli_); addOptionsServer(cli_); break; default: @@ -646,11 +646,11 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { cli.add>("--input,-i", "Paths to input file(s), stdin by default", {"stdin"}); - cli.add("--output,-o", - "Path to output file, stdout by default", - "stdout"); - // for self-adaptive mode vocabs are already added via the training options + // for self-adaptive mode these are already added via the training options if(mode_ != cli::mode::selfadaptive) { + cli.add("--output,-o", + "Path to output file, stdout by default", + "stdout"); 
cli.add>("--vocabs,-v", "Paths to vocabulary files have to correspond to --input"); } @@ -723,7 +723,7 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { cli.switchGroup(previous_group); // clang-format on -} + } void ConfigParser::addOptionsScoring(cli::CLIWrapper& cli) { auto previous_group = cli.switchGroup("Scorer options"); diff --git a/src/common/config_validator.cpp b/src/common/config_validator.cpp index 92c86c553..cbb8c3d86 100644 --- a/src/common/config_validator.cpp +++ b/src/common/config_validator.cpp @@ -36,9 +36,9 @@ void ConfigValidator::validateOptions(cli::mode mode) const { validateOptionsTraining(); break; case cli::mode::selfadaptive: - validateOptionsTranslation(); - validateOptionsParallelData(); - validateOptionsTraining(); + // validateOptionsTranslation(); + // validateOptionsParallelData(); + // validateOptionsTraining(); break; default: ABORT("wrong CLI mode"); From 286a23c8e5f6426f7bf8abfb06b9fbcc308920a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 10 Mar 2021 17:20:25 +0200 Subject: [PATCH 029/135] Fix remaining input parsing issues --- src/common/config_parser.cpp | 20 +++++++++++++++----- src/common/config_parser.h | 1 + 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 79d0b7d1e..b46b6a6e7 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -98,6 +98,7 @@ ConfigParser::ConfigParser(cli::mode mode) addOptionsTraining(cli_); addOptionsTranslation(cli_); addOptionsServer(cli_); + addOptionsStupid(cli_); break; default: ABORT("wrong CLI mode"); @@ -108,7 +109,16 @@ ConfigParser::ConfigParser(cli::mode mode) // clang-format on } -void ConfigParser::addOptionsGeneral(cli::CLIWrapper& cli) { +void ConfigParser::addOptionsStupid(cli::CLIWrapper & cli) { + auto previous_group = cli.switchGroup("Server options"); + cli.add( + "--early-stopping", + "Stop if the first validation metric 
does not improve for arg consecutive validation steps", + 10); + cli.switchGroup(previous_group); +} + +void ConfigParser::addOptionsGeneral(cli::CLIWrapper & cli) { int defaultWorkspace = (mode_ == cli::mode::translation) ? 512 : 2048; cli.switchGroup("General options"); @@ -437,7 +447,7 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { // optimizer options auto defaultOptimizer = (mode_ == cli::mode::selfadaptive) ? "sgd" : "adam"; - cli.add("--optimizer,-o", + cli.add("--optimizer", "Optimization algorithm: sgd, adagrad, adam", defaultOptimizer); cli.add>("--optimizer-params", @@ -646,11 +656,11 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { cli.add>("--input,-i", "Paths to input file(s), stdin by default", {"stdin"}); + cli.add("--output,-o", + "Path to output file, stdout by default", + "stdout"); // for self-adaptive mode these are already added via the training options if(mode_ != cli::mode::selfadaptive) { - cli.add("--output,-o", - "Path to output file, stdout by default", - "stdout"); cli.add>("--vocabs,-v", "Paths to vocabulary files have to correspond to --input"); } diff --git a/src/common/config_parser.h b/src/common/config_parser.h index b6b825d7d..744656458 100644 --- a/src/common/config_parser.h +++ b/src/common/config_parser.h @@ -130,6 +130,7 @@ class ConfigParser { void addOptionsTranslation(cli::CLIWrapper&); void addOptionsScoring(cli::CLIWrapper&); void addOptionsEmbedding(cli::CLIWrapper&); + void addOptionsStupid(cli::CLIWrapper&); void addAliases(cli::CLIWrapper&); From 85685c640dbc663ede34d0a1f06fd8cabef399cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 19 Mar 2021 13:48:06 +0200 Subject: [PATCH 030/135] Re-enable all of the adaptive code --- src/translator/self_adaptive.h | 106 ++++++++++++++++----------------- 1 file changed, 50 insertions(+), 56 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 318acff6a..e45602be6 100644 
--- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -72,12 +72,12 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { optionsTrans_->set("max-length", 1000); optionsTrans_->set("shuffle", "none"); - // auto deviceId = Config::getDevices(options_)[0]; + auto deviceId = Config::getDevices(options_)[0]; // Initialize model for training - // graph_ = New(); - // graph_->setDevice(deviceId); - // graph_->reserveWorkspaceMB(options_->get("workspace")); + graph_ = New(); + graph_->setDevice(deviceId); + graph_->reserveWorkspaceMB(options_->get("workspace")); builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); optimizer_ = Optimizer(options_); @@ -86,12 +86,12 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr opts = New(); opts->merge(options_); opts->set("inference", true); - // builderTrans_ = models::createModelFromOptions(opts, models::usage::translation); + builderTrans_ = models::createModelFromOptions(opts, models::usage::translation); // Initialize a scorer for translation auto model = options_->get("model"); - // Ptr scorer = New(builderTrans_, "", 1.0f, model); - // scorers_.push_back(scorer); + Ptr scorer = New(builderTrans_, "", 1.0f, model); + scorers_.push_back(scorer); // Read vocabularies auto vocabPaths = options_->get>("vocabs"); @@ -103,7 +103,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { } // Load model - // builder_->load(graph_, model); + builder_->load(graph_, model); } std::string run(const std::string& json) override { @@ -139,11 +139,11 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { size_t id = 0; for(auto testBatch : *testBatches) { if(contexts.size() > id && !contexts[id].empty()) { - // train(contexts[id]); + train(contexts[id]); translate(testBatch, collector, printer, graphAdapt_); } else { LOG(info, "No context provided for sentence {}", id); - // translate(testBatch, collector, 
printer, graph_); + translate(testBatch, collector, printer, graph_); } // iterating by 1 is quite safe because the mini-batch size for @@ -179,21 +179,16 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "Running..."); - // auto state = New(options_->get("learn-rate")); - // auto scheduler = New(options_, state); - // scheduler->registerTrainingObserver(scheduler); - // scheduler->registerTrainingObserver(optimizer_); - for(auto testBatch : *testBatches) { auto trainSet = trainSets->getSamples(); if(!trainSet.empty()) { - LOG(info, "### NEW TEST BATCH"); - train(trainSet, nullptr); + LOG(info, "# NEW TEST BATCH"); + train(trainSet); translate(testBatch, collector, printer, graphAdapt_); } else { - LOG(info, "### EMPTY TEST BATCH"); - // translate(testBatch, collector, printer, graph_); + LOG(info, "# EMPTY TEST BATCH"); + translate(testBatch, collector, printer, graph_); } } } @@ -203,15 +198,15 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr optionsTrans_; // Options for translator Ptr builder_; // Training model - // Ptr builderTrans_; // Translation model - // Ptr graph_; // A graph with original parameters + Ptr builderTrans_; // Translation model + Ptr graph_; // A graph with original parameters Ptr graphAdapt_; // A graph on which training is performed std::vector> vocabs_; - // std::vector> scorers_; + std::vector> scorers_; Ptr optimizer_; - void train(std::vector trainSents, std::shared_ptr _scheduler) { + void train(std::vector trainSents) { auto state = New(options_->get("learn-rate")); auto scheduler = New(options_, state); scheduler->registerTrainingObserver(scheduler); @@ -226,7 +221,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { while(scheduler->keepGoing()) { trainBatches->prepare(); - LOG(info, "### NEW BATCHES"); + LOG(info, "## NEW BATCHES"); for(auto batch : *trainBatches) { if(!scheduler->keepGoing()) break; @@ -234,19 +229,18 @@ class TrainSelfAdaptive : 
public ModelTask, public ModelServiceTask { LOG(info, "### NEW BATCH"); // Copy params from the original model if(first) { - auto deviceId = Config::getDevices(options_)[0]; - graphAdapt_ = New(); - graphAdapt_->setDevice(deviceId); - graphAdapt_->reserveWorkspaceMB(options_->get("workspace")); + builder_->build(graph_, batch); + // TODO: Why do we need to do a froward pass here? + graph_->forward(); - // builder_->build(graph_, batch); - // graph_->forward(); - - // graphAdapt_ = New(); - // graphAdapt_->setDevice(graph_->getDeviceId()); - // graphAdapt_->reuseWorkspace(graph_); + graphAdapt_ = New(); + graphAdapt_->setDevice(graph_->getDeviceId()); + graphAdapt_->reuseWorkspace(graph_); - // graphAdapt_->copyParams(graph_); + // TODO: why aren't we using a builder before this? + // it's probably because the order doesn't matter and the + // builder is used below + graphAdapt_->copyParams(graph_); first = false; } @@ -270,27 +264,27 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr collector, Ptr printer, Ptr graph) { - // graph->setInference(true); - // graph->clear(); - - // { - // auto search = New(options_, - // scorers_, - // vocabs_.back()); - // auto histories = search->search(graph, batch); - - // for(auto history : histories) { - // std::stringstream best1; - // std::stringstream bestn; - // printer->print(history, best1, bestn); - // collector->Write(history->getLineNum(), - // best1.str(), - // bestn.str(), - // options_->get("n-best")); - // } - // } - - // graph->setInference(false); + graph->setInference(true); + graph->clear(); + + { + auto search = New(options_, + scorers_, + vocabs_.back()); + auto histories = search->search(graph, batch); + + for(auto history : histories) { + std::stringstream best1; + std::stringstream bestn; + printer->print(history, best1, bestn); + collector->Write(history->getLineNum(), + best1.str(), + bestn.str(), + options_->get("n-best")); + } + } + + graph->setInference(false); } }; } From 
fca5fe4b5e1e1a5dedbd0ff66dc838eb89424176 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 19 Mar 2021 15:04:24 +0200 Subject: [PATCH 031/135] Some further debugging, ugh --- src/translator/self_adaptive.h | 62 +++++++++++++++++++--------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index e45602be6..1582bb997 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -89,7 +89,8 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { builderTrans_ = models::createModelFromOptions(opts, models::usage::translation); // Initialize a scorer for translation - auto model = options_->get("model"); + // auto model = options_->get("model"); + model = options_->get("model"); Ptr scorer = New(builderTrans_, "", 1.0f, model); scorers_.push_back(scorer); @@ -201,6 +202,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr builderTrans_; // Translation model Ptr graph_; // A graph with original parameters Ptr graphAdapt_; // A graph on which training is performed + std::string model; std::vector> vocabs_; std::vector> scorers_; @@ -229,18 +231,22 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "### NEW BATCH"); // Copy params from the original model if(first) { - builder_->build(graph_, batch); - // TODO: Why do we need to do a froward pass here? - graph_->forward(); + // builder_->build(graph_, batch); + // // TODO: Why do we need to do a froward pass here? 
+ // graph_->forward(); graphAdapt_ = New(); - graphAdapt_->setDevice(graph_->getDeviceId()); - graphAdapt_->reuseWorkspace(graph_); + // graphAdapt_->setDevice(graph_->getDeviceId()); + auto deviceId = Config::getDevices(options_)[0]; + graphAdapt_->setDevice(deviceId); + // graphAdapt_->reuseWorkspace(graph_); + graphAdapt_->reserveWorkspaceMB(options_->get("workspace")); // TODO: why aren't we using a builder before this? // it's probably because the order doesn't matter and the // builder is used below - graphAdapt_->copyParams(graph_); + // graphAdapt_->copyParams(graph_); + // builder_->load(graphAdapt_, model); first = false; } @@ -264,27 +270,27 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr collector, Ptr printer, Ptr graph) { - graph->setInference(true); - graph->clear(); - - { - auto search = New(options_, - scorers_, - vocabs_.back()); - auto histories = search->search(graph, batch); - - for(auto history : histories) { - std::stringstream best1; - std::stringstream bestn; - printer->print(history, best1, bestn); - collector->Write(history->getLineNum(), - best1.str(), - bestn.str(), - options_->get("n-best")); - } - } - - graph->setInference(false); + // graph->setInference(true); + // graph->clear(); + + // { + // auto search = New(options_, + // scorers_, + // vocabs_.back()); + // auto histories = search->search(graph, batch); + + // for(auto history : histories) { + // std::stringstream best1; + // std::stringstream bestn; + // printer->print(history, best1, bestn); + // collector->Write(history->getLineNum(), + // best1.str(), + // bestn.str(), + // options_->get("n-best")); + // } + // } + + // graph->setInference(false); } }; } From 37b6aa4d0d91de42ddaa41073271cc6c2a3946a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Mar 2021 11:33:44 +0300 Subject: [PATCH 032/135] Fix the way inputs are initialized --- src/command/bug_repro.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 
deletions(-) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp index f37c47add..ee9bc905e 100644 --- a/src/command/bug_repro.cpp +++ b/src/command/bug_repro.cpp @@ -21,8 +21,9 @@ class ReproTask : public marian::ModelTask { "/home/rihards/exp/marian-adaptive-crash-repro/models/model.npz.repro.yml", "-t", "dummy-value", "-t", "dummy-value", "--after-batches", "20", - "--after-epochs", "20", + "--after-epochs", "4", "--learn-rate", "0.1", + "--shuffle", "none", "--mini-batch", "1"}; int argc = sizeof(argseasy) / sizeof(char*); // this is as close as i could get to initializing a char** in a sane manner @@ -56,8 +57,8 @@ class ReproTask : public marian::ModelTask { "H@@ LL ) 6,@@ 7 , 8 .\n30 –@@ 60 % H@@ LL pacientu ir konstatēta šī reģiona heter@@ " "oz@@ ig@@ ota del@@ ē@@ cija , savukārt 10 –@@ 20 % H@@ LL pacientu ir konstatēta šī " "reģiona hom@@ oz@@ ig@@ ota del@@ ē@@ c@@ ij@@ a@@ 9 ."; - auto inputs = New(std::vector({sources, targets}), vocabs, options); - auto batches = New>(inputs, options); + // auto inputs = New(std::vector({sources, targets}), vocabs, options); + // auto batches = New>(inputs, options); for(size_t i = 0; i < 10; i++) { auto state = New(options->get("learn-rate")); @@ -70,6 +71,11 @@ class ReproTask : public marian::ModelTask { bool first = true; scheduler->started(); while(scheduler->keepGoing()) { + // if inputs aren't initialized for each epoch, their internal istringstreams get exhausted + auto inputs + = New(std::vector({sources, targets}), vocabs, options); + auto batches = New>(inputs, options); + // auto batches = New>(inputs, options); batches->prepare(); for(auto batch : *batches) { From f67015e9a9233f81e68b537aa58bb18843e48557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 30 Mar 2021 10:08:34 +0300 Subject: [PATCH 033/135] Output graphviz graphs for the training graph --- src/command/bug_repro.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git 
a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp index ee9bc905e..a7f63e54b 100644 --- a/src/command/bug_repro.cpp +++ b/src/command/bug_repro.cpp @@ -61,6 +61,7 @@ class ReproTask : public marian::ModelTask { // auto batches = New>(inputs, options); for(size_t i = 0; i < 10; i++) { + LOG(info, "# NEW OUTER ITER"); auto state = New(options->get("learn-rate")); auto scheduler = New(options, state); scheduler->registerTrainingObserver(scheduler); @@ -70,7 +71,12 @@ class ReproTask : public marian::ModelTask { bool first = true; scheduler->started(); + + graph = New(); + graph->setDevice({0, DeviceType::cpu}); + graph->reserveWorkspaceMB(128); while(scheduler->keepGoing()) { + LOG(info, "## NEW INNER ITER"); // if inputs aren't initialized for each epoch, their internal istringstreams get exhausted auto inputs = New(std::vector({sources, targets}), vocabs, options); @@ -79,18 +85,16 @@ class ReproTask : public marian::ModelTask { batches->prepare(); for(auto batch : *batches) { + LOG(info, "### NEW BATCH"); if(!scheduler->keepGoing()) { break; } - if(first) { - graph = New(); - graph->setDevice({0, DeviceType::cpu}); - graph->reserveWorkspaceMB(128); + auto lossNode = builder->build(graph, batch); + if (first) { + graph->graphviz("graph-" + std::to_string(i) + ".gv"); first = false; } - - auto lossNode = builder->build(graph, batch); graph->forward(); StaticLoss loss = *lossNode; graph->backward(); From 78bcce1b93abc20cbd991fda11f9d4e900135779 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 31 Mar 2021 11:00:22 +0300 Subject: [PATCH 034/135] Fix the segfault in the repro by moving the builder inside the loop Turns out that the builder is maintaining some internal state and for some reason it messes everything up when building a new expressino graph the second time. Symptoms are 1. the node ids in the graph are incremented by a constant amount in the second expression graph 2. 
the graphviz diagram for the second graph is all messed up (edges missing and such) --- src/command/bug_repro.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp index a7f63e54b..86464ff77 100644 --- a/src/command/bug_repro.cpp +++ b/src/command/bug_repro.cpp @@ -33,7 +33,7 @@ class ReproTask : public marian::ModelTask { } auto options = parser.parseOptions(argc, args, false); - auto builder = models::createCriterionFunctionFromOptions(options, models::usage::training); + // auto builder = models::createCriterionFunctionFromOptions(options, models::usage::training); auto optimizer = Optimizer(New("optimizer", "adam", "learn-rate", 0.01)); std::vector vocabPaths @@ -62,6 +62,7 @@ class ReproTask : public marian::ModelTask { for(size_t i = 0; i < 10; i++) { LOG(info, "# NEW OUTER ITER"); + auto builder = models::createCriterionFunctionFromOptions(options, models::usage::training); auto state = New(options->get("learn-rate")); auto scheduler = New(options, state); scheduler->registerTrainingObserver(scheduler); From 162a17c488a71e2a6ccd00bf60e7d26a16a30e3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 31 Mar 2021 11:24:27 +0300 Subject: [PATCH 035/135] Move the builder initialization inside run() to fix the segfault Turns out that the builder is maintaining some internal state and for some reason it messes everything up when building a new expression graph the second time. The symptoms are 1. the node ids in the graph are incremented by a constant amount in the second expression graph 2. 
the graphviz diagram for the second graph is all messed up (edges missing and such) This is meant as a workaround, it seems to be quite inefficient --- src/translator/self_adaptive.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 1582bb997..82dcf6239 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -78,7 +78,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { graph_ = New(); graph_->setDevice(deviceId); graph_->reserveWorkspaceMB(options_->get("workspace")); - builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); + // builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); optimizer_ = Optimizer(options_); @@ -104,7 +104,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { } // Load model - builder_->load(graph_, model); + // builder_->load(graph_, model); } std::string run(const std::string& json) override { @@ -231,6 +231,9 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "### NEW BATCH"); // Copy params from the original model if(first) { + builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); + builder_->load(graph_, model); + // builder_->build(graph_, batch); // // TODO: Why do we need to do a froward pass here? 
// graph_->forward(); From de49880ac8cf50ab975b579d2d5794260da6f130 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 1 Apr 2021 11:37:17 +0300 Subject: [PATCH 036/135] Use a dedicated builder for the adaptive graph to avoid segfaults This moves crashing further down the line - the crash now happens upon translation with the adaptive graph --- src/translator/self_adaptive.h | 56 ++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 82dcf6239..08a16e0d9 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -78,7 +78,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { graph_ = New(); graph_->setDevice(deviceId); graph_->reserveWorkspaceMB(options_->get("workspace")); - // builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); + builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); optimizer_ = Optimizer(options_); @@ -104,7 +104,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { } // Load model - // builder_->load(graph_, model); + builder_->load(graph_, model); } std::string run(const std::string& json) override { @@ -199,6 +199,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr optionsTrans_; // Options for translator Ptr builder_; // Training model + Ptr secondBuilder_; // To not get a segfault when training model else could just use builder_ Ptr builderTrans_; // Translation model Ptr graph_; // A graph with original parameters Ptr graphAdapt_; // A graph on which training is performed @@ -231,8 +232,9 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "### NEW BATCH"); // Copy params from the original model if(first) { - builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); - 
builder_->load(graph_, model); + secondBuilder_ + = models::createCriterionFunctionFromOptions(options_, models::usage::training); + // secondBuilder->load(graph_, model); // builder_->build(graph_, batch); // // TODO: Why do we need to do a froward pass here? @@ -249,12 +251,12 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { // it's probably because the order doesn't matter and the // builder is used below // graphAdapt_->copyParams(graph_); - // builder_->load(graphAdapt_, model); + secondBuilder_->load(graphAdapt_, model); first = false; } // Make an update step on the copy of the model - auto lossNode = builder_->build(graphAdapt_, batch); + auto lossNode = secondBuilder_->build(graphAdapt_, batch); graphAdapt_->forward(); StaticLoss loss = *lossNode; graphAdapt_->backward(); @@ -273,27 +275,27 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr collector, Ptr printer, Ptr graph) { - // graph->setInference(true); - // graph->clear(); - - // { - // auto search = New(options_, - // scorers_, - // vocabs_.back()); - // auto histories = search->search(graph, batch); - - // for(auto history : histories) { - // std::stringstream best1; - // std::stringstream bestn; - // printer->print(history, best1, bestn); - // collector->Write(history->getLineNum(), - // best1.str(), - // bestn.str(), - // options_->get("n-best")); - // } - // } - - // graph->setInference(false); + graph->setInference(true); + graph->clear(); + + { + auto search = New(options_, + scorers_, + vocabs_.back()); + auto histories = search->search(graph, batch); + + for(auto history : histories) { + std::stringstream best1; + std::stringstream bestn; + printer->print(history, best1, bestn); + collector->Write(history->getLineNum(), + best1.str(), + bestn.str(), + options_->get("n-best")); + } + } + + graph->setInference(false); } }; } From 29415c71f892d65ee7a025617242fee3dcb25bf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: 
Mon, 19 Apr 2021 11:32:36 +0300 Subject: [PATCH 037/135] Make a copy of all the swappable stuff to later adjust for training --- src/translator/swappable.cpp | 170 +++++++++++++++++++++++++++++------ src/translator/swappable.h | 77 ++++++++++++++++ 2 files changed, 222 insertions(+), 25 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index c5c2bae05..803fb352e 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -11,8 +11,33 @@ #include "tensors/gpu/swap.h" namespace marian { +std::string MultilineInputHack(const std::vector &input) { + if (input.size() == 1) { + return input[0]; + } else { + std::string ret; + std::size_t size = 0; + for (auto&& line : input) { + size += line.size() + 1; + } + ret.reserve(size); + for (auto&& line : input) { + ret.append(line); + ret.append("\n"); + } + return ret; + } +} -void GPUEngine::SwapPointers(std::vector &with) { +namespace { + DeviceId LookupGPU(const Ptr options, size_t deviceIdx) { + auto devices = Config::getDevices(options); + ABORT_IF(deviceIdx >= devices.size(), "GPU device index higher than configured."); + return devices[deviceIdx]; + } +} // namespace + +void GPUEngineTrain::SwapPointers(std::vector &with) { auto write_it = graph_->params()->begin(); auto read_it = with.begin(); for (; read_it != with.end(); ++write_it, ++read_it) { @@ -20,13 +45,126 @@ void GPUEngine::SwapPointers(std::vector &with) { } } -namespace { -DeviceId LookupGPU(const Ptr options, size_t deviceIdx) { - auto devices = Config::getDevices(options); - ABORT_IF(deviceIdx >= devices.size(), "GPU device index higher than configured."); - return devices[deviceIdx]; +GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) + : options_(options), graph_(New()), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { + ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); + options_->set("inference", true); 
+ options_->set("shuffle", "none"); + + // Create graph + auto prec = options_->get>("precision", {"float32"}); + graph_->setDefaultElementType(typeFromString(prec[0])); + graph_->setDevice(myDeviceId_); + graph_->reserveWorkspaceMB(options_->get("workspace")); + + scorers_ = createScorers(options_); + for (auto scorer : scorers_) { + scorer->init(graph_); + // TODO lexical shortlists are not supported yet. + } + graph_->forward(); + // TODO: reach into graph_->params() private members and free the parameter memory. +} + +GPUEngineTrain::~GPUEngineTrain() {} + +GPULoadedModelTrain::GPULoadedModelTrain(Ptr gpu) : engine_(gpu) { + for (auto ¶m : *engine_->graph_->params()) { + parameters_.push_back(engine_->allocator_.alloc(param->val()->memory()->size())); + } +} + +GPULoadedModelTrain::~GPULoadedModelTrain() { + for (MemoryPiece::PtrType &p : parameters_) { + engine_->allocator_.free(p); + } +} + +void GPULoadedModelTrain::Load(const GPULoadedModelTrain &from) { + srcVocabs_ = from.srcVocabs_; + trgVocab_ = from.trgVocab_; + + ABORT_IF(engine_ != from.engine_, "TODO: copy across GPUs."); + + for (size_t i = 0; i < parameters_.size(); ++i) { + swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), reinterpret_cast(from.parameters_[i]->data()), parameters_[i]->size(), engine_->myDeviceId_); + } +} + +void GPULoadedModelTrain::Load(const CPULoadedModelTrain &from) { + srcVocabs_ = from.SrcVocabs(); + trgVocab_ = from.TrgVocab(); + for (size_t i = 0; i < parameters_.size(); ++i) { + swapper::copyCpuToGpu(reinterpret_cast(parameters_[i]->data()), from.Parameters()[i].data(), from.Parameters()[i].size(), engine_->myDeviceId_); + } +} + +Histories GPULoadedModelTrain::Translate(const std::vector &input) { + ABORT_IF(!trgVocab_, "GPULoadedModelTrain needs to be overwritten by a CPU model first."); + engine_->SwapPointers(parameters_); + + auto corpus = New(std::vector(1, MultilineInputHack(input)), srcVocabs_, engine_->options_); // @TODO dirty hack + 
data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash + + BeamSearch search(engine_->options_, engine_->scorers_, trgVocab_); + Histories ret; + ret.reserve(input.size()); + for (auto&& batch : batchGenerator) { + auto result = search.search(engine_->graph_, batch); + ret.insert(ret.end(), result.begin(), result.end()); + } + std::sort(ret.begin(), ret.end(),[](marian::Ptr a, marian::Ptr b){return a->getLineNum() < b->getLineNum();}); + engine_->SwapPointers(parameters_); + return ret; +} + +CPULoadedModelTrain::CPULoadedModelTrain(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) + : parameters_(io::loadItems(parameters)) { + // Load parameters. + // Find the special element and remove it: + size_t special_idx = 0; + for (size_t i = 0; i < parameters_.size(); i++) { + if (parameters_[i].name == "special:model.yml") { + special_idx = i; + break; + } + } + parameters_.erase(parameters_.begin() + special_idx); + // Prepare the name so that it matches the named map + for (auto&& item : parameters_) { + item.name = "F0::" + item.name; + } + // Sort by name to match params order. + std::sort(parameters_.begin(), parameters_.end(), [](const io::Item &a, const io::Item &b){return a.name < b.name;}); + + // Load source vocabs. + const std::vector &maxVocabs = options->get>("dim-vocabs"); + for(size_t i = 0; i < sourceVocabPaths.size(); ++i) { + Ptr vocab = New(options, i); + vocab->load(sourceVocabPaths[i], maxVocabs[i]); + srcVocabs_.emplace_back(vocab); + } + + // Load target vocab. 
+ trgVocab_ = New(options, sourceVocabPaths.size()); + trgVocab_->load(targetVocabPath); +} + + + + // ##### ^ above is stuff for runtime domain adaptation + + + + + +void GPUEngine::SwapPointers(std::vector &with) { + auto write_it = graph_->params()->begin(); + auto read_it = with.begin(); + for (; read_it != with.end(); ++write_it, ++read_it) { + std::swap(*(*write_it)->val()->memory(), **read_it); + } } -} // namespace GPUEngine::GPUEngine(Ptr options, size_t deviceIdx) : options_(options), graph_(New()), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { @@ -82,24 +220,6 @@ void GPULoadedModel::Load(const CPULoadedModel &from) { } } -std::string MultilineInputHack(const std::vector &input) { - if (input.size() == 1) { - return input[0]; - } else { - std::string ret; - std::size_t size = 0; - for (auto&& line : input) { - size += line.size() + 1; - } - ret.reserve(size); - for (auto&& line : input) { - ret.append(line); - ret.append("\n"); - } - return ret; - } -} - Histories GPULoadedModel::Translate(const std::vector &input) { ABORT_IF(!trgVocab_, "GPULoadedModel needs to be overwritten by a CPU model first."); engine_->SwapPointers(parameters_); diff --git a/src/translator/swappable.h b/src/translator/swappable.h index b3cb5f82f..4b525c580 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -14,11 +14,88 @@ #include namespace marian { +class GPULoadedModelTrain; +class CPULoadedModelTrain; + class Scorer; class GPULoadedModel; class CPULoadedModel; + +/* Execute on a particular device */ +class GPUEngineTrain { +private: + friend class GPULoadedModelTrain; + Ptr options_; + Ptr graph_; + std::vector > scorers_; + const DeviceId myDeviceId_; + Allocator allocator_; + + void SwapPointers(std::vector &with); + +public: + /** + * @param options The marian options object + * @param deviceNum The index of the device you want to use for this slot. 
Note that this is not the deviceID but the index of the device in the + * array of supplied devices. Eg if you provide -d 0 3 5 and you want the Slot to run on GPU 3, you provide deviceNum=1. + */ + explicit GPUEngineTrain(Ptr options, size_t deviceNum); + + ~GPUEngineTrain(); +}; + +/* A model loaded on the GPU that can be overwritten from CPU or GPU. */ +class GPULoadedModelTrain { + private: + Ptr engine_; + + std::vector parameters_; + std::vector> srcVocabs_; + Ptr trgVocab_; + + public: + GPULoadedModelTrain(Ptr gpu); + + ~GPULoadedModelTrain(); + + const std::vector> &SrcVocabs() const { return srcVocabs_; } + + Ptr TrgVocab() const { return trgVocab_; } + + // Overwrite this model with parameters from a different one. + void Load(const CPULoadedModelTrain &from); + void Load(const GPULoadedModelTrain &from); + + Histories Translate(const std::vector &input); +}; + +/* A model loaded on the CPU. */ +class CPULoadedModelTrain { + private: + std::vector parameters_; + std::vector> srcVocabs_; + Ptr trgVocab_; + + public: + // The parts of Options that relate to model and vocab are ignored. The files provided will be loaded. 
+ CPULoadedModelTrain(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath); + + const std::vector &Parameters() const { return parameters_; } + + const std::vector> &SrcVocabs() const { return srcVocabs_; } + + Ptr TrgVocab() const { return trgVocab_; } +}; + + + +// ##### ^ above is stuff for runtime domain adaptation + + + + /* Execute on a particular device */ class GPUEngine { private: From 5b28f1f8a273a3098c2460f0bcff79cd267e11f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 19 Apr 2021 16:11:24 +0300 Subject: [PATCH 038/135] Implement training with swappable stuff --- src/translator/swappable.cpp | 93 +++++++++++++++++++++++++++++------- src/translator/swappable.h | 13 +++-- 2 files changed, 85 insertions(+), 21 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 803fb352e..b61779323 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -45,10 +45,18 @@ void GPUEngineTrain::SwapPointers(std::vector &with) { } } +void GPUEngineTrain::Initialize(Ptr batch) { + if (!initialized_) { + builder_->build(graph_, batch); + graph_->forward(); + initialized_ = true; + } +} + GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) : options_(options), graph_(New()), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); - options_->set("inference", true); + options_->set("inference", false); options_->set("shuffle", "none"); // Create graph @@ -57,13 +65,14 @@ GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) graph_->setDevice(myDeviceId_); graph_->reserveWorkspaceMB(options_->get("workspace")); - scorers_ = createScorers(options_); - for (auto scorer : scorers_) { - scorer->init(graph_); - // TODO lexical shortlists are not supported yet. 
- } - graph_->forward(); - // TODO: reach into graph_->params() private members and free the parameter memory. + builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); + // scorers_ = createScorers(options_); + // for (auto scorer : scorers_) { + // scorer->init(graph_); + // // TODO lexical shortlists are not supported yet. + // } + // graph_->forward(); + // // TODO: reach into graph_->params() private members and free the parameter memory. } GPUEngineTrain::~GPUEngineTrain() {} @@ -99,23 +108,57 @@ void GPULoadedModelTrain::Load(const CPULoadedModelTrain &from) { } } -Histories GPULoadedModelTrain::Translate(const std::vector &input) { +void GPULoadedModelTrain::Train(const std::vector &input) { ABORT_IF(!trgVocab_, "GPULoadedModelTrain needs to be overwritten by a CPU model first."); - engine_->SwapPointers(parameters_); + // engine_->SwapPointers(parameters_); + + auto state = New(engine_->options_->get("learn-rate")); + auto scheduler = New(engine_->options_, state); + auto optimizer = Optimizer(engine_->options_); + scheduler->registerTrainingObserver(scheduler); + scheduler->registerTrainingObserver(optimizer); auto corpus = New(std::vector(1, MultilineInputHack(input)), srcVocabs_, engine_->options_); // @TODO dirty hack data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash - BeamSearch search(engine_->options_, engine_->scorers_, trgVocab_); - Histories ret; - ret.reserve(input.size()); - for (auto&& batch : batchGenerator) { - auto result = search.search(engine_->graph_, batch); - ret.insert(ret.end(), result.begin(), result.end()); + bool first = true; + scheduler->started(); + while(scheduler->keepGoing()) { + batchGenerator.prepare(); + + LOG(info, "## NEW BATCHES"); + for(auto&& batch : batchGenerator) { + if(!scheduler->keepGoing()) + break; + + LOG(info, "### NEW BATCH"); + 
if(first) { + // This is a bit awkward but for some reason + // ICriterionFunction::build, which Initialize invokes underneath, + // expects a batch. So, afaik, this is the first time where i can + // invoke build and, as a result i can call SwapPointers only + // afterwards. TODO: verify last claim. + engine_->Initialize(batch); + engine_->SwapPointers(parameters_); + first = false; + } + + // Make an update step on the copy of the model + auto lossNode = engine_->builder_->build(engine_->graph_, batch); + engine_->graph_->forward(); + StaticLoss loss = *lossNode; + engine_->graph_->backward(); + + // Notify optimizer and scheduler + optimizer->update(engine_->graph_, 1); + scheduler->update(loss, batch); + } + if(scheduler->keepGoing()) + scheduler->increaseEpoch(); } - std::sort(ret.begin(), ret.end(),[](marian::Ptr a, marian::Ptr b){return a->getLineNum() < b->getLineNum();}); + scheduler->finished(); + engine_->SwapPointers(parameters_); - return ret; } CPULoadedModelTrain::CPULoadedModelTrain(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) @@ -212,6 +255,20 @@ void GPULoadedModel::Load(const GPULoadedModel &from) { } } +void GPULoadedModel::Load(const GPULoadedModelTrain &from) { + srcVocabs_ = from.srcVocabs_; + trgVocab_ = from.trgVocab_; + + ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); + + for(size_t i = 0; i < parameters_.size(); ++i) { + swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), + reinterpret_cast(from.parameters_[i]->data()), + parameters_[i]->size(), + engine_->myDeviceId_); + } +} + void GPULoadedModel::Load(const CPULoadedModel &from) { srcVocabs_ = from.SrcVocabs(); trgVocab_ = from.TrgVocab(); diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 4b525c580..b6e53a6c7 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -5,9 +5,10 @@ * vocabularies must have the same size. 
To make vocabulary the same size, pad * using scripts/contrib/pad_model_vocabulary.py offline. */ -#include "marian.h" #include "common/io.h" #include "data/vocab.h" +#include "marian.h" +#include "training/scheduler.h" #include "translator/history.h" #include @@ -27,12 +28,15 @@ class CPULoadedModel; class GPUEngineTrain { private: friend class GPULoadedModelTrain; + friend class GPULoadedModel; Ptr options_; Ptr graph_; - std::vector > scorers_; + Ptr builder_; const DeviceId myDeviceId_; Allocator allocator_; + bool initialized_ = false; + void Initialize(Ptr batch); void SwapPointers(std::vector &with); public: @@ -49,6 +53,8 @@ class GPUEngineTrain { /* A model loaded on the GPU that can be overwritten from CPU or GPU. */ class GPULoadedModelTrain { private: + friend class GPULoadedModel; + Ptr engine_; std::vector parameters_; @@ -68,7 +74,7 @@ class GPULoadedModelTrain { void Load(const CPULoadedModelTrain &from); void Load(const GPULoadedModelTrain &from); - Histories Translate(const std::vector &input); + void Train(const std::vector &input); }; /* A model loaded on the CPU. */ @@ -140,6 +146,7 @@ class GPULoadedModel { // Overwrite this model with parameters from a different one. 
void Load(const CPULoadedModel &from); void Load(const GPULoadedModel &from); + void Load(const GPULoadedModelTrain &from); Histories Translate(const std::vector &input); }; From 98b1ad1b1a6ab63bb232627c819a138b42f8990c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 20 Apr 2021 13:36:36 +0300 Subject: [PATCH 039/135] Remove CPULoadedModelTrain in favor of just using CPULoadedModel --- src/translator/swappable.cpp | 33 +-------------------------------- src/translator/swappable.h | 20 +------------------- 2 files changed, 2 insertions(+), 51 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index b61779323..e135566f1 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -100,7 +100,7 @@ void GPULoadedModelTrain::Load(const GPULoadedModelTrain &from) { } } -void GPULoadedModelTrain::Load(const CPULoadedModelTrain &from) { +void GPULoadedModelTrain::Load(const CPULoadedModel &from) { srcVocabs_ = from.SrcVocabs(); trgVocab_ = from.TrgVocab(); for (size_t i = 0; i < parameters_.size(); ++i) { @@ -161,37 +161,6 @@ void GPULoadedModelTrain::Train(const std::vector &input) { engine_->SwapPointers(parameters_); } -CPULoadedModelTrain::CPULoadedModelTrain(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) - : parameters_(io::loadItems(parameters)) { - // Load parameters. - // Find the special element and remove it: - size_t special_idx = 0; - for (size_t i = 0; i < parameters_.size(); i++) { - if (parameters_[i].name == "special:model.yml") { - special_idx = i; - break; - } - } - parameters_.erase(parameters_.begin() + special_idx); - // Prepare the name so that it matches the named map - for (auto&& item : parameters_) { - item.name = "F0::" + item.name; - } - // Sort by name to match params order. 
- std::sort(parameters_.begin(), parameters_.end(), [](const io::Item &a, const io::Item &b){return a.name < b.name;}); - - // Load source vocabs. - const std::vector &maxVocabs = options->get>("dim-vocabs"); - for(size_t i = 0; i < sourceVocabPaths.size(); ++i) { - Ptr vocab = New(options, i); - vocab->load(sourceVocabPaths[i], maxVocabs[i]); - srcVocabs_.emplace_back(vocab); - } - - // Load target vocab. - trgVocab_ = New(options, sourceVocabPaths.size()); - trgVocab_->load(targetVocabPath); -} diff --git a/src/translator/swappable.h b/src/translator/swappable.h index b6e53a6c7..3d62d98c5 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -16,7 +16,6 @@ namespace marian { class GPULoadedModelTrain; -class CPULoadedModelTrain; class Scorer; @@ -71,29 +70,12 @@ class GPULoadedModelTrain { Ptr TrgVocab() const { return trgVocab_; } // Overwrite this model with parameters from a different one. - void Load(const CPULoadedModelTrain &from); + void Load(const CPULoadedModel &from); void Load(const GPULoadedModelTrain &from); void Train(const std::vector &input); }; -/* A model loaded on the CPU. */ -class CPULoadedModelTrain { - private: - std::vector parameters_; - std::vector> srcVocabs_; - Ptr trgVocab_; - - public: - // The parts of Options that relate to model and vocab are ignored. The files provided will be loaded. - CPULoadedModelTrain(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath); - - const std::vector &Parameters() const { return parameters_; } - - const std::vector> &SrcVocabs() const { return srcVocabs_; } - - Ptr TrgVocab() const { return trgVocab_; } -}; From d14da1b6b13eb04b9ea7d92331ab5d342d1c125e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 20 Apr 2021 17:39:08 +0300 Subject: [PATCH 040/135] Adapt self_adaptive.h to use the swappable stuff Haven't tested it. 
Likely broken --- src/translator/self_adaptive.h | 186 +++++++++------------------------ src/translator/swappable.cpp | 16 +++ src/translator/swappable.h | 1 + 3 files changed, 66 insertions(+), 137 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 08a16e0d9..12d3f233d 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -7,6 +7,7 @@ #include "models/model_task.h" #include "training/scheduler.h" #include "training/validator.h" +#include "translator/swappable.h" namespace marian { @@ -74,37 +75,14 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { auto deviceId = Config::getDevices(options_)[0]; - // Initialize model for training - graph_ = New(); - graph_->setDevice(deviceId); - graph_->reserveWorkspaceMB(options_->get("workspace")); - builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); - - optimizer_ = Optimizer(options_); - - // Initialize model for translation - Ptr opts = New(); - opts->merge(options_); - opts->set("inference", true); - builderTrans_ = models::createModelFromOptions(opts, models::usage::translation); - - // Initialize a scorer for translation - // auto model = options_->get("model"); - model = options_->get("model"); - Ptr scorer = New(builderTrans_, "", 1.0f, model); - scorers_.push_back(scorer); - - // Read vocabularies + auto modelFilename = options_->get("model"); auto vocabPaths = options_->get>("vocabs"); - std::vector maxVocabs = options_->get>("dim-vocabs"); - for(size_t i = 0; i < vocabPaths.size(); ++i) { - Ptr vocab = New(options_, i); - vocab->load(vocabPaths[i], maxVocabs[i]); - vocabs_.emplace_back(vocab); - } - - // Load model - builder_->load(graph_, model); + std::vector srcVocabPaths(vocabPaths.begin(), vocabPaths.end() - 1); + cpuModel_ = New(options_, modelFilename, srcVocabPaths, vocabPaths.back()); + translateEngine_ = New(options_, deviceId.no); + translateSlot_ = New(translateEngine_); 
+ trainEngine_ = New(options_, deviceId.no); + trainSlot_ = New(trainEngine_); } std::string run(const std::string& json) override { @@ -119,8 +97,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { // Get input sentences auto input = yaml["input"].as(); - std::vector> srcVocabs(vocabs_.begin(), vocabs_.end() - 1); - auto testSet = New(std::vector({input}), srcVocabs, optionsTrans_); + auto testSet = New(std::vector({input}), cpuModel_->SrcVocabs(), optionsTrans_); // Prepare batches auto testBatches = New>(testSet, optionsTrans_); @@ -128,7 +105,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { // Initialize output printing auto collector = New(); - auto printer = New(options_, vocabs_.back()); + auto printer = New(options_, cpuModel_->TrgVocab()); // Get training sentences std::vector> contexts; @@ -140,11 +117,18 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { size_t id = 0; for(auto testBatch : *testBatches) { if(contexts.size() > id && !contexts[id].empty()) { - train(contexts[id]); - translate(testBatch, collector, printer, graphAdapt_); + trainSlot_->Load(*cpuModel_); + trainSlot_->Train(contexts[id]); + translateSlot_->Load(*trainSlot_); + translate(testBatch, collector, printer); + needsSwitching_ = true; } else { LOG(info, "No context provided for sentence {}", id); - translate(testBatch, collector, printer, graph_); + if(needsSwitching_) { + translateSlot_->Load(*cpuModel_); + needsSwitching_ = false; + } + translate(testBatch, collector, printer); } // iterating by 1 is quite safe because the mini-batch size for @@ -161,8 +145,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { void run() override { // Initialize input data auto srcPaths = options_->get>("input"); - std::vector> srcVocabs(vocabs_.begin(), vocabs_.end() - 1); - auto testSet = New(srcPaths, srcVocabs, optionsTrans_); + auto testSet = New(srcPaths, cpuModel_->SrcVocabs(), optionsTrans_); // 
Prepare batches auto testBatches = New>(testSet, optionsTrans_); @@ -172,7 +155,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { auto collector = New(options_->get("output")); if(options_->get("quiet-translation")) collector->setPrintingStrategy(New()); - auto printer = New(options_, vocabs_.back()); + auto printer = New(options_, cpuModel_->SrcVocabs().back()); // Initialize train data auto trainPaths = options_->get>("train-sets"); @@ -185,11 +168,18 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { if(!trainSet.empty()) { LOG(info, "# NEW TEST BATCH"); - train(trainSet); - translate(testBatch, collector, printer, graphAdapt_); + trainSlot_->Load(*cpuModel_); + trainSlot_->Train(trainSet); + translateSlot_->Load(*trainSlot_); + translate(testBatch, collector, printer); + needsSwitching_ = true; } else { LOG(info, "# EMPTY TEST BATCH"); - translate(testBatch, collector, printer, graph_); + if (needsSwitching_) { + translateSlot_->Load(*cpuModel_); + needsSwitching_ = false; + } + translate(testBatch, collector, printer); } } } @@ -197,105 +187,27 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { private: Ptr options_; // Options for training Ptr optionsTrans_; // Options for translator - - Ptr builder_; // Training model - Ptr secondBuilder_; // To not get a segfault when training model else could just use builder_ - Ptr builderTrans_; // Translation model - Ptr graph_; // A graph with original parameters - Ptr graphAdapt_; // A graph on which training is performed - std::string model; - - std::vector> vocabs_; - std::vector> scorers_; - Ptr optimizer_; - - void train(std::vector trainSents) { - auto state = New(options_->get("learn-rate")); - auto scheduler = New(options_, state); - scheduler->registerTrainingObserver(scheduler); - scheduler->registerTrainingObserver(optimizer_); - - auto trainSet = New(trainSents, vocabs_, options_); - auto trainBatches = New>(trainSet, options_); - - bool 
first = true; - - scheduler->started(); - while(scheduler->keepGoing()) { - trainBatches->prepare(); - - LOG(info, "## NEW BATCHES"); - for(auto batch : *trainBatches) { - if(!scheduler->keepGoing()) - break; - - LOG(info, "### NEW BATCH"); - // Copy params from the original model - if(first) { - secondBuilder_ - = models::createCriterionFunctionFromOptions(options_, models::usage::training); - // secondBuilder->load(graph_, model); - - // builder_->build(graph_, batch); - // // TODO: Why do we need to do a froward pass here? - // graph_->forward(); - - graphAdapt_ = New(); - // graphAdapt_->setDevice(graph_->getDeviceId()); - auto deviceId = Config::getDevices(options_)[0]; - graphAdapt_->setDevice(deviceId); - // graphAdapt_->reuseWorkspace(graph_); - graphAdapt_->reserveWorkspaceMB(options_->get("workspace")); - - // TODO: why aren't we using a builder before this? - // it's probably because the order doesn't matter and the - // builder is used below - // graphAdapt_->copyParams(graph_); - secondBuilder_->load(graphAdapt_, model); - first = false; - } - - // Make an update step on the copy of the model - auto lossNode = secondBuilder_->build(graphAdapt_, batch); - graphAdapt_->forward(); - StaticLoss loss = *lossNode; - graphAdapt_->backward(); - - // Notify optimizer and scheduler - optimizer_->update(graphAdapt_, 1); - scheduler->update(loss, batch); - } - if(scheduler->keepGoing()) - scheduler->increaseEpoch(); - } - scheduler->finished(); - } + Ptr cpuModel_; + Ptr trainSlot_; + Ptr translateSlot_; + Ptr trainEngine_; + Ptr translateEngine_; + bool needsSwitching_ = true; void translate(Ptr batch, Ptr collector, - Ptr printer, - Ptr graph) { - graph->setInference(true); - graph->clear(); - - { - auto search = New(options_, - scorers_, - vocabs_.back()); - auto histories = search->search(graph, batch); - - for(auto history : histories) { - std::stringstream best1; - std::stringstream bestn; - printer->print(history, best1, bestn); - 
collector->Write(history->getLineNum(), - best1.str(), - bestn.str(), - options_->get("n-best")); - } + Ptr printer) { + auto histories = translateSlot_->Translate(batch); + + for(auto history : histories) { + std::stringstream best1; + std::stringstream bestn; + printer->print(history, best1, bestn); + collector->Write(history->getLineNum(), + best1.str(), + bestn.str(), + options_->get("n-best")); } - - graph->setInference(false); } }; } diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index e135566f1..30760e67e 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -265,6 +265,22 @@ Histories GPULoadedModel::Translate(const std::vector &input) { return ret; } +Histories GPULoadedModel::Translate(const Ptr batch) { + ABORT_IF(!trgVocab_, "GPULoadedModel needs to be overwritten by a CPU model first."); + engine_->SwapPointers(parameters_); + + BeamSearch search(engine_->options_, engine_->scorers_, trgVocab_); + Histories ret; + ret.reserve(batch->size()); // TODO: input.size() was here previously, this is likely wrong + + auto result = search.search(engine_->graph_, batch); + ret.insert(ret.end(), result.begin(), result.end()); + + std::sort(ret.begin(), ret.end(),[](marian::Ptr a, marian::Ptr b){return a->getLineNum() < b->getLineNum();}); + engine_->SwapPointers(parameters_); + return ret; +} + CPULoadedModel::CPULoadedModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) : parameters_(io::loadItems(parameters)) { // Load parameters. diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 3d62d98c5..c018908f0 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -131,6 +131,7 @@ class GPULoadedModel { void Load(const GPULoadedModelTrain &from); Histories Translate(const std::vector &input); + Histories Translate(const Ptr batch); }; /* A model loaded on the CPU. 
*/ From 07658fb7221f3d15dd3f2571455642897668fc1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 21 Apr 2021 15:45:16 +0300 Subject: [PATCH 041/135] Fix some runtime issues related to configuration --- src/translator/self_adaptive.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 12d3f233d..bbdda2c84 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -76,12 +76,13 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { auto deviceId = Config::getDevices(options_)[0]; auto modelFilename = options_->get("model"); + options_->set>("models", {modelFilename}); auto vocabPaths = options_->get>("vocabs"); std::vector srcVocabPaths(vocabPaths.begin(), vocabPaths.end() - 1); cpuModel_ = New(options_, modelFilename, srcVocabPaths, vocabPaths.back()); - translateEngine_ = New(options_, deviceId.no); + translateEngine_ = New(options_, 0); translateSlot_ = New(translateEngine_); - trainEngine_ = New(options_, deviceId.no); + trainEngine_ = New(options_, 0); trainSlot_ = New(trainEngine_); } From 06ee187604753c4283140c250a7ed07b01bf180d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Sat, 8 May 2021 16:19:58 +0300 Subject: [PATCH 042/135] Fix issues woth vocab initialization and memory allocation --- src/translator/self_adaptive.h | 10 +++++++--- src/translator/swappable.cpp | 23 ++++++++++++++++++++--- src/translator/swappable.h | 1 + 3 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index bbdda2c84..e10920a62 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -76,14 +76,18 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { auto deviceId = Config::getDevices(options_)[0]; auto modelFilename = options_->get("model"); - options_->set>("models", 
{modelFilename}); + optionsTrans_->set>("models", {modelFilename}); + auto vocabPaths = options_->get>("vocabs"); std::vector srcVocabPaths(vocabPaths.begin(), vocabPaths.end() - 1); + // TODO: or use optionsTrans_ here? cpuModel_ is used by both, trainin and translation, code + // so i don't yet know what's the correct approach cpuModel_ = New(options_, modelFilename, srcVocabPaths, vocabPaths.back()); - translateEngine_ = New(options_, 0); + translateEngine_ = New(optionsTrans_, 0); translateSlot_ = New(translateEngine_); trainEngine_ = New(options_, 0); trainSlot_ = New(trainEngine_); + trainSlot_->AllocateParamsLike(*cpuModel_); } std::string run(const std::string& json) override { @@ -106,7 +110,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { // Initialize output printing auto collector = New(); - auto printer = New(options_, cpuModel_->TrgVocab()); + auto printer = New(optionsTrans_, cpuModel_->TrgVocab()); // Get training sentences std::vector> contexts; diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 30760e67e..95e1073c6 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -78,8 +78,15 @@ GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) GPUEngineTrain::~GPUEngineTrain() {} GPULoadedModelTrain::GPULoadedModelTrain(Ptr gpu) : engine_(gpu) { - for (auto ¶m : *engine_->graph_->params()) { - parameters_.push_back(engine_->allocator_.alloc(param->val()->memory()->size())); + // NOTE: engine_ must contain an initialized graph already at this point + // for (auto ¶m : *engine_->graph_->params()) { + // parameters_.push_back(engine_->allocator_.alloc(param->val()->memory()->size())); + // } +} + +void GPULoadedModelTrain::AllocateParamsLike(const CPULoadedModel &from) { + for (auto ¶m : from.Parameters()) { + parameters_.push_back(engine_->allocator_.alloc(param.size())); } } @@ -118,7 +125,17 @@ void GPULoadedModelTrain::Train(const std::vector &input) { 
scheduler->registerTrainingObserver(scheduler); scheduler->registerTrainingObserver(optimizer); - auto corpus = New(std::vector(1, MultilineInputHack(input)), srcVocabs_, engine_->options_); // @TODO dirty hack + // LOG(info, "GAAAH: vocabs is {}", srcVocabs_); + for (auto vocab: srcVocabs_) { + LOG(info, "GAAAH: single vocab is {}", vocab); + } + + std::vector> allVocabs; + allVocabs.reserve(srcVocabs_.size() + 1); + allVocabs.insert(allVocabs.end(), srcVocabs_.begin(), srcVocabs_.end()); + allVocabs.emplace_back(trgVocab_); + auto corpus = New(input, allVocabs, engine_->options_); // @TODO dirty hack + // auto corpus = New(input, srcVocabs_, engine_->options_); // @TODO dirty hack data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash bool first = true; diff --git a/src/translator/swappable.h b/src/translator/swappable.h index c018908f0..3615f3080 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -72,6 +72,7 @@ class GPULoadedModelTrain { // Overwrite this model with parameters from a different one. 
void Load(const CPULoadedModel &from); void Load(const GPULoadedModelTrain &from); + void AllocateParamsLike(const CPULoadedModel &from); void Train(const std::vector &input); }; From c4ff8b9496ede8408737a29b74573c898820844c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 14 May 2021 10:34:19 +0300 Subject: [PATCH 043/135] Initialize the ExpressionGraph for translation with inference=true --- src/translator/swappable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 95e1073c6..86eb4a74e 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -196,7 +196,7 @@ void GPUEngine::SwapPointers(std::vector &with) { } GPUEngine::GPUEngine(Ptr options, size_t deviceIdx) - : options_(options), graph_(New()), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { + : options_(options), graph_(New(true)), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); options_->set("inference", true); options_->set("shuffle", "none"); From 16ec013111671f1dd9461f0f8f1159f8d2e7c5f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 14 May 2021 16:02:21 +0300 Subject: [PATCH 044/135] Seek to beginning of the istringstream when resetting text input This solves an issue where a BatchGenerator cannot be initialized with a TextInput because iterating over batches would then exhaust the TextInput and it wouldn't reset upon BatchGenerator::prepare. 
--- src/data/text_input.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/data/text_input.h b/src/data/text_input.h index b08a4fdcc..5ea0b45e9 100644 --- a/src/data/text_input.h +++ b/src/data/text_input.h @@ -45,7 +45,12 @@ class TextInput : public DatasetBase { Sample next() override; void shuffle() override {} - void reset() override {} + void reset() override { + for (auto& file : files_) { + file->clear(); + file->seekg(0); + } + } iterator begin() override { return iterator(*this); } iterator end() override { return iterator(); } From a220a2b5d5c4bff45a21fd5b4574f7dd8a37757e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 19 May 2021 11:30:02 +0300 Subject: [PATCH 045/135] When translating, directly use the trained parameters instead of loading them --- src/translator/self_adaptive.h | 4 ++-- src/translator/swappable.cpp | 13 +++---------- src/translator/swappable.h | 2 +- 3 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index e10920a62..18e3a18dc 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -124,7 +124,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { if(contexts.size() > id && !contexts[id].empty()) { trainSlot_->Load(*cpuModel_); trainSlot_->Train(contexts[id]); - translateSlot_->Load(*trainSlot_); + translateSlot_->PointToParams(*trainSlot_); translate(testBatch, collector, printer); needsSwitching_ = true; } else { @@ -175,7 +175,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "# NEW TEST BATCH"); trainSlot_->Load(*cpuModel_); trainSlot_->Train(trainSet); - translateSlot_->Load(*trainSlot_); + translateSlot_->PointToParams(*trainSlot_); translate(testBatch, collector, printer); needsSwitching_ = true; } else { diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 86eb4a74e..4afe266b3 100644 --- 
a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -241,18 +241,11 @@ void GPULoadedModel::Load(const GPULoadedModel &from) { } } -void GPULoadedModel::Load(const GPULoadedModelTrain &from) { +void GPULoadedModel::PointToParams(const GPULoadedModelTrain &from) { + ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); srcVocabs_ = from.srcVocabs_; trgVocab_ = from.trgVocab_; - - ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); - - for(size_t i = 0; i < parameters_.size(); ++i) { - swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), - reinterpret_cast(from.parameters_[i]->data()), - parameters_[i]->size(), - engine_->myDeviceId_); - } + parameters_ = from.parameters_; } void GPULoadedModel::Load(const CPULoadedModel &from) { diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 3615f3080..d0bee4667 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -129,7 +129,7 @@ class GPULoadedModel { // Overwrite this model with parameters from a different one. 
void Load(const CPULoadedModel &from); void Load(const GPULoadedModel &from); - void Load(const GPULoadedModelTrain &from); + void PointToParams(const GPULoadedModelTrain &from); Histories Translate(const std::vector &input); Histories Translate(const Ptr batch); From 4f67aabf26b12435759f6aa69092d3b4a31ddb97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 19 May 2021 11:31:21 +0300 Subject: [PATCH 046/135] Ensure that SwapPointers is called an even number of times --- src/translator/swappable.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 4afe266b3..ee8ef767a 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -175,7 +175,9 @@ void GPULoadedModelTrain::Train(const std::vector &input) { } scheduler->finished(); - engine_->SwapPointers(parameters_); + if(!first) { + engine_->SwapPointers(parameters_); + } } From ea1380d8abdf3c5ec7f620fd71fdc735285a6c0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 26 May 2021 14:59:52 +0300 Subject: [PATCH 047/135] Get some params from the gpu memory for debugging --- src/translator/self_adaptive.h | 2 +- src/translator/swappable.cpp | 36 ++++++++++++++++++++++++++++++++++ src/translator/swappable.h | 1 + 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 18e3a18dc..b276b9d69 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -175,7 +175,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "# NEW TEST BATCH"); trainSlot_->Load(*cpuModel_); trainSlot_->Train(trainSet); - translateSlot_->PointToParams(*trainSlot_); + translateSlot_->Load(*trainSlot_); translate(testBatch, collector, printer); needsSwitching_ = true; } else { diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 
ee8ef767a..7ae584987 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -53,6 +53,11 @@ void GPUEngineTrain::Initialize(Ptr batch) { } } +void get(std::vector &v, MemoryPiece::PtrType mem, Ptr backend) { + v.resize(mem->size()); + gpu::copy(backend, mem->data(), mem->data() + mem->size(), v.data()); +} + GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) : options_(options), graph_(New()), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); @@ -118,6 +123,8 @@ void GPULoadedModelTrain::Load(const CPULoadedModel &from) { void GPULoadedModelTrain::Train(const std::vector &input) { ABORT_IF(!trgVocab_, "GPULoadedModelTrain needs to be overwritten by a CPU model first."); // engine_->SwapPointers(parameters_); + std::vector outvec; + get(outvec, parameters_[0], engine_->graph_->getBackend()); auto state = New(engine_->options_->get("learn-rate")); auto scheduler = New(engine_->options_, state); @@ -156,12 +163,16 @@ void GPULoadedModelTrain::Train(const std::vector &input) { // invoke build and, as a result i can call SwapPointers only // afterwards. TODO: verify last claim. 
engine_->Initialize(batch); + std::vector outvec; + get(outvec, parameters_[0], engine_->graph_->getBackend()); engine_->SwapPointers(parameters_); + get(outvec, parameters_[0], engine_->graph_->getBackend()); first = false; } // Make an update step on the copy of the model auto lossNode = engine_->builder_->build(engine_->graph_, batch); + // LOG(info, "Before: {}", engine_->graph_->params()->vals()->debug()); engine_->graph_->forward(); StaticLoss loss = *lossNode; engine_->graph_->backward(); @@ -169,6 +180,7 @@ void GPULoadedModelTrain::Train(const std::vector &input) { // Notify optimizer and scheduler optimizer->update(engine_->graph_, 1); scheduler->update(loss, batch); + // LOG(info, "After: {}", engine_->graph_->params()->vals()->debug()); } if(scheduler->keepGoing()) scheduler->increaseEpoch(); @@ -176,7 +188,12 @@ void GPULoadedModelTrain::Train(const std::vector &input) { scheduler->finished(); if(!first) { + std::vector outvec; + get(outvec, parameters_[0], engine_->graph_->getBackend()); engine_->SwapPointers(parameters_); + get(outvec, parameters_[0], engine_->graph_->getBackend()); + // does nothing, need a place for a breakpoint + first = false; } } @@ -243,6 +260,20 @@ void GPULoadedModel::Load(const GPULoadedModel &from) { } } +void GPULoadedModel::Load(const GPULoadedModelTrain &from) { + srcVocabs_ = from.srcVocabs_; + trgVocab_ = from.trgVocab_; + + ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); + + for(size_t i = 0; i < parameters_.size(); ++i) { + swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), + reinterpret_cast(from.parameters_[i]->data()), + parameters_[i]->size(), + engine_->myDeviceId_); + } +} + void GPULoadedModel::PointToParams(const GPULoadedModelTrain &from) { ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); srcVocabs_ = from.srcVocabs_; @@ -279,7 +310,10 @@ Histories GPULoadedModel::Translate(const std::vector &input) { Histories 
GPULoadedModel::Translate(const Ptr batch) { ABORT_IF(!trgVocab_, "GPULoadedModel needs to be overwritten by a CPU model first."); + std::vector outvec; + get(outvec, parameters_[0], engine_->graph_->getBackend()); engine_->SwapPointers(parameters_); + // LOG(info, "Before translation: {}", engine_->graph_->params()->vals()->debug()); BeamSearch search(engine_->options_, engine_->scorers_, trgVocab_); Histories ret; @@ -289,6 +323,8 @@ Histories GPULoadedModel::Translate(const Ptr batch) { ret.insert(ret.end(), result.begin(), result.end()); std::sort(ret.begin(), ret.end(),[](marian::Ptr a, marian::Ptr b){return a->getLineNum() < b->getLineNum();}); + + // LOG(info, "After translation: {}", engine_->graph_->params()->vals()->debug()); engine_->SwapPointers(parameters_); return ret; } diff --git a/src/translator/swappable.h b/src/translator/swappable.h index d0bee4667..370a2858c 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -129,6 +129,7 @@ class GPULoadedModel { // Overwrite this model with parameters from a different one. 
void Load(const CPULoadedModel &from); void Load(const GPULoadedModel &from); + void Load(const GPULoadedModelTrain &from); void PointToParams(const GPULoadedModelTrain &from); Histories Translate(const std::vector &input); From dda5995ff1f43bd4d569705515f20a5922ca2359 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 16 Jun 2021 13:22:56 +0300 Subject: [PATCH 048/135] Retrieve some debugging information in SwapPointers --- src/translator/swappable.cpp | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 7ae584987..234fe28f7 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -37,12 +37,33 @@ namespace { } } // namespace -void GPUEngineTrain::SwapPointers(std::vector &with) { +void get(std::vector &v, MemoryPiece::PtrType mem, Ptr backend) { + v.resize(mem->size()); + gpu::copy(backend, mem->data(), mem->data() + mem->size(), v.data()); +} + +void GPUEngineTrain::SwapPointers( + std::vector &with /*, std::vector &with_names*/) { auto write_it = graph_->params()->begin(); auto read_it = with.begin(); - for (; read_it != with.end(); ++write_it, ++read_it) { + // auto read_it_names = with_names.begin(); + bool first = true; + std::vector outvec; + for(; read_it != with.end(); ++write_it, ++read_it /*, ++read_it_names*/ ) { + if (first){ + get(outvec, (*write_it)->val()->memory(), graph_->getBackend()); + get(outvec, *read_it, graph_->getBackend()); + } std::swap(*(*write_it)->val()->memory(), **read_it); + // *graph_->params()->get(*read_it_names)->val()->memory() = std::move(**read_it); + // assign(*graph_->params()->get(*read_it_names)->val()->memory(), **read_it); + if(first) { + get(outvec, (*write_it)->val()->memory(), graph_->getBackend()); + get(outvec, *read_it, graph_->getBackend()); + first = false; + } } + // graph_->params()->init(graph_->getBackend(), graph_->getDeviceId()); } void 
GPUEngineTrain::Initialize(Ptr batch) { @@ -53,11 +74,6 @@ void GPUEngineTrain::Initialize(Ptr batch) { } } -void get(std::vector &v, MemoryPiece::PtrType mem, Ptr backend) { - v.resize(mem->size()); - gpu::copy(backend, mem->data(), mem->data() + mem->size(), v.data()); -} - GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) : options_(options), graph_(New()), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); From 4e743bf6983e3a2792461ef3246879d5c171d11c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 30 Jun 2021 16:16:24 +0300 Subject: [PATCH 049/135] Attempt to load the io::Items representing parameters directly into the training graph --- src/graph/expression_graph.h | 2 +- src/graph/parameters.h | 11 ++++ src/translator/self_adaptive.h | 9 ++-- src/translator/swappable.cpp | 92 +++++++++++++++++++--------------- src/translator/swappable.h | 10 ++-- 5 files changed, 74 insertions(+), 50 deletions(-) diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index 2fa28f67b..75d89a82b 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -743,7 +743,7 @@ class ExpressionGraph : public std::enable_shared_from_this { public: /** Load model (mainly parameter objects) from array of io::Items */ - void load(std::vector& ioItems, bool markReloaded = true) { + void load(const std::vector& ioItems, bool markReloaded = true) { setReloaded(false); for(auto& item : ioItems) { std::string pName = item.name; diff --git a/src/graph/parameters.h b/src/graph/parameters.h index d5ede0b4e..40b311b7c 100644 --- a/src/graph/parameters.h +++ b/src/graph/parameters.h @@ -45,6 +45,17 @@ class Parameters { LOG(debug, "Destroyed parameter object of type {}", acceptedElementType_); } + std::vector toMemoryPieces() { + std::vector res(params_.size()); + auto read_it = begin(); + 
int i = 0; + for(; read_it != end(); ++read_it) { + i++; + res.push_back((*read_it)->val()->memory()); + } + return res; + } + auto begin() -> decltype(params_.begin()) { return params_.begin(); } auto end() -> decltype(params_.begin()) { return params_.end(); } diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index b276b9d69..8bb6f2577 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -87,7 +87,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { translateSlot_ = New(translateEngine_); trainEngine_ = New(options_, 0); trainSlot_ = New(trainEngine_); - trainSlot_->AllocateParamsLike(*cpuModel_); + // trainSlot_->AllocateParamsLike(*cpuModel_); } std::string run(const std::string& json) override { @@ -122,7 +122,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { size_t id = 0; for(auto testBatch : *testBatches) { if(contexts.size() > id && !contexts[id].empty()) { - trainSlot_->Load(*cpuModel_); + trainSlot_->Load(cpuModel_); trainSlot_->Train(contexts[id]); translateSlot_->PointToParams(*trainSlot_); translate(testBatch, collector, printer); @@ -173,9 +173,10 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { if(!trainSet.empty()) { LOG(info, "# NEW TEST BATCH"); - trainSlot_->Load(*cpuModel_); + trainSlot_->Load(cpuModel_); trainSlot_->Train(trainSet); - translateSlot_->Load(*trainSlot_); + // translateSlot_->Load(*trainSlot_); + translateSlot_->PointToParams(*trainSlot_); translate(testBatch, collector, printer); needsSwitching_ = true; } else { diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 234fe28f7..fb3f07cb0 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -105,42 +105,48 @@ GPULoadedModelTrain::GPULoadedModelTrain(Ptr gpu) : engine_(gpu) // } } -void GPULoadedModelTrain::AllocateParamsLike(const CPULoadedModel &from) { - for (auto ¶m : from.Parameters()) { - 
parameters_.push_back(engine_->allocator_.alloc(param.size())); - } -} +// void GPULoadedModelTrain::AllocateParamsLike(const CPULoadedModel &from) { +// for (auto ¶m : from.Parameters()) { +// parameters_.push_back(engine_->allocator_.alloc(param.size())); +// } +// } GPULoadedModelTrain::~GPULoadedModelTrain() { - for (MemoryPiece::PtrType &p : parameters_) { - engine_->allocator_.free(p); - } + // for (MemoryPiece::PtrType &p : parameters_) { + // engine_->allocator_.free(p); + // } } -void GPULoadedModelTrain::Load(const GPULoadedModelTrain &from) { - srcVocabs_ = from.srcVocabs_; - trgVocab_ = from.trgVocab_; +// void GPULoadedModelTrain::Load(const GPULoadedModelTrain &from) { +// srcVocabs_ = from.srcVocabs_; +// trgVocab_ = from.trgVocab_; - ABORT_IF(engine_ != from.engine_, "TODO: copy across GPUs."); +// ABORT_IF(engine_ != from.engine_, "TODO: copy across GPUs."); - for (size_t i = 0; i < parameters_.size(); ++i) { - swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), reinterpret_cast(from.parameters_[i]->data()), parameters_[i]->size(), engine_->myDeviceId_); - } -} +// for (size_t i = 0; i < parameters_.size(); ++i) { +// swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), reinterpret_cast(from.parameters_[i]->data()), parameters_[i]->size(), engine_->myDeviceId_); +// } +// } -void GPULoadedModelTrain::Load(const CPULoadedModel &from) { - srcVocabs_ = from.SrcVocabs(); - trgVocab_ = from.TrgVocab(); - for (size_t i = 0; i < parameters_.size(); ++i) { - swapper::copyCpuToGpu(reinterpret_cast(parameters_[i]->data()), from.Parameters()[i].data(), from.Parameters()[i].size(), engine_->myDeviceId_); - } +void GPULoadedModelTrain::Load(Ptr from) { + srcVocabs_ = from->SrcVocabs(); + trgVocab_ = from->TrgVocab(); + cpuModel_ = from; } +// void GPULoadedModelTrain::Load(const CPULoadedModel &from) { +// srcVocabs_ = from.SrcVocabs(); +// trgVocab_ = from.TrgVocab(); +// for (size_t i = 0; i < parameters_.size(); ++i) { +// 
swapper::copyCpuToGpu(reinterpret_cast(parameters_[i]->data()), from.Parameters()[i].data(), from.Parameters()[i].size(), engine_->myDeviceId_); +// } +// } + void GPULoadedModelTrain::Train(const std::vector &input) { ABORT_IF(!trgVocab_, "GPULoadedModelTrain needs to be overwritten by a CPU model first."); // engine_->SwapPointers(parameters_); std::vector outvec; - get(outvec, parameters_[0], engine_->graph_->getBackend()); + // get(outvec, parameters_[0], engine_->graph_->getBackend()); auto state = New(engine_->options_->get("learn-rate")); auto scheduler = New(engine_->options_, state); @@ -180,9 +186,10 @@ void GPULoadedModelTrain::Train(const std::vector &input) { // afterwards. TODO: verify last claim. engine_->Initialize(batch); std::vector outvec; - get(outvec, parameters_[0], engine_->graph_->getBackend()); - engine_->SwapPointers(parameters_); - get(outvec, parameters_[0], engine_->graph_->getBackend()); + // get(outvec, parameters_[0], engine_->graph_->getBackend()); + // engine_->SwapPointers(parameters_); + engine_->graph_->load(cpuModel_->Parameters(), false); + // get(outvec, parameters_[0], engine_->graph_->getBackend()); first = false; } @@ -193,6 +200,8 @@ void GPULoadedModelTrain::Train(const std::vector &input) { StaticLoss loss = *lossNode; engine_->graph_->backward(); + // auto out = engine_->graph_->params()->toMemoryPieces(); + // Notify optimizer and scheduler optimizer->update(engine_->graph_, 1); scheduler->update(loss, batch); @@ -205,9 +214,9 @@ void GPULoadedModelTrain::Train(const std::vector &input) { if(!first) { std::vector outvec; - get(outvec, parameters_[0], engine_->graph_->getBackend()); - engine_->SwapPointers(parameters_); - get(outvec, parameters_[0], engine_->graph_->getBackend()); + // get(outvec, parameters_[0], engine_->graph_->getBackend()); + // engine_->SwapPointers(parameters_); + // get(outvec, parameters_[0], engine_->graph_->getBackend()); // does nothing, need a place for a breakpoint first = false; } @@ 
-276,25 +285,26 @@ void GPULoadedModel::Load(const GPULoadedModel &from) { } } -void GPULoadedModel::Load(const GPULoadedModelTrain &from) { - srcVocabs_ = from.srcVocabs_; - trgVocab_ = from.trgVocab_; +// void GPULoadedModel::Load(const GPULoadedModelTrain &from) { +// srcVocabs_ = from.srcVocabs_; +// trgVocab_ = from.trgVocab_; - ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); +// ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); - for(size_t i = 0; i < parameters_.size(); ++i) { - swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), - reinterpret_cast(from.parameters_[i]->data()), - parameters_[i]->size(), - engine_->myDeviceId_); - } -} +// for(size_t i = 0; i < parameters_.size(); ++i) { +// swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), +// reinterpret_cast(from.parameters_[i]->data()), +// parameters_[i]->size(), +// engine_->myDeviceId_); +// } +// } void GPULoadedModel::PointToParams(const GPULoadedModelTrain &from) { ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); srcVocabs_ = from.srcVocabs_; trgVocab_ = from.trgVocab_; - parameters_ = from.parameters_; + // TODO: this might be wrong and could be droped in favor of using SwapPointers + parameters_ = from.engine_->graph_->params()->toMemoryPieces(); } void GPULoadedModel::Load(const CPULoadedModel &from) { diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 370a2858c..ca0ba0caa 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -56,7 +56,8 @@ class GPULoadedModelTrain { Ptr engine_; - std::vector parameters_; + // std::vector parameters_; + Ptr cpuModel_; std::vector> srcVocabs_; Ptr trgVocab_; @@ -70,9 +71,10 @@ class GPULoadedModelTrain { Ptr TrgVocab() const { return trgVocab_; } // Overwrite this model with parameters from a different one. 
- void Load(const CPULoadedModel &from); - void Load(const GPULoadedModelTrain &from); - void AllocateParamsLike(const CPULoadedModel &from); + // void Load(const CPULoadedModel &from); + void Load(Ptr from); + // void Load(const GPULoadedModelTrain &from); + // void AllocateParamsLike(const CPULoadedModel &from); void Train(const std::vector &input); }; From 9e898b07dd88db9a83a409e42f8338c74c4d4697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 14 Jul 2021 16:15:12 +0300 Subject: [PATCH 050/135] Only reserve memory not fill it with values when initializing the memory piece vector --- src/graph/parameters.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/graph/parameters.h b/src/graph/parameters.h index 40b311b7c..8dd579af1 100644 --- a/src/graph/parameters.h +++ b/src/graph/parameters.h @@ -46,7 +46,8 @@ class Parameters { } std::vector toMemoryPieces() { - std::vector res(params_.size()); + std::vector res; + res.reserve(params_.size()); auto read_it = begin(); int i = 0; for(; read_it != end(); ++read_it) { From e7d339b4d58b2ab3543f2b08b2279e98a1ea8f6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 19 Jul 2021 13:50:15 +0300 Subject: [PATCH 051/135] Load params before building the graph, drop the F0:: prefix, clear params celarParams() seems to be unnecessary here, though. Left it in because didn't want to recompile and test if working again. This approach still doesn't work though, btw. Managed to fix the issue where toMemoryPieces() was running into some null pointer problems, but ran into a different issue afterwards - "Parameters should be allocated by their graph. 
Parameter encoder_l1_self_Wq was not" --- src/graph/expression_graph.h | 10 +++++++++- src/translator/swappable.cpp | 3 ++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index 75d89a82b..75b9c28d0 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -186,6 +186,11 @@ class ExpressionGraph : public std::enable_shared_from_this { kvParams.second->clear(); } + void clearParams() { + for(auto kvParams : paramsByElementType_) + kvParams.second->clear(); + } + /** * Set device options used to run the graph. * @param deviceId a struct type which stores device no. (size_t) @@ -743,10 +748,13 @@ class ExpressionGraph : public std::enable_shared_from_this { public: /** Load model (mainly parameter objects) from array of io::Items */ - void load(const std::vector& ioItems, bool markReloaded = true) { + void load(const std::vector& ioItems, bool markReloaded = true, bool dropF0prefix = false) { setReloaded(false); for(auto& item : ioItems) { std::string pName = item.name; + if (dropF0prefix && pName.substr(0, 4) == "F0::") { + pName = pName.substr(4); + } // skip over special parameters starting with "special:" if(pName.substr(0, 8) == "special:") continue; diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index fb3f07cb0..bdaf39d4c 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -184,11 +184,12 @@ void GPULoadedModelTrain::Train(const std::vector &input) { // expects a batch. So, afaik, this is the first time where i can // invoke build and, as a result i can call SwapPointers only // afterwards. TODO: verify last claim. 
+ engine_->graph_->clearParams(); + engine_->graph_->load(cpuModel_->Parameters(), true, true); engine_->Initialize(batch); std::vector outvec; // get(outvec, parameters_[0], engine_->graph_->getBackend()); // engine_->SwapPointers(parameters_); - engine_->graph_->load(cpuModel_->Parameters(), false); // get(outvec, parameters_[0], engine_->graph_->getBackend()); first = false; } From 26e757459242f83e78d633fff8c780a6809be086 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 23 Jul 2021 14:05:14 +0300 Subject: [PATCH 052/135] Try to clear the graph before loading the parameters in an attempt to solve null parameter values It doesn't help though --- src/translator/swappable.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index bdaf39d4c..7b738b54d 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -184,6 +184,7 @@ void GPULoadedModelTrain::Train(const std::vector &input) { // expects a batch. So, afaik, this is the first time where i can // invoke build and, as a result i can call SwapPointers only // afterwards. TODO: verify last claim. 
+ engine_->graph_->clear(); engine_->graph_->clearParams(); engine_->graph_->load(cpuModel_->Parameters(), true, true); engine_->Initialize(batch); From 58851ae09f19c563ea9a4388fab4a0f67a76da15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 23 Jul 2021 17:18:17 +0300 Subject: [PATCH 053/135] Recreate the graph upon every training invocation This works, finally --- src/translator/swappable.cpp | 16 ++++++++++++++-- src/translator/swappable.h | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 7b738b54d..049e36b8f 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -96,6 +96,17 @@ GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) // // TODO: reach into graph_->params() private members and free the parameter memory. } +void GPUEngineTrain::recreateGraphAndBuilder() { + // Create graph + graph_ = New(); + auto prec = options_->get>("precision", {"float32"}); + graph_->setDefaultElementType(typeFromString(prec[0])); + graph_->setDevice(myDeviceId_); + graph_->reserveWorkspaceMB(options_->get("workspace")); + + builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); +} + GPUEngineTrain::~GPUEngineTrain() {} GPULoadedModelTrain::GPULoadedModelTrain(Ptr gpu) : engine_(gpu) { @@ -184,8 +195,9 @@ void GPULoadedModelTrain::Train(const std::vector &input) { // expects a batch. So, afaik, this is the first time where i can // invoke build and, as a result i can call SwapPointers only // afterwards. TODO: verify last claim. 
- engine_->graph_->clear(); - engine_->graph_->clearParams(); + + // Create graph + engine_->recreateGraphAndBuilder(); engine_->graph_->load(cpuModel_->Parameters(), true, true); engine_->Initialize(batch); std::vector outvec; diff --git a/src/translator/swappable.h b/src/translator/swappable.h index ca0ba0caa..1ee9001bf 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -37,6 +37,7 @@ class GPUEngineTrain { void Initialize(Ptr batch); void SwapPointers(std::vector &with); + void recreateGraphAndBuilder(); public: /** From 2765e65a0560ff88e9beb254d1e88680d9df0c31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 10 Aug 2021 14:15:21 +0300 Subject: [PATCH 054/135] The wrong vocab was being passed to the printer Or at least i think so --- src/translator/self_adaptive.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 8bb6f2577..9522e903d 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -160,7 +160,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { auto collector = New(options_->get("output")); if(options_->get("quiet-translation")) collector->setPrintingStrategy(New()); - auto printer = New(options_, cpuModel_->SrcVocabs().back()); + auto printer = New(options_, cpuModel_->TrgVocab()); // Initialize train data auto trainPaths = options_->get>("train-sets"); From c28939788eb66a6a8f51257a1baa87172302437c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 11 Aug 2021 15:08:39 +0300 Subject: [PATCH 055/135] Clean up and move memory piece extraction to a better place --- src/translator/swappable.cpp | 13 +++++-------- src/translator/swappable.h | 6 ++---- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 049e36b8f..015907461 100644 --- a/src/translator/swappable.cpp +++ 
b/src/translator/swappable.cpp @@ -145,6 +145,10 @@ void GPULoadedModelTrain::Load(Ptr from) { cpuModel_ = from; } +std::vector GPULoadedModelTrain::Parameters() const { + return engine_->graph_->params()->toMemoryPieces(); +} + // void GPULoadedModelTrain::Load(const CPULoadedModel &from) { // srcVocabs_ = from.SrcVocabs(); // trgVocab_ = from.TrgVocab(); @@ -165,17 +169,11 @@ void GPULoadedModelTrain::Train(const std::vector &input) { scheduler->registerTrainingObserver(scheduler); scheduler->registerTrainingObserver(optimizer); - // LOG(info, "GAAAH: vocabs is {}", srcVocabs_); - for (auto vocab: srcVocabs_) { - LOG(info, "GAAAH: single vocab is {}", vocab); - } - std::vector> allVocabs; allVocabs.reserve(srcVocabs_.size() + 1); allVocabs.insert(allVocabs.end(), srcVocabs_.begin(), srcVocabs_.end()); allVocabs.emplace_back(trgVocab_); auto corpus = New(input, allVocabs, engine_->options_); // @TODO dirty hack - // auto corpus = New(input, srcVocabs_, engine_->options_); // @TODO dirty hack data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash bool first = true; @@ -317,8 +315,7 @@ void GPULoadedModel::PointToParams(const GPULoadedModelTrain &from) { ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); srcVocabs_ = from.srcVocabs_; trgVocab_ = from.trgVocab_; - // TODO: this might be wrong and could be droped in favor of using SwapPointers - parameters_ = from.engine_->graph_->params()->toMemoryPieces(); + parameters_ = from.Parameters(); } void GPULoadedModel::Load(const CPULoadedModel &from) { diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 1ee9001bf..c062ab3d4 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -57,7 +57,6 @@ class GPULoadedModelTrain { Ptr engine_; - // std::vector parameters_; Ptr cpuModel_; std::vector> srcVocabs_; Ptr 
trgVocab_; @@ -72,10 +71,9 @@ class GPULoadedModelTrain { Ptr TrgVocab() const { return trgVocab_; } // Overwrite this model with parameters from a different one. - // void Load(const CPULoadedModel &from); void Load(Ptr from); - // void Load(const GPULoadedModelTrain &from); - // void AllocateParamsLike(const CPULoadedModel &from); + + std::vector Parameters() const; void Train(const std::vector &input); }; From 20c893d8be548cd0c55c501f74b1cac5c06f650e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 11 Aug 2021 15:45:12 +0300 Subject: [PATCH 056/135] Rename for readability; remove commented out code; remove debugging code --- src/translator/self_adaptive.h | 4 +- src/translator/swappable.cpp | 102 +++------------------------------ src/translator/swappable.h | 4 +- 3 files changed, 12 insertions(+), 98 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 9522e903d..f2352098b 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -122,7 +122,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { size_t id = 0; for(auto testBatch : *testBatches) { if(contexts.size() > id && !contexts[id].empty()) { - trainSlot_->Load(cpuModel_); + trainSlot_->SetModel(cpuModel_); trainSlot_->Train(contexts[id]); translateSlot_->PointToParams(*trainSlot_); translate(testBatch, collector, printer); @@ -173,7 +173,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { if(!trainSet.empty()) { LOG(info, "# NEW TEST BATCH"); - trainSlot_->Load(cpuModel_); + trainSlot_->SetModel(cpuModel_); trainSlot_->Train(trainSet); // translateSlot_->Load(*trainSlot_); translateSlot_->PointToParams(*trainSlot_); diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 015907461..06a7c31f3 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -37,33 +37,21 @@ namespace { } } // namespace -void get(std::vector &v, 
MemoryPiece::PtrType mem, Ptr backend) { - v.resize(mem->size()); - gpu::copy(backend, mem->data(), mem->data() + mem->size(), v.data()); +// For debugging memory +void get(std::vector &out, MemoryPiece::PtrType mem, Ptr backend) { + out.resize(mem->size()); + gpu::copy(backend, mem->data(), mem->data() + mem->size(), out.data()); } void GPUEngineTrain::SwapPointers( - std::vector &with /*, std::vector &with_names*/) { + std::vector &with) { auto write_it = graph_->params()->begin(); auto read_it = with.begin(); - // auto read_it_names = with_names.begin(); - bool first = true; + std::vector outvec; - for(; read_it != with.end(); ++write_it, ++read_it /*, ++read_it_names*/ ) { - if (first){ - get(outvec, (*write_it)->val()->memory(), graph_->getBackend()); - get(outvec, *read_it, graph_->getBackend()); - } + for(; read_it != with.end(); ++write_it, ++read_it) { std::swap(*(*write_it)->val()->memory(), **read_it); - // *graph_->params()->get(*read_it_names)->val()->memory() = std::move(**read_it); - // assign(*graph_->params()->get(*read_it_names)->val()->memory(), **read_it); - if(first) { - get(outvec, (*write_it)->val()->memory(), graph_->getBackend()); - get(outvec, *read_it, graph_->getBackend()); - first = false; - } } - // graph_->params()->init(graph_->getBackend(), graph_->getDeviceId()); } void GPUEngineTrain::Initialize(Ptr batch) { @@ -87,13 +75,6 @@ GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) graph_->reserveWorkspaceMB(options_->get("workspace")); builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); - // scorers_ = createScorers(options_); - // for (auto scorer : scorers_) { - // scorer->init(graph_); - // // TODO lexical shortlists are not supported yet. - // } - // graph_->forward(); - // // TODO: reach into graph_->params() private members and free the parameter memory. 
} void GPUEngineTrain::recreateGraphAndBuilder() { @@ -110,36 +91,12 @@ void GPUEngineTrain::recreateGraphAndBuilder() { GPUEngineTrain::~GPUEngineTrain() {} GPULoadedModelTrain::GPULoadedModelTrain(Ptr gpu) : engine_(gpu) { - // NOTE: engine_ must contain an initialized graph already at this point - // for (auto ¶m : *engine_->graph_->params()) { - // parameters_.push_back(engine_->allocator_.alloc(param->val()->memory()->size())); - // } } -// void GPULoadedModelTrain::AllocateParamsLike(const CPULoadedModel &from) { -// for (auto ¶m : from.Parameters()) { -// parameters_.push_back(engine_->allocator_.alloc(param.size())); -// } -// } - GPULoadedModelTrain::~GPULoadedModelTrain() { - // for (MemoryPiece::PtrType &p : parameters_) { - // engine_->allocator_.free(p); - // } } -// void GPULoadedModelTrain::Load(const GPULoadedModelTrain &from) { -// srcVocabs_ = from.srcVocabs_; -// trgVocab_ = from.trgVocab_; - -// ABORT_IF(engine_ != from.engine_, "TODO: copy across GPUs."); - -// for (size_t i = 0; i < parameters_.size(); ++i) { -// swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), reinterpret_cast(from.parameters_[i]->data()), parameters_[i]->size(), engine_->myDeviceId_); -// } -// } - -void GPULoadedModelTrain::Load(Ptr from) { +void GPULoadedModelTrain::SetModel(Ptr from) { srcVocabs_ = from->SrcVocabs(); trgVocab_ = from->TrgVocab(); cpuModel_ = from; @@ -149,19 +106,8 @@ std::vector GPULoadedModelTrain::Parameters() const { return engine_->graph_->params()->toMemoryPieces(); } -// void GPULoadedModelTrain::Load(const CPULoadedModel &from) { -// srcVocabs_ = from.SrcVocabs(); -// trgVocab_ = from.TrgVocab(); -// for (size_t i = 0; i < parameters_.size(); ++i) { -// swapper::copyCpuToGpu(reinterpret_cast(parameters_[i]->data()), from.Parameters()[i].data(), from.Parameters()[i].size(), engine_->myDeviceId_); -// } -// } - void GPULoadedModelTrain::Train(const std::vector &input) { ABORT_IF(!trgVocab_, "GPULoadedModelTrain needs to be overwritten 
by a CPU model first."); - // engine_->SwapPointers(parameters_); - std::vector outvec; - // get(outvec, parameters_[0], engine_->graph_->getBackend()); auto state = New(engine_->options_->get("learn-rate")); auto scheduler = New(engine_->options_, state); @@ -199,39 +145,23 @@ void GPULoadedModelTrain::Train(const std::vector &input) { engine_->graph_->load(cpuModel_->Parameters(), true, true); engine_->Initialize(batch); std::vector outvec; - // get(outvec, parameters_[0], engine_->graph_->getBackend()); - // engine_->SwapPointers(parameters_); - // get(outvec, parameters_[0], engine_->graph_->getBackend()); first = false; } // Make an update step on the copy of the model auto lossNode = engine_->builder_->build(engine_->graph_, batch); - // LOG(info, "Before: {}", engine_->graph_->params()->vals()->debug()); engine_->graph_->forward(); StaticLoss loss = *lossNode; engine_->graph_->backward(); - // auto out = engine_->graph_->params()->toMemoryPieces(); - // Notify optimizer and scheduler optimizer->update(engine_->graph_, 1); scheduler->update(loss, batch); - // LOG(info, "After: {}", engine_->graph_->params()->vals()->debug()); } if(scheduler->keepGoing()) scheduler->increaseEpoch(); } scheduler->finished(); - - if(!first) { - std::vector outvec; - // get(outvec, parameters_[0], engine_->graph_->getBackend()); - // engine_->SwapPointers(parameters_); - // get(outvec, parameters_[0], engine_->graph_->getBackend()); - // does nothing, need a place for a breakpoint - first = false; - } } @@ -297,20 +227,6 @@ void GPULoadedModel::Load(const GPULoadedModel &from) { } } -// void GPULoadedModel::Load(const GPULoadedModelTrain &from) { -// srcVocabs_ = from.srcVocabs_; -// trgVocab_ = from.trgVocab_; - -// ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); - -// for(size_t i = 0; i < parameters_.size(); ++i) { -// swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), -// reinterpret_cast(from.parameters_[i]->data()), -// 
parameters_[i]->size(), -// engine_->myDeviceId_); -// } -// } - void GPULoadedModel::PointToParams(const GPULoadedModelTrain &from) { ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); srcVocabs_ = from.srcVocabs_; @@ -350,7 +266,6 @@ Histories GPULoadedModel::Translate(const Ptr batch) { std::vector outvec; get(outvec, parameters_[0], engine_->graph_->getBackend()); engine_->SwapPointers(parameters_); - // LOG(info, "Before translation: {}", engine_->graph_->params()->vals()->debug()); BeamSearch search(engine_->options_, engine_->scorers_, trgVocab_); Histories ret; @@ -361,7 +276,6 @@ Histories GPULoadedModel::Translate(const Ptr batch) { std::sort(ret.begin(), ret.end(),[](marian::Ptr a, marian::Ptr b){return a->getLineNum() < b->getLineNum();}); - // LOG(info, "After translation: {}", engine_->graph_->params()->vals()->debug()); engine_->SwapPointers(parameters_); return ret; } diff --git a/src/translator/swappable.h b/src/translator/swappable.h index c062ab3d4..65a36405f 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -70,8 +70,8 @@ class GPULoadedModelTrain { Ptr TrgVocab() const { return trgVocab_; } - // Overwrite this model with parameters from a different one. 
- void Load(Ptr from); + // Change the internal pointers to vocabularies and CPULoadedModel to different ones + void SetModel(Ptr from); std::vector Parameters() const; From 724b9100215eb74ba9934f48e7255d07fa416aa2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 12 Aug 2021 13:36:04 +0300 Subject: [PATCH 057/135] Remove some redundant initialization code --- src/translator/swappable.cpp | 33 ++++++--------------------------- src/translator/swappable.h | 3 --- 2 files changed, 6 insertions(+), 30 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 06a7c31f3..e887d903f 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -54,27 +54,14 @@ void GPUEngineTrain::SwapPointers( } } -void GPUEngineTrain::Initialize(Ptr batch) { - if (!initialized_) { - builder_->build(graph_, batch); - graph_->forward(); - initialized_ = true; - } -} - -GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) - : options_(options), graph_(New()), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { +GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) + : options_(options), myDeviceId_(LookupGPU(options, deviceIdx)) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); options_->set("inference", false); options_->set("shuffle", "none"); - // Create graph - auto prec = options_->get>("precision", {"float32"}); - graph_->setDefaultElementType(typeFromString(prec[0])); - graph_->setDevice(myDeviceId_); - graph_->reserveWorkspaceMB(options_->get("workspace")); - - builder_ = models::createCriterionFunctionFromOptions(options_, models::usage::training); + // There is no need to initialize the graph or builder here because that's done before + // each Train() invokation } void GPUEngineTrain::recreateGraphAndBuilder() { @@ -134,17 +121,9 @@ void GPULoadedModelTrain::Train(const std::vector &input) { LOG(info, 
"### NEW BATCH"); if(first) { - // This is a bit awkward but for some reason - // ICriterionFunction::build, which Initialize invokes underneath, - // expects a batch. So, afaik, this is the first time where i can - // invoke build and, as a result i can call SwapPointers only - // afterwards. TODO: verify last claim. - // Create graph engine_->recreateGraphAndBuilder(); engine_->graph_->load(cpuModel_->Parameters(), true, true); - engine_->Initialize(batch); - std::vector outvec; first = false; } @@ -263,8 +242,8 @@ Histories GPULoadedModel::Translate(const std::vector &input) { Histories GPULoadedModel::Translate(const Ptr batch) { ABORT_IF(!trgVocab_, "GPULoadedModel needs to be overwritten by a CPU model first."); - std::vector outvec; - get(outvec, parameters_[0], engine_->graph_->getBackend()); + // std::vector outvec; + // get(outvec, parameters_[0], engine_->graph_->getBackend()); engine_->SwapPointers(parameters_); BeamSearch search(engine_->options_, engine_->scorers_, trgVocab_); diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 65a36405f..55f5b1def 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -32,10 +32,7 @@ class GPUEngineTrain { Ptr graph_; Ptr builder_; const DeviceId myDeviceId_; - Allocator allocator_; - bool initialized_ = false; - void Initialize(Ptr batch); void SwapPointers(std::vector &with); void recreateGraphAndBuilder(); From 7a790271207eabe9a220684bd81c2786edd26df1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 8 Sep 2021 14:14:34 +0300 Subject: [PATCH 058/135] Make method naming consistent in GPUEngineTrain --- src/translator/swappable.cpp | 4 ++-- src/translator/swappable.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index e887d903f..a2ca534ef 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -64,7 +64,7 @@ 
GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) // each Train() invokation } -void GPUEngineTrain::recreateGraphAndBuilder() { +void GPUEngineTrain::RecreateGraphAndBuilder() { // Create graph graph_ = New(); auto prec = options_->get>("precision", {"float32"}); @@ -122,7 +122,7 @@ void GPULoadedModelTrain::Train(const std::vector &input) { LOG(info, "### NEW BATCH"); if(first) { // Create graph - engine_->recreateGraphAndBuilder(); + engine_->RecreateGraphAndBuilder(); engine_->graph_->load(cpuModel_->Parameters(), true, true); first = false; } diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 55f5b1def..7aee61c9d 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -34,7 +34,7 @@ class GPUEngineTrain { const DeviceId myDeviceId_; void SwapPointers(std::vector &with); - void recreateGraphAndBuilder(); + void RecreateGraphAndBuilder(); public: /** From 1790ea11888265992db1eb7507d54e2df1976674 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 8 Sep 2021 15:14:36 +0300 Subject: [PATCH 059/135] Clean up some comments --- src/translator/self_adaptive.h | 12 ++++++------ src/translator/swappable.cpp | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index f2352098b..5b02234ce 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -68,26 +68,27 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { options_->set("shuffle", "none"); // Set up translator options optionsTrans_ = New(options_->clone()); + // We will only ever translate a single sentence at a time because dynamic + // adaptation happens per sentence optionsTrans_->set("mini-batch", 1); optionsTrans_->set("maxi-batch", 1); + // TODO: should probably un-hardcode this? 
The issue is, though, that the users + // might want separate options for training and translation optionsTrans_->set("max-length", 1000); optionsTrans_->set("shuffle", "none"); - auto deviceId = Config::getDevices(options_)[0]; - auto modelFilename = options_->get("model"); + // Training has a single "model", translation can have multiple "models" in the general case. + // Adaptive options also take a single "model" so we have to adapt translation options manually. optionsTrans_->set>("models", {modelFilename}); auto vocabPaths = options_->get>("vocabs"); std::vector srcVocabPaths(vocabPaths.begin(), vocabPaths.end() - 1); - // TODO: or use optionsTrans_ here? cpuModel_ is used by both, trainin and translation, code - // so i don't yet know what's the correct approach cpuModel_ = New(options_, modelFilename, srcVocabPaths, vocabPaths.back()); translateEngine_ = New(optionsTrans_, 0); translateSlot_ = New(translateEngine_); trainEngine_ = New(options_, 0); trainSlot_ = New(trainEngine_); - // trainSlot_->AllocateParamsLike(*cpuModel_); } std::string run(const std::string& json) override { @@ -175,7 +176,6 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "# NEW TEST BATCH"); trainSlot_->SetModel(cpuModel_); trainSlot_->Train(trainSet); - // translateSlot_->Load(*trainSlot_); translateSlot_->PointToParams(*trainSlot_); translate(testBatch, collector, printer); needsSwitching_ = true; diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index a2ca534ef..04b14350f 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -65,7 +65,6 @@ GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) } void GPUEngineTrain::RecreateGraphAndBuilder() { - // Create graph graph_ = New(); auto prec = options_->get>("precision", {"float32"}); graph_->setDefaultElementType(typeFromString(prec[0])); From f8fe981150c1c2f0eb4bd6647bc15c3679521435 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 14 Sep 2021 10:12:36 +0300 Subject: [PATCH 060/135] Simplify the training loop --- src/translator/swappable.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 04b14350f..fa66c0ae9 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -92,6 +92,7 @@ std::vector GPULoadedModelTrain::Parameters() const { return engine_->graph_->params()->toMemoryPieces(); } +// Load the initial model (dropping any previous changes) and train it on the provided input void GPULoadedModelTrain::Train(const std::vector &input) { ABORT_IF(!trgVocab_, "GPULoadedModelTrain needs to be overwritten by a CPU model first."); @@ -108,7 +109,11 @@ void GPULoadedModelTrain::Train(const std::vector &input) { auto corpus = New(input, allVocabs, engine_->options_); // @TODO dirty hack data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash - bool first = true; + // We reset the training graph to the original model parameters to prepare + // for adapting it to the new inputs + engine_->RecreateGraphAndBuilder(); + engine_->graph_->load(cpuModel_->Parameters(), true, true); + scheduler->started(); while(scheduler->keepGoing()) { batchGenerator.prepare(); @@ -119,13 +124,6 @@ void GPULoadedModelTrain::Train(const std::vector &input) { break; LOG(info, "### NEW BATCH"); - if(first) { - // Create graph - engine_->RecreateGraphAndBuilder(); - engine_->graph_->load(cpuModel_->Parameters(), true, true); - first = false; - } - // Make an update step on the copy of the model auto lossNode = engine_->builder_->build(engine_->graph_, batch); engine_->graph_->forward(); From 4a4214ab1055718aec4af2f9b4685e64daf9c3a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 14 Sep 2021 
10:12:53 +0300 Subject: [PATCH 061/135] Make CorpusBase understand that stdin is not a file --- src/data/corpus_base.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/data/corpus_base.cpp b/src/data/corpus_base.cpp index 5be4298be..f89ad0e61 100644 --- a/src/data/corpus_base.cpp +++ b/src/data/corpus_base.cpp @@ -54,9 +54,13 @@ CorpusBase::CorpusBase(const std::vector& paths, } for(auto path : paths_) { - UPtr strm(new io::InputFileStream(path)); - ABORT_IF(strm->empty(), "File '{}' is empty", path); - files_.emplace_back(std::move(strm)); + if(path == "stdin" || path == "-") + files_.emplace_back(new std::istream(std::cin.rdbuf())); + else { + UPtr strm(new io::InputFileStream(path)); + ABORT_IF(strm->empty(), "File '{}' is empty", path); + files_.emplace_back(std::move(strm)); + } } initEOS(/*training=*/true); From 0c974ebafc4ea4c7f7551627943fc446893db0f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 16 Sep 2021 12:45:46 +0300 Subject: [PATCH 062/135] Move common training/translation stuff out into a separate method --- src/translator/self_adaptive.h | 124 +++++++++++++++++++++++---------- 1 file changed, 89 insertions(+), 35 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 5b02234ce..597873792 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -3,6 +3,7 @@ #include "common/config.h" #include "common/file_stream.h" #include "data/batch_generator.h" +#include "data/iterator_facade.h" #include "data/text_input.h" #include "models/model_task.h" #include "training/scheduler.h" @@ -13,8 +14,29 @@ namespace marian { using namespace data; +class TrainSetReader; + +class TrainSetIterator : public IteratorFacade> { +private: + TrainSetReader* trainSetReader_; + std::vector currentSamples_; +public: + // TODO: should we use a smart pointer here instead? 
The TrainSetReader::begin() method + // would make it difficult + TrainSetIterator(TrainSetReader* trainSetReader); + + bool equal(const TrainSetIterator& other) const override { + return other.trainSetReader_ == trainSetReader_; + } + + const std::vector& dereference() const override { return currentSamples_; } + + void increment() override; +}; + class TrainSetReader { std::vector> files_; + bool eof_ = false; public: TrainSetReader(std::vector paths) { @@ -22,25 +44,44 @@ class TrainSetReader { files_.emplace_back(new io::InputFileStream(path)); } + TrainSetIterator begin() { + return TrainSetIterator(this); + } + + TrainSetIterator end() { + return TrainSetIterator(nullptr); + } + + bool eof() { + return eof_; + } + std::vector getSamples() { // extracted lines for source and target corpora std::vector samples; // counters of number of lines extracted for source and target std::vector counts; + // Early exit if files are exhausted + if (eof_) return samples; + for(auto const& file : files_) { size_t currCount = 0; std::string lines; std::string line; + bool fileEnded = true; while(io::getline(*file, line)) { - if(line.empty()) + if(line.empty()) { + fileEnded = false; break; + } if(currCount) lines += "\n"; lines += line; currCount += 1; } + eof_ = fileEnded; if(!lines.empty()) samples.emplace_back(lines); @@ -59,6 +100,26 @@ class TrainSetReader { } }; +TrainSetIterator::TrainSetIterator(TrainSetReader* trainSetReader) : trainSetReader_(trainSetReader) { + if(trainSetReader) { + currentSamples_ = trainSetReader_->getSamples(); + } +} + +void TrainSetIterator::increment() { + // If the previous increment has exhausted the file, we must indicate that the we've reached + // the iterator's end + if(trainSetReader_->eof() && trainSetReader_ != nullptr) { + trainSetReader_ = nullptr; + return; + } + // If we're at the end of the iterator and increment has been called yet another time, there's + // a bug in the calling code + ABORT_IF(trainSetReader_ == nullptr, 
"Incrementing the end of the iterator isn't allowed"); + + currentSamples_ = trainSetReader_->getSamples(); +} + class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { public: TrainSelfAdaptive(Ptr options) : options_(options) { @@ -111,7 +172,6 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { // Initialize output printing auto collector = New(); - auto printer = New(optionsTrans_, cpuModel_->TrgVocab()); // Get training sentences std::vector> contexts; @@ -120,32 +180,45 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "Running..."); - size_t id = 0; + translate(testBatches, contexts.begin(), contexts.end(), collector); + + auto translations = collector->collect(options_->get("n-best")); + YAML::Emitter output; + output << YAML::DoubleQuoted << YAML::Flow << utils::join(translations, "\\n"); + return "{\"output\":" + std::string(output.c_str()) + "}"; + } + + template + void translate( + Ptr> + testBatches, + Iterator trainBegin, + Iterator trainEnd, + Ptr collector) { + auto printer = New(options_, cpuModel_->TrgVocab()); + for(auto testBatch : *testBatches) { - if(contexts.size() > id && !contexts[id].empty()) { + ABORT_IF(trainBegin == trainEnd, "Context batches ran out before test batches"); + + auto trainSet = *trainBegin; + ++trainBegin; + + if(!trainSet.empty()) { + LOG(info, "# NEW TEST BATCH"); trainSlot_->SetModel(cpuModel_); - trainSlot_->Train(contexts[id]); + trainSlot_->Train(trainSet); translateSlot_->PointToParams(*trainSlot_); translate(testBatch, collector, printer); needsSwitching_ = true; } else { - LOG(info, "No context provided for sentence {}", id); + LOG(info, "# EMPTY TEST BATCH"); if(needsSwitching_) { translateSlot_->Load(*cpuModel_); needsSwitching_ = false; } translate(testBatch, collector, printer); } - - // iterating by 1 is quite safe because the mini-batch size for - // translation is always 1 - ++id; } - - auto translations = 
collector->collect(options_->get("n-best")); - YAML::Emitter output; - output << YAML::DoubleQuoted << YAML::Flow << utils::join(translations, "\\n"); - return "{\"output\":" + std::string(output.c_str()) + "}"; } void run() override { @@ -161,7 +234,6 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { auto collector = New(options_->get("output")); if(options_->get("quiet-translation")) collector->setPrintingStrategy(New()); - auto printer = New(options_, cpuModel_->TrgVocab()); // Initialize train data auto trainPaths = options_->get>("train-sets"); @@ -169,25 +241,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "Running..."); - for(auto testBatch : *testBatches) { - auto trainSet = trainSets->getSamples(); - - if(!trainSet.empty()) { - LOG(info, "# NEW TEST BATCH"); - trainSlot_->SetModel(cpuModel_); - trainSlot_->Train(trainSet); - translateSlot_->PointToParams(*trainSlot_); - translate(testBatch, collector, printer); - needsSwitching_ = true; - } else { - LOG(info, "# EMPTY TEST BATCH"); - if (needsSwitching_) { - translateSlot_->Load(*cpuModel_); - needsSwitching_ = false; - } - translate(testBatch, collector, printer); - } - } + translate(testBatches, trainSets->begin(), trainSets->end(), collector); } private: From 1176c3fbbcfa26d965c4e10b536f907da750d025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 16 Sep 2021 15:13:33 +0300 Subject: [PATCH 063/135] Rename TrainSet{Reader,Iterator} to AdaptiveContext{Reader,Iterator} --- src/translator/self_adaptive.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 597873792..b0085478f 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -14,18 +14,18 @@ namespace marian { using namespace data; -class TrainSetReader; +class AdaptiveContextReader; -class TrainSetIterator : public 
IteratorFacade> { +class AdaptiveContextIterator : public IteratorFacade> { private: - TrainSetReader* trainSetReader_; + AdaptiveContextReader* trainSetReader_; std::vector currentSamples_; public: // TODO: should we use a smart pointer here instead? The TrainSetReader::begin() method // would make it difficult - TrainSetIterator(TrainSetReader* trainSetReader); + AdaptiveContextIterator(AdaptiveContextReader* trainSetReader); - bool equal(const TrainSetIterator& other) const override { + bool equal(const AdaptiveContextIterator& other) const override { return other.trainSetReader_ == trainSetReader_; } @@ -34,22 +34,22 @@ class TrainSetIterator : public IteratorFacade> files_; bool eof_ = false; public: - TrainSetReader(std::vector paths) { + AdaptiveContextReader(std::vector paths) { for(auto& path : paths) files_.emplace_back(new io::InputFileStream(path)); } - TrainSetIterator begin() { - return TrainSetIterator(this); + AdaptiveContextIterator begin() { + return AdaptiveContextIterator(this); } - TrainSetIterator end() { - return TrainSetIterator(nullptr); + AdaptiveContextIterator end() { + return AdaptiveContextIterator(nullptr); } bool eof() { @@ -100,13 +100,13 @@ class TrainSetReader { } }; -TrainSetIterator::TrainSetIterator(TrainSetReader* trainSetReader) : trainSetReader_(trainSetReader) { +AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSetReader) : trainSetReader_(trainSetReader) { if(trainSetReader) { currentSamples_ = trainSetReader_->getSamples(); } } -void TrainSetIterator::increment() { +void AdaptiveContextIterator::increment() { // If the previous increment has exhausted the file, we must indicate that the we've reached // the iterator's end if(trainSetReader_->eof() && trainSetReader_ != nullptr) { @@ -237,7 +237,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { // Initialize train data auto trainPaths = options_->get>("train-sets"); - auto trainSets = New(trainPaths); + auto trainSets = 
New(trainPaths); LOG(info, "Running..."); From 79002cbaee38251a8d70b81367e91a9077fd7f2d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 16 Sep 2021 16:18:49 +0300 Subject: [PATCH 064/135] Add documentation comments for adaptive context reader classes --- src/translator/self_adaptive.h | 49 +++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index b0085478f..dd242e354 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -16,7 +16,12 @@ using namespace data; class AdaptiveContextReader; -class AdaptiveContextIterator : public IteratorFacade> { +/** + * @brief An iterator for easier access of the context sentences produced by + * `AdaptiveContextReader::getSamples()` + */ +class AdaptiveContextIterator + : public IteratorFacade> { private: AdaptiveContextReader* trainSetReader_; std::vector currentSamples_; @@ -34,16 +39,38 @@ class AdaptiveContextIterator : public IteratorFacade> files_; + /// Indicates whether the input files have been exhausted. bool eof_ = false; public: + /** + * @brief Initializes a new reader by supplying paths to the files with + * context sentences + * + * @param paths paths to the input files. The input files contain + * newline-separated parallel sentence pairs (as usual for MT). Sentences are + * grouped by the translatable sentences (which are provided in a different + * file). Each group is delimited by a single empty line. The sentence group + * can be empty (no context is provided for the respective translatable + * sentence) in which case it is also represented by a single empty line. + */ AdaptiveContextReader(std::vector paths) { for(auto& path : paths) files_.emplace_back(new io::InputFileStream(path)); } + /** + * @brief Returns an iterator over the sets of context sentences produced by + * `getSamples()` + * + * @return the beginning of the iterator. 
+ */ AdaptiveContextIterator begin() { return AdaptiveContextIterator(this); } @@ -56,13 +83,29 @@ class AdaptiveContextReader { return eof_; } + /** + * @brief Reads the next set of samples -- the contaxt sentences -- for + * on-the-fly training in the self-adaptive translation mode. + * + * @details The input files contain newline-separated parallel sentence pairs + * (as usual for MT). Sentences are grouped by the translatable sentences + * (which are provided in a different file). Each group is delimited by a + * single empty line. The sentence group can be empty (no context is provided + * for the respective translatable sentence) in which case it is also + * represented by a single empty line. + * + * @return a vector representing a single group of context sentences. Each + * element in the vector contains newline seperated input lines comming from a + * single file, e.g., [0] could contain 3 newline separated sentences in + * English and [1] would contain their 3 respective translations in Latvian. 
+ */ std::vector getSamples() { // extracted lines for source and target corpora std::vector samples; // counters of number of lines extracted for source and target std::vector counts; - // Early exit if files are exhausted + // Early exit if input files are exhausted if (eof_) return samples; for(auto const& file : files_) { @@ -115,7 +158,7 @@ void AdaptiveContextIterator::increment() { } // If we're at the end of the iterator and increment has been called yet another time, there's // a bug in the calling code - ABORT_IF(trainSetReader_ == nullptr, "Incrementing the end of the iterator isn't allowed"); + ABORT_IF(trainSetReader_ == nullptr, "Incrementing past the end of the iterator isn't allowed"); currentSamples_ = trainSetReader_->getSamples(); } From 632d05fb310e9b3c41d887df22ccad0a817a3b92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 16 Sep 2021 16:42:43 +0300 Subject: [PATCH 065/135] Move self-adaptive data stuff to a separate file --- src/data/adaptive_context.h | 156 +++++++++++++++++++++++++++++++++ src/translator/self_adaptive.h | 151 +------------------------------ 2 files changed, 157 insertions(+), 150 deletions(-) create mode 100644 src/data/adaptive_context.h diff --git a/src/data/adaptive_context.h b/src/data/adaptive_context.h new file mode 100644 index 000000000..4b61ff546 --- /dev/null +++ b/src/data/adaptive_context.h @@ -0,0 +1,156 @@ +#pragma once + +#include "common/file_stream.h" +#include "data/iterator_facade.h" + +namespace marian { + +class AdaptiveContextReader; + +/** + * @brief An iterator for easier access of the context sentences produced by + * `AdaptiveContextReader::getSamples()` + */ +class AdaptiveContextIterator + : public IteratorFacade> { +private: + AdaptiveContextReader* trainSetReader_; + std::vector currentSamples_; +public: + // TODO: should we use a smart pointer here instead? 
The TrainSetReader::begin() method + // would make it difficult + AdaptiveContextIterator(AdaptiveContextReader* trainSetReader); + + bool equal(const AdaptiveContextIterator& other) const override { + return other.trainSetReader_ == trainSetReader_; + } + + const std::vector& dereference() const override { return currentSamples_; } + + void increment() override; +}; + +/** + * @brief Reads the context sentences, that are used for on-the-fly training in + * the self-adaptive translation mode, from files. + */ +class AdaptiveContextReader { + std::vector> files_; + /// Indicates whether the input files have been exhausted. + bool eof_ = false; + +public: + /** + * @brief Initializes a new reader by supplying paths to the files with + * context sentences + * + * @param paths paths to the input files. The input files contain + * newline-separated parallel sentence pairs (as usual for MT). Sentences are + * grouped by the translatable sentences (which are provided in a different + * file). Each group is delimited by a single empty line. The sentence group + * can be empty (no context is provided for the respective translatable + * sentence) in which case it is also represented by a single empty line. + */ + AdaptiveContextReader(std::vector paths) { + for(auto& path : paths) + files_.emplace_back(new io::InputFileStream(path)); + } + + /** + * @brief Returns an iterator over the sets of context sentences produced by + * `getSamples()` + * + * @return the beginning of the iterator. + */ + AdaptiveContextIterator begin() { + return AdaptiveContextIterator(this); + } + + AdaptiveContextIterator end() { + return AdaptiveContextIterator(nullptr); + } + + bool eof() { + return eof_; + } + + /** + * @brief Reads the next set of samples -- the contaxt sentences -- for + * on-the-fly training in the self-adaptive translation mode. + * + * @details The input files contain newline-separated parallel sentence pairs + * (as usual for MT). 
Sentences are grouped by the translatable sentences + * (which are provided in a different file). Each group is delimited by a + * single empty line. The sentence group can be empty (no context is provided + * for the respective translatable sentence) in which case it is also + * represented by a single empty line. + * + * @return a vector representing a single group of context sentences. Each + * element in the vector contains newline seperated input lines comming from a + * single file, e.g., [0] could contain 3 newline separated sentences in + * English and [1] would contain their 3 respective translations in Latvian. + */ + std::vector getSamples() { + // extracted lines for source and target corpora + std::vector samples; + // counters of number of lines extracted for source and target + std::vector counts; + + // Early exit if input files are exhausted + if (eof_) return samples; + + for(auto const& file : files_) { + size_t currCount = 0; + std::string lines; + std::string line; + bool fileEnded = true; + while(io::getline(*file, line)) { + if(line.empty()) { + fileEnded = false; + break; + } + + if(currCount) + lines += "\n"; + lines += line; + currCount += 1; + } + eof_ = fileEnded; + + if(!lines.empty()) + samples.emplace_back(lines); + counts.push_back(currCount); + + // check if the same number of lines is extracted for source and target + size_t prevCount = counts[0]; + for(size_t i = 1; i < counts.size(); ++i) { + ABORT_IF(prevCount != counts[i], + "An empty source or target sentence has been encountered!"); + prevCount = counts[i]; + } + } + + return samples; + } +}; + +AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSetReader) : trainSetReader_(trainSetReader) { + if(trainSetReader) { + currentSamples_ = trainSetReader_->getSamples(); + } +} + +void AdaptiveContextIterator::increment() { + // If the previous increment has exhausted the file, we must indicate that the we've reached + // the iterator's end + 
if(trainSetReader_->eof() && trainSetReader_ != nullptr) { + trainSetReader_ = nullptr; + return; + } + // If we're at the end of the iterator and increment has been called yet another time, there's + // a bug in the calling code + ABORT_IF(trainSetReader_ == nullptr, "Incrementing past the end of the iterator isn't allowed"); + + currentSamples_ = trainSetReader_->getSamples(); +} +} // namespace marian diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index dd242e354..8aa5189f7 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -3,166 +3,17 @@ #include "common/config.h" #include "common/file_stream.h" #include "data/batch_generator.h" -#include "data/iterator_facade.h" #include "data/text_input.h" #include "models/model_task.h" #include "training/scheduler.h" #include "training/validator.h" #include "translator/swappable.h" +#include "data/adaptive_context.h" namespace marian { using namespace data; -class AdaptiveContextReader; - -/** - * @brief An iterator for easier access of the context sentences produced by - * `AdaptiveContextReader::getSamples()` - */ -class AdaptiveContextIterator - : public IteratorFacade> { -private: - AdaptiveContextReader* trainSetReader_; - std::vector currentSamples_; -public: - // TODO: should we use a smart pointer here instead? The TrainSetReader::begin() method - // would make it difficult - AdaptiveContextIterator(AdaptiveContextReader* trainSetReader); - - bool equal(const AdaptiveContextIterator& other) const override { - return other.trainSetReader_ == trainSetReader_; - } - - const std::vector& dereference() const override { return currentSamples_; } - - void increment() override; -}; - -/** - * @brief Reads the context sentences, that are used for on-the-fly training in - * the self-adaptive translation mode, from files. - */ -class AdaptiveContextReader { - std::vector> files_; - /// Indicates whether the input files have been exhausted. 
- bool eof_ = false; - -public: - /** - * @brief Initializes a new reader by supplying paths to the files with - * context sentences - * - * @param paths paths to the input files. The input files contain - * newline-separated parallel sentence pairs (as usual for MT). Sentences are - * grouped by the translatable sentences (which are provided in a different - * file). Each group is delimited by a single empty line. The sentence group - * can be empty (no context is provided for the respective translatable - * sentence) in which case it is also represented by a single empty line. - */ - AdaptiveContextReader(std::vector paths) { - for(auto& path : paths) - files_.emplace_back(new io::InputFileStream(path)); - } - - /** - * @brief Returns an iterator over the sets of context sentences produced by - * `getSamples()` - * - * @return the beginning of the iterator. - */ - AdaptiveContextIterator begin() { - return AdaptiveContextIterator(this); - } - - AdaptiveContextIterator end() { - return AdaptiveContextIterator(nullptr); - } - - bool eof() { - return eof_; - } - - /** - * @brief Reads the next set of samples -- the contaxt sentences -- for - * on-the-fly training in the self-adaptive translation mode. - * - * @details The input files contain newline-separated parallel sentence pairs - * (as usual for MT). Sentences are grouped by the translatable sentences - * (which are provided in a different file). Each group is delimited by a - * single empty line. The sentence group can be empty (no context is provided - * for the respective translatable sentence) in which case it is also - * represented by a single empty line. - * - * @return a vector representing a single group of context sentences. Each - * element in the vector contains newline seperated input lines comming from a - * single file, e.g., [0] could contain 3 newline separated sentences in - * English and [1] would contain their 3 respective translations in Latvian. 
- */ - std::vector getSamples() { - // extracted lines for source and target corpora - std::vector samples; - // counters of number of lines extracted for source and target - std::vector counts; - - // Early exit if input files are exhausted - if (eof_) return samples; - - for(auto const& file : files_) { - size_t currCount = 0; - std::string lines; - std::string line; - bool fileEnded = true; - while(io::getline(*file, line)) { - if(line.empty()) { - fileEnded = false; - break; - } - - if(currCount) - lines += "\n"; - lines += line; - currCount += 1; - } - eof_ = fileEnded; - - if(!lines.empty()) - samples.emplace_back(lines); - counts.push_back(currCount); - - // check if the same number of lines is extracted for source and target - size_t prevCount = counts[0]; - for(size_t i = 1; i < counts.size(); ++i) { - ABORT_IF(prevCount != counts[i], - "An empty source or target sentence has been encountered!"); - prevCount = counts[i]; - } - } - - return samples; - } -}; - -AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSetReader) : trainSetReader_(trainSetReader) { - if(trainSetReader) { - currentSamples_ = trainSetReader_->getSamples(); - } -} - -void AdaptiveContextIterator::increment() { - // If the previous increment has exhausted the file, we must indicate that the we've reached - // the iterator's end - if(trainSetReader_->eof() && trainSetReader_ != nullptr) { - trainSetReader_ = nullptr; - return; - } - // If we're at the end of the iterator and increment has been called yet another time, there's - // a bug in the calling code - ABORT_IF(trainSetReader_ == nullptr, "Incrementing past the end of the iterator isn't allowed"); - - currentSamples_ = trainSetReader_->getSamples(); -} - class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { public: TrainSelfAdaptive(Ptr options) : options_(options) { From 95ed9afc2a8423ed3318b5aeafa31851e70b6b9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 
17 Sep 2021 11:33:36 +0300 Subject: [PATCH 066/135] Move method definitions from adaptive_context.h to .cpp --- src/CMakeLists.txt | 1 + src/data/adaptive_context.cpp | 97 +++++++++++++++++++++++++++++++++++ src/data/adaptive_context.h | 87 ++++--------------------------- 3 files changed, 107 insertions(+), 78 deletions(-) create mode 100644 src/data/adaptive_context.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 6084f091e..44aebe6f4 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,6 +41,7 @@ set(MARIAN_SOURCES data/corpus_nbest.cpp data/text_input.cpp data/shortlist.cpp + data/adaptive_context.cpp 3rd_party/cnpy/cnpy.cpp 3rd_party/ExceptionWithCallStack.cpp diff --git a/src/data/adaptive_context.cpp b/src/data/adaptive_context.cpp new file mode 100644 index 000000000..f63a9fc54 --- /dev/null +++ b/src/data/adaptive_context.cpp @@ -0,0 +1,97 @@ +#include "data/adaptive_context.h" + +namespace marian { +namespace data { + +AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSetReader) + : trainSetReader_(trainSetReader) { + if(trainSetReader) { + currentSamples_ = trainSetReader_->getSamples(); + } +} + +bool AdaptiveContextIterator::equal(const AdaptiveContextIterator& other) const { + return other.trainSetReader_ == trainSetReader_; +} + +const std::vector& AdaptiveContextIterator::dereference() const { + return currentSamples_; +} + +void AdaptiveContextIterator::increment() { + // If the previous increment has exhausted the file, we must indicate that the we've reached + // the iterator's end + if(trainSetReader_->eof() && trainSetReader_ != nullptr) { + trainSetReader_ = nullptr; + return; + } + // If we're at the end of the iterator and increment has been called yet another time, there's + // a bug in the calling code + ABORT_IF(trainSetReader_ == nullptr, "Incrementing past the end of the iterator isn't allowed"); + + currentSamples_ = trainSetReader_->getSamples(); +} + + 
+AdaptiveContextReader::AdaptiveContextReader(std::vector paths) { + for(auto& path : paths) + files_.emplace_back(new io::InputFileStream(path)); +} + +AdaptiveContextIterator AdaptiveContextReader::begin() { + return AdaptiveContextIterator(this); +} + +AdaptiveContextIterator AdaptiveContextReader::end() { + return AdaptiveContextIterator(nullptr); +} + +bool AdaptiveContextReader::eof() { + return eof_; +} + +std::vector AdaptiveContextReader::getSamples() { + // extracted lines for source and target corpora + std::vector samples; + // counters of number of lines extracted for source and target + std::vector counts; + + // Early exit if input files are exhausted + if (eof_) return samples; + + for(auto const& file : files_) { + size_t currCount = 0; + std::string lines; + std::string line; + bool fileEnded = true; + while(io::getline(*file, line)) { + if(line.empty()) { + fileEnded = false; + break; + } + + if(currCount) + lines += "\n"; + lines += line; + currCount += 1; + } + eof_ = fileEnded; + + if(!lines.empty()) + samples.emplace_back(lines); + counts.push_back(currCount); + + // check if the same number of lines is extracted for source and target + size_t prevCount = counts[0]; + for(size_t i = 1; i < counts.size(); ++i) { + ABORT_IF(prevCount != counts[i], + "An empty source or target sentence has been encountered!"); + prevCount = counts[i]; + } + } + + return samples; +} + +} // namespace data +} // namespace marian diff --git a/src/data/adaptive_context.h b/src/data/adaptive_context.h index 4b61ff546..167cd1efe 100644 --- a/src/data/adaptive_context.h +++ b/src/data/adaptive_context.h @@ -4,6 +4,7 @@ #include "data/iterator_facade.h" namespace marian { +namespace data { class AdaptiveContextReader; @@ -21,11 +22,9 @@ class AdaptiveContextIterator // would make it difficult AdaptiveContextIterator(AdaptiveContextReader* trainSetReader); - bool equal(const AdaptiveContextIterator& other) const override { - return other.trainSetReader_ == 
trainSetReader_; - } + bool equal(const AdaptiveContextIterator& other) const override; - const std::vector& dereference() const override { return currentSamples_; } + const std::vector& dereference() const override; void increment() override; }; @@ -51,10 +50,7 @@ class AdaptiveContextReader { * can be empty (no context is provided for the respective translatable * sentence) in which case it is also represented by a single empty line. */ - AdaptiveContextReader(std::vector paths) { - for(auto& path : paths) - files_.emplace_back(new io::InputFileStream(path)); - } + AdaptiveContextReader(std::vector paths); /** * @brief Returns an iterator over the sets of context sentences produced by @@ -62,17 +58,11 @@ class AdaptiveContextReader { * * @return the beginning of the iterator. */ - AdaptiveContextIterator begin() { - return AdaptiveContextIterator(this); - } + AdaptiveContextIterator begin(); - AdaptiveContextIterator end() { - return AdaptiveContextIterator(nullptr); - } + AdaptiveContextIterator end(); - bool eof() { - return eof_; - } + bool eof(); /** * @brief Reads the next set of samples -- the contaxt sentences -- for @@ -90,67 +80,8 @@ class AdaptiveContextReader { * single file, e.g., [0] could contain 3 newline separated sentences in * English and [1] would contain their 3 respective translations in Latvian. 
*/ - std::vector getSamples() { - // extracted lines for source and target corpora - std::vector samples; - // counters of number of lines extracted for source and target - std::vector counts; - - // Early exit if input files are exhausted - if (eof_) return samples; - - for(auto const& file : files_) { - size_t currCount = 0; - std::string lines; - std::string line; - bool fileEnded = true; - while(io::getline(*file, line)) { - if(line.empty()) { - fileEnded = false; - break; - } - - if(currCount) - lines += "\n"; - lines += line; - currCount += 1; - } - eof_ = fileEnded; - - if(!lines.empty()) - samples.emplace_back(lines); - counts.push_back(currCount); - - // check if the same number of lines is extracted for source and target - size_t prevCount = counts[0]; - for(size_t i = 1; i < counts.size(); ++i) { - ABORT_IF(prevCount != counts[i], - "An empty source or target sentence has been encountered!"); - prevCount = counts[i]; - } - } - - return samples; - } + std::vector getSamples(); }; -AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSetReader) : trainSetReader_(trainSetReader) { - if(trainSetReader) { - currentSamples_ = trainSetReader_->getSamples(); - } -} - -void AdaptiveContextIterator::increment() { - // If the previous increment has exhausted the file, we must indicate that the we've reached - // the iterator's end - if(trainSetReader_->eof() && trainSetReader_ != nullptr) { - trainSetReader_ = nullptr; - return; - } - // If we're at the end of the iterator and increment has been called yet another time, there's - // a bug in the calling code - ABORT_IF(trainSetReader_ == nullptr, "Incrementing past the end of the iterator isn't allowed"); - - currentSamples_ = trainSetReader_->getSamples(); -} +} // namespace data } // namespace marian From 030ddb019fe74b2aaa38fcf8cf2b1f0e7bdaa8a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 17 Sep 2021 11:40:36 +0300 Subject: [PATCH 067/135] Introduce more 
whitespace for readability --- src/data/adaptive_context.cpp | 11 +++++++++++ src/data/adaptive_context.h | 8 +++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/src/data/adaptive_context.cpp b/src/data/adaptive_context.cpp index f63a9fc54..9ac680e83 100644 --- a/src/data/adaptive_context.cpp +++ b/src/data/adaptive_context.cpp @@ -3,6 +3,7 @@ namespace marian { namespace data { + AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSetReader) : trainSetReader_(trainSetReader) { if(trainSetReader) { @@ -10,14 +11,17 @@ AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSet } } + bool AdaptiveContextIterator::equal(const AdaptiveContextIterator& other) const { return other.trainSetReader_ == trainSetReader_; } + const std::vector& AdaptiveContextIterator::dereference() const { return currentSamples_; } + void AdaptiveContextIterator::increment() { // If the previous increment has exhausted the file, we must indicate that the we've reached // the iterator's end @@ -33,23 +37,29 @@ void AdaptiveContextIterator::increment() { } + + AdaptiveContextReader::AdaptiveContextReader(std::vector paths) { for(auto& path : paths) files_.emplace_back(new io::InputFileStream(path)); } + AdaptiveContextIterator AdaptiveContextReader::begin() { return AdaptiveContextIterator(this); } + AdaptiveContextIterator AdaptiveContextReader::end() { return AdaptiveContextIterator(nullptr); } + bool AdaptiveContextReader::eof() { return eof_; } + std::vector AdaptiveContextReader::getSamples() { // extracted lines for source and target corpora std::vector samples; @@ -93,5 +103,6 @@ std::vector AdaptiveContextReader::getSamples() { return samples; } + } // namespace data } // namespace marian diff --git a/src/data/adaptive_context.h b/src/data/adaptive_context.h index 167cd1efe..f0d2fe93a 100644 --- a/src/data/adaptive_context.h +++ b/src/data/adaptive_context.h @@ -6,17 +6,20 @@ namespace marian { namespace data { + class 
AdaptiveContextReader; + /** * @brief An iterator for easier access of the context sentences produced by * `AdaptiveContextReader::getSamples()` */ class AdaptiveContextIterator : public IteratorFacade> { -private: + AdaptiveContextReader* trainSetReader_; std::vector currentSamples_; + public: // TODO: should we use a smart pointer here instead? The TrainSetReader::begin() method // would make it difficult @@ -29,11 +32,13 @@ class AdaptiveContextIterator void increment() override; }; + /** * @brief Reads the context sentences, that are used for on-the-fly training in * the self-adaptive translation mode, from files. */ class AdaptiveContextReader { + std::vector> files_; /// Indicates whether the input files have been exhausted. bool eof_ = false; @@ -83,5 +88,6 @@ class AdaptiveContextReader { std::vector getSamples(); }; + } // namespace data } // namespace marian From c90a4d7c361e8911564ca2717041d05bc302a12b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 17 Sep 2021 14:35:20 +0300 Subject: [PATCH 068/135] Rename and move the adaptive translation function --- src/translator/self_adaptive.h | 70 +++++++++++++++++----------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 8aa5189f7..d816a9f1c 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -74,7 +74,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "Running..."); - translate(testBatches, contexts.begin(), contexts.end(), collector); + adaptAndTranslate(testBatches, contexts.begin(), contexts.end(), collector); auto translations = collector->collect(options_->get("n-best")); YAML::Emitter output; @@ -82,39 +82,6 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { return "{\"output\":" + std::string(output.c_str()) + "}"; } - template - void translate( - Ptr> - testBatches, - Iterator trainBegin, - 
Iterator trainEnd, - Ptr collector) { - auto printer = New(options_, cpuModel_->TrgVocab()); - - for(auto testBatch : *testBatches) { - ABORT_IF(trainBegin == trainEnd, "Context batches ran out before test batches"); - - auto trainSet = *trainBegin; - ++trainBegin; - - if(!trainSet.empty()) { - LOG(info, "# NEW TEST BATCH"); - trainSlot_->SetModel(cpuModel_); - trainSlot_->Train(trainSet); - translateSlot_->PointToParams(*trainSlot_); - translate(testBatch, collector, printer); - needsSwitching_ = true; - } else { - LOG(info, "# EMPTY TEST BATCH"); - if(needsSwitching_) { - translateSlot_->Load(*cpuModel_); - needsSwitching_ = false; - } - translate(testBatch, collector, printer); - } - } - } - void run() override { // Initialize input data auto srcPaths = options_->get>("input"); @@ -135,7 +102,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { LOG(info, "Running..."); - translate(testBatches, trainSets->begin(), trainSets->end(), collector); + adaptAndTranslate(testBatches, trainSets->begin(), trainSets->end(), collector); } private: @@ -148,6 +115,39 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr translateEngine_; bool needsSwitching_ = true; + template + void adaptAndTranslate( + Ptr> + testBatches, + Iterator trainBegin, + Iterator trainEnd, + Ptr collector) { + auto printer = New(options_, cpuModel_->TrgVocab()); + + for(auto testBatch : *testBatches) { + ABORT_IF(trainBegin == trainEnd, "Context batches ran out before test batches"); + + auto trainSet = *trainBegin; + ++trainBegin; + + if(!trainSet.empty()) { + LOG(info, "# NEW TEST BATCH"); + trainSlot_->SetModel(cpuModel_); + trainSlot_->Train(trainSet); + translateSlot_->PointToParams(*trainSlot_); + translate(testBatch, collector, printer); + needsSwitching_ = true; + } else { + LOG(info, "# EMPTY TEST BATCH"); + if(needsSwitching_) { + translateSlot_->Load(*cpuModel_); + needsSwitching_ = false; + } + translate(testBatch, collector, printer); + } + } 
+ } + void translate(Ptr batch, Ptr collector, Ptr printer) { From 448de67b08340677fee459c88c43c6342c8fc6fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 17 Sep 2021 14:35:42 +0300 Subject: [PATCH 069/135] Unhardcode the maximum translation input length parameter --- src/common/config_parser.cpp | 4 ++++ src/translator/self_adaptive.h | 5 ++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index b46b6a6e7..26a4d6601 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -700,6 +700,10 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { addSuboptionsTSV(cli); addSuboptionsDevices(cli); addSuboptionsBatching(cli); + } else { + cli.add("--max-length-translate", + "Maximum input sentence length for translation", + 1000); } // for self-adaptive mode vocabs are already added via the training options diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index d816a9f1c..bf942e89f 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -27,9 +27,8 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { // adaptation happens per sentence optionsTrans_->set("mini-batch", 1); optionsTrans_->set("maxi-batch", 1); - // TODO: should probably un-hardcode this? 
The issue is, though, that the users - // might want separate options for training and translation - optionsTrans_->set("max-length", 1000); + auto maxTranslationInput = options_->get("max-length-translate"); + optionsTrans_->set("max-length", maxTranslationInput); optionsTrans_->set("shuffle", "none"); auto modelFilename = options_->get("model"); From a6639ffb220df5afef3a4534d7bc7d329f199352 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 17 Sep 2021 15:26:11 +0300 Subject: [PATCH 070/135] Compile adaptive_context.cpp conditionally --- src/CMakeLists.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 44aebe6f4..471c49770 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -41,7 +41,6 @@ set(MARIAN_SOURCES data/corpus_nbest.cpp data/text_input.cpp data/shortlist.cpp - data/adaptive_context.cpp 3rd_party/cnpy/cnpy.cpp 3rd_party/ExceptionWithCallStack.cpp @@ -126,6 +125,12 @@ set(MARIAN_SOURCES $ ) +if(COMPILE_ADAPTIVE) + set(MARIAN_SOURCES ${MARIAN_SOURCES} + data/adaptive_context.cpp + ) +endif(COMPILE_ADAPTIVE) + add_library(marian STATIC ${MARIAN_SOURCES}) target_compile_options(marian PRIVATE ${ALL_WARNINGS}) From bafcae1274411fd9aea709132b73a3650019fc1c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 20 Sep 2021 13:15:20 +0300 Subject: [PATCH 071/135] Remove the marian_swapper executable --- src/CMakeLists.txt | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 471c49770..45e7e538f 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -221,10 +221,6 @@ if (NOT COMPILE_LIBRARY_ONLY) set_target_properties(marian_decoder PROPERTIES OUTPUT_NAME marian-decoder) target_compile_options(marian_decoder PRIVATE ${ALL_WARNINGS}) - add_executable(marian_swapper command/marian_swapper.cpp) - set_target_properties(marian_swapper PROPERTIES OUTPUT_NAME marian_swapper) 
- target_compile_options(marian_swapper PRIVATE ${ALL_WARNINGS}) - add_executable(marian_scorer command/marian_scorer.cpp) set_target_properties(marian_scorer PROPERTIES OUTPUT_NAME marian-scorer) target_compile_options(marian_scorer PRIVATE ${ALL_WARNINGS}) @@ -237,7 +233,7 @@ if (NOT COMPILE_LIBRARY_ONLY) set_target_properties(marian_conv PROPERTIES OUTPUT_NAME marian-conv) target_compile_options(marian_conv PRIVATE ${ALL_WARNINGS}) - set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_swapper marian_scorer marian_vocab marian_conv) + set(EXECUTABLES ${EXECUTABLES} marian_train marian_decoder marian_scorer marian_vocab marian_conv) # marian.zip and marian.tgz # This combines marian, marian_decoder in a single ZIP or TAR file for From afc5e158f037599d7509a2d541250c8f6d3791c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 20 Sep 2021 15:20:31 +0300 Subject: [PATCH 072/135] Remove dead code from the model swapping code --- src/command/marian_swapper.cpp | 98 ---------------------------------- src/tensors/gpu/swap.cu | 4 -- src/tensors/gpu/swap.h | 5 -- src/translator/swappable.cpp | 58 -------------------- src/translator/swappable.h | 11 ---- 5 files changed, 176 deletions(-) delete mode 100644 src/command/marian_swapper.cpp diff --git a/src/command/marian_swapper.cpp b/src/command/marian_swapper.cpp deleted file mode 100644 index 758501d1e..000000000 --- a/src/command/marian_swapper.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "translator/history.h" -#include "translator/output_printer.h" -#include "translator/swappable.h" - -#include -#include -#include - -namespace marian { -void LoadBig(Ptr options, std::unordered_map &to) { - to.emplace("pten", CPULoadedModel(options, - "/home/ubuntu/consistent-big-models/padded/pten.npz", - {"/home/ubuntu/consistent-big-models/padded/pten.vocab"}, - "/home/ubuntu/consistent-big-models/padded/pten.vocab")); - - to.emplace("enit", CPULoadedModel(options, - 
"/home/ubuntu/consistent-big-models/padded/enit.npz", - {"/home/ubuntu/consistent-big-models/padded/enit.vocab"}, - "/home/ubuntu/consistent-big-models/padded/enit.vocab")); -} - -void LoadTiny(Ptr options, std::unordered_map &to) { - std::vector models = {"csen", "encs", "enet", "eten", "esen", "enes"}; - for (const std::string m : models) { - std::string base = "/home/ubuntu/consistent-bergamot-students/padded/"; - base += m + "."; - to.emplace(m, CPULoadedModel(options, base + "npz", {base + "spm"}, base + "spm")); - } -} - -} // namespace - -/* Demo program: run with options for any of the models */ -int main(int argc, char** argv) { - using namespace marian; - Ptr options = parseOptions(argc, argv, cli::mode::translation); - - Ptr engine = New(options, 0); - GPULoadedModel slot(engine); - - std::unordered_map models; -// LoadBig(options, models); - LoadTiny(options, models); - - // begin with a space to avoid conflict with a real sentence. - const std::string kSwitchPrefix(" CHANGE "); - - bool alignments = !options->get("alignment").empty(); - - bool loaded = false; - std::string line; - while (std::getline(std::cin, line)) { - // Switch out which model is used. - if (line.substr(0, kSwitchPrefix.size()) == kSwitchPrefix) { - std::string key = line.substr(kSwitchPrefix.size()); - auto found = models.find(key); - if (found == models.end()) { - std::cerr << "Model for " << key << " not loaded." << std::endl; - return 1; - } - slot.Load(found->second); - loaded = true; - continue; - } - if (!loaded) { - std::cerr << "Select a model first." << std::endl; - continue; - } - - // Actually translating with a model. - marian::Histories histories = slot.Translate({line}); - // In practice there is one history because we provided one line. 
- for(auto history : histories) { - Result result(history->top()); - Words words = std::get<0>(result); - std::cout << slot.TrgVocab()->decode(words) << std::endl; - - /* Print alignments */ - if (alignments) { - Hypothesis &hypo = *std::get<1>(result); - // [t][s] -> P(s|t) - marian::data::SoftAlignment alignment(hypo.tracebackAlignment()); - // An easier call for this is: - // std:cout << data::SoftAlignToString(alignment); - // The below is just there to show how access them programatically. - // NB you can convert to hard with data::ConvertSoftAlignToHardAlign(alignment, threshold) - for (auto target : alignment) { - for (float source : target) { - std::cout << source << ' '; - } - std::cout << '\n'; - } - } - } - } - - return 0; -} diff --git a/src/tensors/gpu/swap.cu b/src/tensors/gpu/swap.cu index 16210e0c5..c16a71614 100644 --- a/src/tensors/gpu/swap.cu +++ b/src/tensors/gpu/swap.cu @@ -9,9 +9,5 @@ namespace marian { CUDA_CHECK(cudaSetDevice(deviceId.no)); CUDA_CHECK(cudaMemcpy(gpuOut, in, count, cudaMemcpyHostToDevice)); } - void copyGpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { - CUDA_CHECK(cudaSetDevice(deviceId.no)); - CUDA_CHECK(cudaMemcpy(gpuOut, in, count, cudaMemcpyDeviceToDevice)); - } } } diff --git a/src/tensors/gpu/swap.h b/src/tensors/gpu/swap.h index 7d8784266..a020c8827 100644 --- a/src/tensors/gpu/swap.h +++ b/src/tensors/gpu/swap.h @@ -6,15 +6,10 @@ namespace marian { namespace swapper { #ifdef CUDA_FOUND void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId); - void copyGpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId); #else inline void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { ABORT("Copy from CPU to GPU memory is only available with CUDA."); } - - inline void copyGpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { - 
ABORT("Copy from GPU to GPU memory is only available with CUDA."); - } #endif } } diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index fa66c0ae9..565bbcbeb 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -11,23 +11,6 @@ #include "tensors/gpu/swap.h" namespace marian { -std::string MultilineInputHack(const std::vector &input) { - if (input.size() == 1) { - return input[0]; - } else { - std::string ret; - std::size_t size = 0; - for (auto&& line : input) { - size += line.size() + 1; - } - ret.reserve(size); - for (auto&& line : input) { - ret.append(line); - ret.append("\n"); - } - return ret; - } -} namespace { DeviceId LookupGPU(const Ptr options, size_t deviceIdx) { @@ -43,17 +26,6 @@ void get(std::vector &out, MemoryPiece::PtrType mem, Ptr backe gpu::copy(backend, mem->data(), mem->data() + mem->size(), out.data()); } -void GPUEngineTrain::SwapPointers( - std::vector &with) { - auto write_it = graph_->params()->begin(); - auto read_it = with.begin(); - - std::vector outvec; - for(; read_it != with.end(); ++write_it, ++read_it) { - std::swap(*(*write_it)->val()->memory(), **read_it); - } -} - GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) : options_(options), myDeviceId_(LookupGPU(options, deviceIdx)) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); @@ -192,17 +164,6 @@ GPULoadedModel::~GPULoadedModel() { } } -void GPULoadedModel::Load(const GPULoadedModel &from) { - srcVocabs_ = from.srcVocabs_; - trgVocab_ = from.trgVocab_; - - ABORT_IF(engine_ != from.engine_, "TODO: copy across GPUs."); - - for (size_t i = 0; i < parameters_.size(); ++i) { - swapper::copyGpuToGpu(reinterpret_cast(parameters_[i]->data()), reinterpret_cast(from.parameters_[i]->data()), parameters_[i]->size(), engine_->myDeviceId_); - } -} - void GPULoadedModel::PointToParams(const GPULoadedModelTrain &from) { ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: 
copy across GPUs."); srcVocabs_ = from.srcVocabs_; @@ -218,25 +179,6 @@ void GPULoadedModel::Load(const CPULoadedModel &from) { } } -Histories GPULoadedModel::Translate(const std::vector &input) { - ABORT_IF(!trgVocab_, "GPULoadedModel needs to be overwritten by a CPU model first."); - engine_->SwapPointers(parameters_); - - auto corpus = New(std::vector(1, MultilineInputHack(input)), srcVocabs_, engine_->options_); // @TODO dirty hack - data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash - - BeamSearch search(engine_->options_, engine_->scorers_, trgVocab_); - Histories ret; - ret.reserve(input.size()); - for (auto&& batch : batchGenerator) { - auto result = search.search(engine_->graph_, batch); - ret.insert(ret.end(), result.begin(), result.end()); - } - std::sort(ret.begin(), ret.end(),[](marian::Ptr a, marian::Ptr b){return a->getLineNum() < b->getLineNum();}); - engine_->SwapPointers(parameters_); - return ret; -} - Histories GPULoadedModel::Translate(const Ptr batch) { ABORT_IF(!trgVocab_, "GPULoadedModel needs to be overwritten by a CPU model first."); // std::vector outvec; diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 7aee61c9d..6128b5db4 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -33,7 +33,6 @@ class GPUEngineTrain { Ptr builder_; const DeviceId myDeviceId_; - void SwapPointers(std::vector &with); void RecreateGraphAndBuilder(); public: @@ -76,13 +75,6 @@ class GPULoadedModelTrain { }; - - -// ##### ^ above is stuff for runtime domain adaptation - - - - /* Execute on a particular device */ class GPUEngine { private: @@ -126,11 +118,8 @@ class GPULoadedModel { // Overwrite this model with parameters from a different one. 
void Load(const CPULoadedModel &from); - void Load(const GPULoadedModel &from); - void Load(const GPULoadedModelTrain &from); void PointToParams(const GPULoadedModelTrain &from); - Histories Translate(const std::vector &input); Histories Translate(const Ptr batch); }; From 6aeb510ce78e0b955dff93829e764be7cc8cbe76 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 21 Sep 2021 16:03:36 +0300 Subject: [PATCH 073/135] Rename some swappable classes and improve documentation --- src/data/adaptive_context.h | 2 +- src/translator/self_adaptive.h | 8 +-- src/translator/swappable.cpp | 25 ++++---- src/translator/swappable.h | 106 ++++++++++++++++++++++----------- 4 files changed, 89 insertions(+), 52 deletions(-) diff --git a/src/data/adaptive_context.h b/src/data/adaptive_context.h index f0d2fe93a..80d2213da 100644 --- a/src/data/adaptive_context.h +++ b/src/data/adaptive_context.h @@ -81,7 +81,7 @@ class AdaptiveContextReader { * represented by a single empty line. * * @return a vector representing a single group of context sentences. Each - * element in the vector contains newline seperated input lines comming from a + * element in the vector contains newline separated input lines comming from a * single file, e.g., [0] could contain 3 newline separated sentences in * English and [1] would contain their 3 respective translations in Latvian. 
*/ diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index bf942e89f..962ac164c 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -39,10 +39,10 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { auto vocabPaths = options_->get>("vocabs"); std::vector srcVocabPaths(vocabPaths.begin(), vocabPaths.end() - 1); cpuModel_ = New(options_, modelFilename, srcVocabPaths, vocabPaths.back()); - translateEngine_ = New(optionsTrans_, 0); + translateEngine_ = New(optionsTrans_, 0); translateSlot_ = New(translateEngine_); trainEngine_ = New(options_, 0); - trainSlot_ = New(trainEngine_); + trainSlot_ = New(trainEngine_); } std::string run(const std::string& json) override { @@ -108,10 +108,10 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Ptr options_; // Options for training Ptr optionsTrans_; // Options for translator Ptr cpuModel_; - Ptr trainSlot_; + Ptr trainSlot_; Ptr translateSlot_; Ptr trainEngine_; - Ptr translateEngine_; + Ptr translateEngine_; bool needsSwitching_ = true; template diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 565bbcbeb..dff2e02e4 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -48,24 +48,23 @@ void GPUEngineTrain::RecreateGraphAndBuilder() { GPUEngineTrain::~GPUEngineTrain() {} -GPULoadedModelTrain::GPULoadedModelTrain(Ptr gpu) : engine_(gpu) { +SwappableModelTrainer::SwappableModelTrainer(Ptr gpu) : engine_(gpu) { } -GPULoadedModelTrain::~GPULoadedModelTrain() { +SwappableModelTrainer::~SwappableModelTrainer() { } -void GPULoadedModelTrain::SetModel(Ptr from) { +void SwappableModelTrainer::SetModel(Ptr from) { srcVocabs_ = from->SrcVocabs(); trgVocab_ = from->TrgVocab(); cpuModel_ = from; } -std::vector GPULoadedModelTrain::Parameters() const { +std::vector SwappableModelTrainer::Parameters() const { return engine_->graph_->params()->toMemoryPieces(); } -// Load the initial model 
(dropping any previous changes) and train it on the provided input -void GPULoadedModelTrain::Train(const std::vector &input) { +void SwappableModelTrainer::Train(const std::vector &input) { ABORT_IF(!trgVocab_, "GPULoadedModelTrain needs to be overwritten by a CPU model first."); auto state = New(engine_->options_->get("learn-rate")); @@ -78,8 +77,8 @@ void GPULoadedModelTrain::Train(const std::vector &input) { allVocabs.reserve(srcVocabs_.size() + 1); allVocabs.insert(allVocabs.end(), srcVocabs_.begin(), srcVocabs_.end()); allVocabs.emplace_back(trgVocab_); - auto corpus = New(input, allVocabs, engine_->options_); // @TODO dirty hack - data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // @TODO if the asynchronous batch preparation = true, but we supply less text than the mini-batch size we crash + auto corpus = New(input, allVocabs, engine_->options_); + data::BatchGenerator batchGenerator(corpus, engine_->options_, nullptr, false); // We reset the training graph to the original model parameters to prepare // for adapting it to the new inputs @@ -121,7 +120,7 @@ void GPULoadedModelTrain::Train(const std::vector &input) { -void GPUEngine::SwapPointers(std::vector &with) { +void GPUEngineTranslate::SwapPointers(std::vector &with) { auto write_it = graph_->params()->begin(); auto read_it = with.begin(); for (; read_it != with.end(); ++write_it, ++read_it) { @@ -129,7 +128,7 @@ void GPUEngine::SwapPointers(std::vector &with) { } } -GPUEngine::GPUEngine(Ptr options, size_t deviceIdx) +GPUEngineTranslate::GPUEngineTranslate(Ptr options, size_t deviceIdx) : options_(options), graph_(New(true)), myDeviceId_(LookupGPU(options, deviceIdx)), allocator_(myDeviceId_, 0, 128 * 1048576) { ABORT_IF(myDeviceId_.type == DeviceType::cpu, "Swappable slot only works for GPU devices."); options_->set("inference", true); @@ -150,9 +149,9 @@ GPUEngine::GPUEngine(Ptr options, size_t deviceIdx) // TODO: reach into graph_->params() private members and free the 
parameter memory. } -GPUEngine::~GPUEngine() {} +GPUEngineTranslate::~GPUEngineTranslate() {} -GPULoadedModel::GPULoadedModel(Ptr gpu) : engine_(gpu) { +GPULoadedModel::GPULoadedModel(Ptr gpu) : engine_(gpu) { for (auto ¶m : *engine_->graph_->params()) { parameters_.push_back(engine_->allocator_.alloc(param->val()->memory()->size())); } @@ -164,7 +163,7 @@ GPULoadedModel::~GPULoadedModel() { } } -void GPULoadedModel::PointToParams(const GPULoadedModelTrain &from) { +void GPULoadedModel::PointToParams(const SwappableModelTrainer &from) { ABORT_IF(engine_->myDeviceId_ != from.engine_->myDeviceId_, "TODO: copy across GPUs."); srcVocabs_ = from.srcVocabs_; trgVocab_ = from.trgVocab_; diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 6128b5db4..ffa2666ee 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -15,7 +15,7 @@ #include namespace marian { -class GPULoadedModelTrain; +class SwappableModelTrainer; class Scorer; @@ -23,10 +23,13 @@ class GPULoadedModel; class CPULoadedModel; -/* Execute on a particular device */ +/** + * The class wraps an expression graph and a model builder that are used by + * `SwappableModelTrainer` for training a model. + */ class GPUEngineTrain { private: - friend class GPULoadedModelTrain; + friend class SwappableModelTrainer; friend class GPULoadedModel; Ptr options_; Ptr graph_; @@ -46,8 +49,14 @@ class GPUEngineTrain { ~GPUEngineTrain(); }; -/* A model loaded on the GPU that can be overwritten from CPU or GPU. */ -class GPULoadedModelTrain { +/** + * @brief Wraps a `GPUEngineTrain` and a `CPULoadedModel` and performs model + * training. + * + * This class is created with self-adaptive translation in mind. Each invocation + * of Train() resets the model parameters at the start of training. 
+ */ +class SwappableModelTrainer { private: friend class GPULoadedModel; @@ -58,57 +67,74 @@ class GPULoadedModelTrain { Ptr trgVocab_; public: - GPULoadedModelTrain(Ptr gpu); + SwappableModelTrainer(Ptr gpu); - ~GPULoadedModelTrain(); + ~SwappableModelTrainer(); const std::vector> &SrcVocabs() const { return srcVocabs_; } Ptr TrgVocab() const { return trgVocab_; } - // Change the internal pointers to vocabularies and CPULoadedModel to different ones + /// Change the internal pointers to vocabularies and CPULoadedModel to + /// different ones void SetModel(Ptr from); std::vector Parameters() const; + /** + * @brief resets the training graph, reloads the model parameters and trains + * the model on the provided inputs. + * + * Intended to be used in the self-adaptive translation mode -- training is + * always performed on the original model parameters, each training + * invocation resets previous changes. + * + * @param input Training data. A vector representing a parallel corpus -- + * vector elements are the different sides of a parallel corpus, each is a + * newline separated set of sentences in a single language. + */ void Train(const std::vector &input); }; +/** + * The class wraps an expression graph and scorers that are used by + * `GPULoadedModel` for translation. + */ +class GPUEngineTranslate { +private: + friend class GPULoadedModel; + Ptr options_; + Ptr graph_; + std::vector> scorers_; + const DeviceId myDeviceId_; + Allocator allocator_; -/* Execute on a particular device */ -class GPUEngine { - private: - friend class GPULoadedModel; - Ptr options_; - Ptr graph_; - std::vector > scorers_; - const DeviceId myDeviceId_; - Allocator allocator_; - - void SwapPointers(std::vector &with); + void SwapPointers(std::vector &with); - public: - /** - * @param options The marian options object - * @param deviceNum The index of the device you want to use for this slot. 
Note that this is not the deviceID but the index of the device in the - * array of supplied devices. Eg if you provide -d 0 3 5 and you want the Slot to run on GPU 3, you provide deviceNum=1. - */ - explicit GPUEngine(Ptr options, size_t deviceNum); +public: + /** + * @param options The marian options object + * @param deviceNum The index of the device you want to use for this slot. Note that this is not the deviceID but the index of the device in the + * array of supplied devices. Eg if you provide -d 0 3 5 and you want the Slot to run on GPU 3, you provide deviceNum=1. + */ + explicit GPUEngineTranslate(Ptr options, size_t deviceNum); - ~GPUEngine(); + ~GPUEngineTranslate(); }; -/* A model loaded on the GPU that can be overwritten from CPU or GPU. */ +/** A model loaded on the GPU that can be overwritten from CPU. Facilitates + * translation with the model. + */ class GPULoadedModel { private: - Ptr engine_; + Ptr engine_; std::vector parameters_; std::vector> srcVocabs_; Ptr trgVocab_; public: - GPULoadedModel(Ptr gpu); + GPULoadedModel(Ptr gpu); ~GPULoadedModel(); @@ -116,14 +142,25 @@ class GPULoadedModel { Ptr TrgVocab() const { return trgVocab_; } - // Overwrite this model with parameters from a different one. + /// Overwrite this model with parameters from a different one. void Load(const CPULoadedModel &from); - void PointToParams(const GPULoadedModelTrain &from); + /** + * @brief Set the internal shared pointers to model parameters and + * vocabularies to different ones + * + * The effect is similar to `Load()` but nothing is copied in the process. + * + * @param from Swappable model trainer from which to take the shared + * pointers to model parameters and vocabularies. + */ + void PointToParams(const SwappableModelTrainer &from); Histories Translate(const Ptr batch); }; -/* A model loaded on the CPU. */ +/** + * A model loaded on the CPU. Holds model parameters and vocabularies. 
+ */ class CPULoadedModel { private: std::vector parameters_; @@ -131,7 +168,8 @@ class CPULoadedModel { Ptr trgVocab_; public: - // The parts of Options that relate to model and vocab are ignored. The files provided will be loaded. + // The parts of Options that relate to model and vocab are ignored. The + // files provided will be loaded. CPULoadedModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath); const std::vector &Parameters() const { return parameters_; } From ad38da9ae18c6afbc8957189cfea00d31a3a7b0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 21 Sep 2021 17:02:19 +0300 Subject: [PATCH 074/135] Describe the purpose of swappable.h --- src/translator/swappable.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/src/translator/swappable.h b/src/translator/swappable.h index ffa2666ee..8bd9d84bd 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -1,9 +1,18 @@ #pragma once -/* Support for swapping models in and out of a GPU, when you have more models - * than fit in the GPU's RAM. The models must have identical graphs, including - * size. They can have different parameters and different vocabularies but the - * vocabularies must have the same size. To make vocabulary the same size, pad - * using scripts/contrib/pad_model_vocabulary.py offline. +/* Support for swapping and resetting models for the self-adaptive translation + * mode. The intended use case is to store a read-only copy of the model in + * `CPULoadedModel`, optionally train on a copy of the parameters using + * `SwappableModelTrainer` and then transfer either the trained or original + * model parameters into `GPULoadedModel` for translation. `GPUEngineTrain` and + * `GPUEngineTranslate` are used for storing the expression graphs for training + * and translation, respectively, and other related things. 
Translation on the + * CPU currently isn't supported. + * + * Originally this code was intended to allow multiple models to share a single + * GPU for translation and be swapped into GPU memory only when needed. However, + * parts of it, that weren't needed for self-adaptive translation, have been + * trimmed down since then. Look into the commit history if you want to revive + * this functionality. */ #include "common/io.h" #include "data/vocab.h" From 295040db405a95516125b221612273494ee6089b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 22 Sep 2021 13:41:37 +0300 Subject: [PATCH 075/135] Explain the purpose of self-adaptive code --- src/translator/self_adaptive.h | 8 ++++++++ src/translator/swappable.h | 3 ++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 962ac164c..94fdf4853 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -14,6 +14,14 @@ namespace marian { using namespace data; +/** + * @brief Implementation of the self-adaptive translation mode. + * + * Self-adaptive translation means optionally using a set of context sentences + * (e.g., provided by a translation memory), that are similar to the + * translatable sentence, to train the model for a few iterations to fine-tune + * it before translating the given sentence. + */ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { public: TrainSelfAdaptive(Ptr options) : options_(options) { diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 8bd9d84bd..3f6864751 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -1,5 +1,6 @@ #pragma once -/* Support for swapping and resetting models for the self-adaptive translation +/** + * Support for swapping and resetting models for the self-adaptive translation * mode. 
The intended use case is to store a read-only copy of the model in * `CPULoadedModel`, optionally train on a copy of the parameters using * `SwappableModelTrainer` and then transfer either the trained or original From 6311f2bb4534475fcc9d1c0c6e13541632484312 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 22 Sep 2021 14:49:59 +0300 Subject: [PATCH 076/135] Improve comments in self-adaptive code --- src/translator/self_adaptive.h | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 94fdf4853..f129777f1 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -25,9 +25,6 @@ using namespace data; class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { public: TrainSelfAdaptive(Ptr options) : options_(options) { - - // @TODO: should probably better re-enable the shuffling related options - // in config for marian-adaptive options_->set("shuffle", "none"); // Set up translator options optionsTrans_ = New(options_->clone()); @@ -40,8 +37,9 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { optionsTrans_->set("shuffle", "none"); auto modelFilename = options_->get("model"); - // Training has a single "model", translation can have multiple "models" in the general case. - // Adaptive options also take a single "model" so we have to adapt translation options manually. + // Training has a single "model", translation can have multiple "models" in + // the general case. Adaptive options also take only a single "model" so we + // have to adapt translation options manually. 
optionsTrans_->set>("models", {modelFilename}); auto vocabPaths = options_->get>("vocabs"); @@ -53,6 +51,16 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { trainSlot_ = New(trainEngine_); } + /** + * @brief Implementation for self-adaptive translation where data come from a + * web request. + * + * @param json Input data in JSON. An "input" array of strings is expected to + * contain translatable sentences, each of which has a corresponding set of + * context sentences as a sub-array in the "context" array. + * + * @return JSON-encoded translations + */ std::string run(const std::string& json) override { //LOG(warn, "REMOVEME Received Json:\n{}", json); @@ -89,6 +97,10 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { return "{\"output\":" + std::string(output.c_str()) + "}"; } + /** + * @brief Implementation for self-adaptive translation where inputs and + * outputs are specified in CLI options. + */ void run() override { // Initialize input data auto srcPaths = options_->get>("input"); From 6bf3445516a6c561bfb6b1090ee585859ec3da58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 24 Sep 2021 14:59:29 +0300 Subject: [PATCH 077/135] Check that param names and sizes match upon loading --- src/translator/swappable.cpp | 32 ++++++++++++++++++++++++++++++-- src/translator/swappable.h | 1 + 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index dff2e02e4..6920d41c3 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -153,6 +153,7 @@ GPUEngineTranslate::~GPUEngineTranslate() {} GPULoadedModel::GPULoadedModel(Ptr gpu) : engine_(gpu) { for (auto ¶m : *engine_->graph_->params()) { + names_.push_back(param->name()); parameters_.push_back(engine_->allocator_.alloc(param->val()->memory()->size())); } } @@ -173,8 +174,35 @@ void GPULoadedModel::PointToParams(const SwappableModelTrainer &from) { void 
GPULoadedModel::Load(const CPULoadedModel &from) { srcVocabs_ = from.SrcVocabs(); trgVocab_ = from.TrgVocab(); - for (size_t i = 0; i < parameters_.size(); ++i) { - swapper::copyCpuToGpu(reinterpret_cast(parameters_[i]->data()), from.Parameters()[i].data(), from.Parameters()[i].size(), engine_->myDeviceId_); + auto fromParams = from.Parameters(); + + auto printParamsAndExit = [&]() { + std::ostringstream paramNames; + for(size_t i = 0; i < parameters_.size(); ++i) { + paramNames << " TO (" << names_[i] << ") size: " << parameters_[i]->size() << "\n"; + } + for(size_t i = 0; i < fromParams.size(); ++i) { + paramNames << " FROM (" << fromParams[i].name << ") size: " << fromParams[i].size() << "\n"; + } + LOG(error, + "Attempting to load parameters with mismatched names or sizes:\n{}", + paramNames.str()); + ABORT("Attempting to load parameters with mismatched names or sizes."); + }; + + // Sanity check + if (parameters_.size() != fromParams.size()) + printParamsAndExit(); + + for(size_t i = 0; i < parameters_.size(); ++i) { + // Sanity check + if (names_[i] != fromParams[i].name || parameters_[i]->size() != fromParams[i].size()) + printParamsAndExit(); + + swapper::copyCpuToGpu(reinterpret_cast(parameters_[i]->data()), + fromParams[i].data(), + fromParams[i].size(), + engine_->myDeviceId_); } } diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 3f6864751..af3cffa4c 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -139,6 +139,7 @@ class GPULoadedModel { private: Ptr engine_; + std::vector names_; std::vector parameters_; std::vector> srcVocabs_; Ptr trgVocab_; From 5cac0d1ac02673bd8bf0b0e5c4e9a16842bc6eb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 24 Sep 2021 15:00:58 +0300 Subject: [PATCH 078/135] Fix amun model loading --- src/models/amun.h | 19 ++++++++++++------- src/translator/swappable.cpp | 19 ++++++++++++------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git 
a/src/models/amun.h b/src/models/amun.h index 1bfda2697..9c42d4015 100644 --- a/src/models/amun.h +++ b/src/models/amun.h @@ -35,9 +35,7 @@ class Amun : public EncoderDecoder { "use --type s2s"); } - void load(Ptr graph, - const std::string& name, - bool /*markedReloaded*/ = true) override { + static void remapIoItems(std::vector &ioItems, bool tiedEmbeddinsSrcOrAll) { std::map nameMap = {{"decoder_U", "decoder_cell1_U"}, {"decoder_Ux", "decoder_cell1_Ux"}, @@ -86,12 +84,9 @@ class Amun : public EncoderDecoder { {"encoder_r_gamma1", "encoder_bi_r_gamma1"}, {"encoder_r_gamma2", "encoder_bi_r_gamma2"}}; - if(opt("tied-embeddings-src") || opt("tied-embeddings-all")) + if (tiedEmbeddinsSrcOrAll) nameMap["Wemb"] = "Wemb"; - LOG(info, "Loading model from {}", name); - // load items from .npz file - auto ioItems = io::loadItems(name); // map names and remove a dummy matrices for(auto it = ioItems.begin(); it != ioItems.end();) { // for backwards compatibility, turn one-dimensional vector into two dimensional matrix with first dimension being 1 and second dimension of the original size @@ -116,6 +111,16 @@ class Amun : public EncoderDecoder { it++; } } + } + + void load(Ptr graph, + const std::string& name, + bool /*markedReloaded*/ = true) override { + LOG(info, "Loading model from {}", name); + // load items from .npz file + auto ioItems = io::loadItems(name); + // remap item names and remove dummy matrices + remapIoItems(ioItems, opt("tied-embeddings-src") || opt("tied-embeddings-all")); // load items into the graph graph->load(ioItems); } diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 6920d41c3..56d3dd990 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -9,6 +9,7 @@ #include "common/timer.h" #include #include "tensors/gpu/swap.h" +#include "models/amun.h" namespace marian { @@ -228,15 +229,19 @@ Histories GPULoadedModel::Translate(const Ptr batch) { CPULoadedModel::CPULoadedModel(Ptr options, const 
std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) : parameters_(io::loadItems(parameters)) { // Load parameters. + //Remap the parameter names if the model uses an older naming convention + if (options->get("type") == "amun") { + bool tied = options->get("tied-embeddings-src") || options->get("tied-embeddings-all"); + Amun::remapIoItems(parameters_, tied); + } + // Find the special element and remove it: - size_t special_idx = 0; - for (size_t i = 0; i < parameters_.size(); i++) { - if (parameters_[i].name == "special:model.yml") { - special_idx = i; - break; - } + auto pred = [](const io::Item &item) { return item.name == "special:model.yml"; }; + auto special_it = std::find_if(parameters_.begin(), parameters_.end(), pred); + if (special_it != parameters_.end()) { + parameters_.erase(special_it); } - parameters_.erase(parameters_.begin() + special_idx); + // Prepare the name so that it matches the named map for (auto&& item : parameters_) { item.name = "F0::" + item.name; From 1e1397d767751c9b3429e74a00152fedd94dff56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 24 Sep 2021 15:29:37 +0300 Subject: [PATCH 079/135] Implement parameter name remapping for nematus models --- src/models/nematus.h | 340 ++++++++++++++++++----------------- src/translator/swappable.cpp | 16 +- 2 files changed, 184 insertions(+), 172 deletions(-) diff --git a/src/models/nematus.h b/src/models/nematus.h index 730418e5f..a5c0c1c3f 100644 --- a/src/models/nematus.h +++ b/src/models/nematus.h @@ -8,7 +8,7 @@ namespace marian { class Nematus : public EncoderDecoder { public: - Nematus(Ptr graph, Ptr options) : EncoderDecoder(graph, options), nameMap_(createNameMap()) { + Nematus(Ptr graph, Ptr options) : EncoderDecoder(graph, options), nameMap_(createNameMap(options)) { ABORT_IF(options_->get("enc-type") != "bidirectional", "--type nematus does not support other encoder " "type than bidirectional, use --type s2s"); @@ 
-25,178 +25,188 @@ class Nematus : public EncoderDecoder { "--dec-cell-high-depth > 1, use --type s2s"); } - void load(Ptr graph, - const std::string& name, - bool /*markReloaded*/ = true) override { - LOG(info, "Loading model from {}", name); - // load items from .npz file - auto ioItems = io::loadItems(name); - // map names and remove a dummy matrix 'decoder_c_tt' from items to avoid creating isolated node - for(auto it = ioItems.begin(); it != ioItems.end();) { - // for backwards compatibility, turn one-dimensional vector into two dimensional matrix with first dimension being 1 and second dimension of the original size - // @TODO: consider dropping support for Nematus models - if(it->shape.size() == 1) { - int dim = it->shape[-1]; - it->shape.resize(2); - it->shape.set(0, 1); - it->shape.set(1, dim); - } - - if(it->name == "decoder_c_tt") { - it = ioItems.erase(it); - } else if(it->name == "uidx") { - it = ioItems.erase(it); - } else if(it->name == "history_errs") { - it = ioItems.erase(it); - } else { - auto pair = nameMap_.find(it->name); - if(pair != nameMap_.end()) - it->name = pair->second; - it++; - } - } - // load items into the graph - graph->load(ioItems); + static void remapIoItems(std::vector& ioItems, Ptr options) { + remapIoItems(ioItems, createNameMap(options), options); } - void save(Ptr graph, - const std::string& name, - bool saveTranslatorConfig = false) override { - LOG(info, "Saving model to {}", name); - - // prepare reversed map - if(nameMapRev_.empty()) - for(const auto& kv : nameMap_) - nameMapRev_.insert({kv.second, kv.first}); - - // get parameters from the graph to items - std::vector ioItems; - graph->save(ioItems); - // replace names to be compatible with Nematus - for(auto& item : ioItems) { - auto newItemName = nameMapRev_.find(item.name); - if(newItemName != nameMapRev_.end()) - item.name = newItemName->second; - } - // add a dummy matrix 'decoder_c_tt' required for Amun and Nematus - ioItems.emplace_back(); - ioItems.back().name 
= "decoder_c_tt"; - ioItems.back().shape = Shape({1, 0}); - ioItems.back().bytes.emplace_back((char)0); - - io::addMetaToItems(getModelParametersAsString(), "special:model.yml", ioItems); - io::saveItems(name, ioItems); - - if(saveTranslatorConfig) { - createAmunConfig(name); - createDecoderConfig(name); + void load(Ptr graph, const std::string& name, bool /*markReloaded*/ = true) + override { + LOG(info, "Loading model from {}", name); + // load items from .npz file + auto ioItems = io::loadItems(name); + + // map names and remove a dummy matrices + remapIoItems(ioItems, nameMap_, options_); + + // load items into the graph + graph->load(ioItems); } - } -private: - std::map nameMap_; - std::map nameMapRev_; - - std::map createNameMap() { - std::map nameMap - = {{"decoder_U", "decoder_cell1_U"}, - {"decoder_Ux", "decoder_cell1_Ux"}, - {"decoder_W", "decoder_cell1_W"}, - {"decoder_Wx", "decoder_cell1_Wx"}, - {"decoder_b", "decoder_cell1_b"}, - {"decoder_bx", "decoder_cell1_bx"}, - {"decoder_U_nl", "decoder_cell2_U"}, - {"decoder_Ux_nl", "decoder_cell2_Ux"}, - {"decoder_Wc", "decoder_cell2_W"}, - {"decoder_Wcx", "decoder_cell2_Wx"}, - {"decoder_b_nl", "decoder_cell2_b"}, - {"decoder_bx_nl", "decoder_cell2_bx"}, - {"ff_logit_prev_W", "decoder_ff_logit_l1_W0"}, - {"ff_logit_lstm_W", "decoder_ff_logit_l1_W1"}, - {"ff_logit_ctx_W", "decoder_ff_logit_l1_W2"}, - {"ff_logit_prev_b", "decoder_ff_logit_l1_b0"}, - {"ff_logit_lstm_b", "decoder_ff_logit_l1_b1"}, - {"ff_logit_ctx_b", "decoder_ff_logit_l1_b2"}, - {"ff_logit_W", "decoder_ff_logit_l2_W"}, - {"ff_logit_b", "decoder_ff_logit_l2_b"}, - {"ff_state_W", "decoder_ff_state_W"}, - {"ff_state_b", "decoder_ff_state_b"}, - {"Wemb_dec", "decoder_Wemb"}, - {"Wemb", "encoder_Wemb"}, - {"encoder_U", "encoder_bi_U"}, - {"encoder_Ux", "encoder_bi_Ux"}, - {"encoder_W", "encoder_bi_W"}, - {"encoder_Wx", "encoder_bi_Wx"}, - {"encoder_b", "encoder_bi_b"}, - {"encoder_bx", "encoder_bi_bx"}, - {"encoder_r_U", "encoder_bi_r_U"}, - 
{"encoder_r_Ux", "encoder_bi_r_Ux"}, - {"encoder_r_W", "encoder_bi_r_W"}, - {"encoder_r_Wx", "encoder_bi_r_Wx"}, - {"encoder_r_b", "encoder_bi_r_b"}, - {"encoder_r_bx", "encoder_bi_r_bx"}, - {"ff_state_ln_s", "decoder_ff_state_ln_s"}, - {"ff_state_ln_b", "decoder_ff_state_ln_b"}, - {"ff_logit_prev_ln_s", "decoder_ff_logit_l1_ln_s0"}, - {"ff_logit_lstm_ln_s", "decoder_ff_logit_l1_ln_s1"}, - {"ff_logit_ctx_ln_s", "decoder_ff_logit_l1_ln_s2"}, - {"ff_logit_prev_ln_b", "decoder_ff_logit_l1_ln_b0"}, - {"ff_logit_lstm_ln_b", "decoder_ff_logit_l1_ln_b1"}, - {"ff_logit_ctx_ln_b", "decoder_ff_logit_l1_ln_b2"}}; - - // add mapping for deep encoder cells - std::vector suffixes = {"_U", "_Ux", "_b", "_bx"}; - for(int i = 1; i < options_->get("enc-cell-depth"); ++i) { - std::string num1 = std::to_string(i); - std::string num2 = std::to_string(i + 1); - for(auto suf : suffixes) { - nameMap.insert({"encoder" + suf + "_drt_" + num1, "encoder_bi_cell" + num2 + suf}); - nameMap.insert({"encoder_r" + suf + "_drt_" + num1, "encoder_bi_r_cell" + num2 + suf}); + void save( + Ptr graph, const std::string& name, bool saveTranslatorConfig = false) + override { + LOG(info, "Saving model to {}", name); + + // prepare reversed map + if(nameMapRev_.empty()) + for(const auto& kv : nameMap_) + nameMapRev_.insert({kv.second, kv.first}); + + // get parameters from the graph to items + std::vector ioItems; + graph->save(ioItems); + // replace names to be compatible with Nematus + for(auto& item : ioItems) { + auto newItemName = nameMapRev_.find(item.name); + if(newItemName != nameMapRev_.end()) + item.name = newItemName->second; + } + // add a dummy matrix 'decoder_c_tt' required for Amun and Nematus + ioItems.emplace_back(); + ioItems.back().name = "decoder_c_tt"; + ioItems.back().shape = Shape({1, 0}); + ioItems.back().bytes.emplace_back((char)0); + + io::addMetaToItems(getModelParametersAsString(), "special:model.yml", ioItems); + io::saveItems(name, ioItems); + + if(saveTranslatorConfig) { + 
createAmunConfig(name); + createDecoderConfig(name); } } - // add mapping for deep decoder cells - for(int i = 3; i <= options_->get("dec-cell-base-depth"); ++i) { - std::string num1 = std::to_string(i - 2); - std::string num2 = std::to_string(i); - for(auto suf : suffixes) - nameMap.insert({"decoder" + suf + "_nl_drt_" + num1, "decoder_cell" + num2 + suf}); - } - // add mapping for normalization layers - std::map nameMapCopy(nameMap); - for(auto& kv : nameMapCopy) { - std::string prefix = kv.first.substr(0, 7); - - if(prefix == "encoder" || prefix == "decoder") { - nameMap.insert({kv.first + "_lns", kv.second + "_lns"}); - nameMap.insert({kv.first + "_lnb", kv.second + "_lnb"}); + + private: + std::map nameMap_; + std::map nameMapRev_; + + static void remapIoItems(std::vector& ioItems, std::map nameMap, Ptr options) { + // map names and remove a dummy matrix 'decoder_c_tt' from items to avoid creating isolated node + for(auto it = ioItems.begin(); it != ioItems.end();) { + // for backwards compatibility, turn one-dimensional vector into two dimensional matrix with first dimension being 1 and second dimension of the original size + // @TODO: consider dropping support for Nematus models + if(it->shape.size() == 1) { + int dim = it->shape[-1]; + it->shape.resize(2); + it->shape.set(0, 1); + it->shape.set(1, dim); + } + + if(it->name == "decoder_c_tt") { + it = ioItems.erase(it); + } else if(it->name == "uidx") { + it = ioItems.erase(it); + } else if(it->name == "history_errs") { + it = ioItems.erase(it); + } else { + auto pair = nameMap.find(it->name); + if(pair != nameMap.end()) + it->name = pair->second; + it++; + } } } - return nameMap; - } + static std::map createNameMap(Ptr options) { + std::map nameMap + = {{"decoder_U", "decoder_cell1_U"}, + {"decoder_Ux", "decoder_cell1_Ux"}, + {"decoder_W", "decoder_cell1_W"}, + {"decoder_Wx", "decoder_cell1_Wx"}, + {"decoder_b", "decoder_cell1_b"}, + {"decoder_bx", "decoder_cell1_bx"}, + {"decoder_U_nl", "decoder_cell2_U"}, 
+ {"decoder_Ux_nl", "decoder_cell2_Ux"}, + {"decoder_Wc", "decoder_cell2_W"}, + {"decoder_Wcx", "decoder_cell2_Wx"}, + {"decoder_b_nl", "decoder_cell2_b"}, + {"decoder_bx_nl", "decoder_cell2_bx"}, + {"ff_logit_prev_W", "decoder_ff_logit_l1_W0"}, + {"ff_logit_lstm_W", "decoder_ff_logit_l1_W1"}, + {"ff_logit_ctx_W", "decoder_ff_logit_l1_W2"}, + {"ff_logit_prev_b", "decoder_ff_logit_l1_b0"}, + {"ff_logit_lstm_b", "decoder_ff_logit_l1_b1"}, + {"ff_logit_ctx_b", "decoder_ff_logit_l1_b2"}, + {"ff_logit_W", "decoder_ff_logit_l2_W"}, + {"ff_logit_b", "decoder_ff_logit_l2_b"}, + {"ff_state_W", "decoder_ff_state_W"}, + {"ff_state_b", "decoder_ff_state_b"}, + {"Wemb_dec", "decoder_Wemb"}, + {"Wemb", "encoder_Wemb"}, + {"encoder_U", "encoder_bi_U"}, + {"encoder_Ux", "encoder_bi_Ux"}, + {"encoder_W", "encoder_bi_W"}, + {"encoder_Wx", "encoder_bi_Wx"}, + {"encoder_b", "encoder_bi_b"}, + {"encoder_bx", "encoder_bi_bx"}, + {"encoder_r_U", "encoder_bi_r_U"}, + {"encoder_r_Ux", "encoder_bi_r_Ux"}, + {"encoder_r_W", "encoder_bi_r_W"}, + {"encoder_r_Wx", "encoder_bi_r_Wx"}, + {"encoder_r_b", "encoder_bi_r_b"}, + {"encoder_r_bx", "encoder_bi_r_bx"}, + {"ff_state_ln_s", "decoder_ff_state_ln_s"}, + {"ff_state_ln_b", "decoder_ff_state_ln_b"}, + {"ff_logit_prev_ln_s", "decoder_ff_logit_l1_ln_s0"}, + {"ff_logit_lstm_ln_s", "decoder_ff_logit_l1_ln_s1"}, + {"ff_logit_ctx_ln_s", "decoder_ff_logit_l1_ln_s2"}, + {"ff_logit_prev_ln_b", "decoder_ff_logit_l1_ln_b0"}, + {"ff_logit_lstm_ln_b", "decoder_ff_logit_l1_ln_b1"}, + {"ff_logit_ctx_ln_b", "decoder_ff_logit_l1_ln_b2"}}; + + // add mapping for deep encoder cells + std::vector suffixes = {"_U", "_Ux", "_b", "_bx"}; + for(int i = 1; i < options->get("enc-cell-depth"); ++i) { + std::string num1 = std::to_string(i); + std::string num2 = std::to_string(i + 1); + for(auto suf : suffixes) { + nameMap.insert({"encoder" + suf + "_drt_" + num1, "encoder_bi_cell" + num2 + suf}); + nameMap.insert({"encoder_r" + suf + "_drt_" + num1, "encoder_bi_r_cell" + 
num2 + suf}); + } + } + // add mapping for deep decoder cells + for(int i = 3; i <= options->get("dec-cell-base-depth"); ++i) { + std::string num1 = std::to_string(i - 2); + std::string num2 = std::to_string(i); + for(auto suf : suffixes) + nameMap.insert({"decoder" + suf + "_nl_drt_" + num1, "decoder_cell" + num2 + suf}); + } + // add mapping for normalization layers + std::map nameMapCopy(nameMap); + for(auto& kv : nameMapCopy) { + std::string prefix = kv.first.substr(0, 7); + + if(prefix == "encoder" || prefix == "decoder") { + nameMap.insert({kv.first + "_lns", kv.second + "_lns"}); + nameMap.insert({kv.first + "_lnb", kv.second + "_lnb"}); + } + } - void createAmunConfig(const std::string& name) { - Config::YamlNode amun; - // Amun has only CPU decoder for deep Nematus models - amun["cpu-threads"] = 16; - amun["gpu-threads"] = 0; - amun["maxi-batch"] = 1; - amun["mini-batch"] = 1; - - auto vocabs = options_->get>("vocabs"); - amun["source-vocab"] = vocabs[0]; - amun["target-vocab"] = vocabs[1]; - amun["devices"] = options_->get>("devices"); - amun["normalize"] = true; - amun["beam-size"] = 5; - amun["relative-paths"] = false; - - amun["scorers"]["F0"]["path"] = name; - amun["scorers"]["F0"]["type"] = "nematus2"; - amun["weights"]["F0"] = 1.0f; - - io::OutputFileStream out(name + ".amun.yml"); - out << amun; - } -}; + return nameMap; + } + + void createAmunConfig(const std::string& name) { + Config::YamlNode amun; + // Amun has only CPU decoder for deep Nematus models + amun["cpu-threads"] = 16; + amun["gpu-threads"] = 0; + amun["maxi-batch"] = 1; + amun["mini-batch"] = 1; + + auto vocabs = options_->get>("vocabs"); + amun["source-vocab"] = vocabs[0]; + amun["target-vocab"] = vocabs[1]; + amun["devices"] = options_->get>("devices"); + amun["normalize"] = true; + amun["beam-size"] = 5; + amun["relative-paths"] = false; + + amun["scorers"]["F0"]["path"] = name; + amun["scorers"]["F0"]["type"] = "nematus2"; + amun["weights"]["F0"] = 1.0f; + + io::OutputFileStream 
out(name + ".amun.yml"); + out << amun; + } + }; } // namespace marian diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 56d3dd990..144d532b5 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -1,15 +1,16 @@ -#include "marian.h" #include "translator/swappable.h" +#include +#include "common/io.h" #include "common/logging.h" +#include "common/timer.h" #include "data/corpus.h" #include "data/text_input.h" +#include "marian.h" +#include "models/amun.h" +#include "models/nematus.h" +#include "tensors/gpu/swap.h" #include "translator/beam_search.h" #include "translator/translator.h" -#include "common/io.h" -#include "common/timer.h" -#include -#include "tensors/gpu/swap.h" -#include "models/amun.h" namespace marian { @@ -228,11 +229,12 @@ Histories GPULoadedModel::Translate(const Ptr batch) { CPULoadedModel::CPULoadedModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath) : parameters_(io::loadItems(parameters)) { - // Load parameters. 
//Remap the parameter names if the model uses an older naming convention if (options->get("type") == "amun") { bool tied = options->get("tied-embeddings-src") || options->get("tied-embeddings-all"); Amun::remapIoItems(parameters_, tied); + } else if (options->get("type") == "nematus") { + Nematus::remapIoItems(parameters_, options); } // Find the special element and remove it: From 3f9c088eeb35a5f983694cace68737e504b41dfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 24 Sep 2021 15:41:11 +0300 Subject: [PATCH 080/135] Work around a crash in amun model loading --- src/translator/swappable.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 144d532b5..5a4555955 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -198,7 +198,9 @@ void GPULoadedModel::Load(const CPULoadedModel &from) { for(size_t i = 0; i < parameters_.size(); ++i) { // Sanity check - if (names_[i] != fromParams[i].name || parameters_[i]->size() != fromParams[i].size()) + // Not sure if that's ok, but we don't check for size equality because for + // some reason the target memory location sometimes can be bigger + if (names_[i] != fromParams[i].name || parameters_[i]->size() < fromParams[i].size()) printParamsAndExit(); swapper::copyCpuToGpu(reinterpret_cast(parameters_[i]->data()), From d4ba1fa53a2751ebe4f025988c88286c179019d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 26 Oct 2021 15:33:52 +0300 Subject: [PATCH 081/135] Don't crash when training sets not provided Happens in server mode for self-adaptive translation --- src/training/scheduler.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/training/scheduler.h b/src/training/scheduler.h index 9d2500f92..12bb7b69d 100644 --- a/src/training/scheduler.h +++ b/src/training/scheduler.h @@ -510,9 +510,14 @@ class Scheduler : public 
TrainingObserver { void actAfterEpoch(TrainingState& state) override { // stop if data streaming from STDIN is stopped for a TSV input - std::string firstPath = options_->get>("train-sets")[0]; - if(options_->get("tsv", false) && (firstPath == "stdin" || firstPath == "-")) - endOfStdin_ = true; + if (options_->has("training-sets")) { + auto trainingSets = options_->get>("train-sets"); + if (trainingSets.size() > 0) { + std::string firstPath = options_->get>("train-sets")[0]; + if(options_->get("tsv", false) && (firstPath == "stdin" || firstPath == "-")) + endOfStdin_ = true; + } + } float factor = options_->get("lr-decay"); From 24e8fc3ead8f738046e9e6aa754163f6cee1bd2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 26 Oct 2021 15:57:30 +0300 Subject: [PATCH 082/135] Copy over the self-adaptive server example script from an older commit It was left out during the reimplementation of the self-adaptive translation stuff to use the new "swappable" approach. --- scripts/self-adaptive/client_example.py | 51 +++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 scripts/self-adaptive/client_example.py diff --git a/scripts/self-adaptive/client_example.py b/scripts/self-adaptive/client_example.py new file mode 100644 index 000000000..6ef7757a7 --- /dev/null +++ b/scripts/self-adaptive/client_example.py @@ -0,0 +1,51 @@ +#!/usr/bin/env python + +from __future__ import print_function, unicode_literals, division + +import sys +import time +import argparse +import json + +from websocket import create_connection + + +def translate(batch, port=8080): + ws = create_connection("ws://localhost:{}/translate".format(port)) + ws.send(batch) + result = ws.recv() + ws.close() + return result.rstrip() + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("-p", "--port", type=int, default=8080) + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + + # List of input sentences 
separated by a new line character + inputs = "this is an example\nthe second sentence\nno context provided" + # For each input sentence a list of parallel sentences can be provided as a + # list of source and target sentences. + contexts = [ + # Source-side context for the first input sentence + ["this is a test\nthese are examples", + # Target-side context for the first input sentence + "das ist ein test\ndies sind Beispiele"], + # Only one example is given as a context for the second input sentence + ["the next sentence", + "der nächste Satz"], + # No context for the third input sentence + [] + ] + + input_data = {'input': inputs, 'context': contexts} + input_json = json.dumps(input_data) + + output_json = translate(input_json, port=args.port) + output_data = json.loads(output_json) + print(output_data['output']) From d68fd733ef3e37fa8ce7ceaf9aa11e21bc603825 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 26 Oct 2021 16:01:53 +0300 Subject: [PATCH 083/135] Clean up logging --- src/translator/self_adaptive.h | 4 ++-- src/translator/swappable.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index f129777f1..a6c23f533 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -150,14 +150,14 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { ++trainBegin; if(!trainSet.empty()) { - LOG(info, "# NEW TEST BATCH"); + LOG(info, "Got {} context sentences", trainSet.size()); trainSlot_->SetModel(cpuModel_); trainSlot_->Train(trainSet); translateSlot_->PointToParams(*trainSlot_); translate(testBatch, collector, printer); needsSwitching_ = true; } else { - LOG(info, "# EMPTY TEST BATCH"); + LOG(info, "No context"); if(needsSwitching_) { translateSlot_->Load(*cpuModel_); needsSwitching_ = false; diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 5a4555955..2fda0e6c6 100644 --- 
a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -91,12 +91,12 @@ void SwappableModelTrainer::Train(const std::vector &input) { while(scheduler->keepGoing()) { batchGenerator.prepare(); - LOG(info, "## NEW BATCHES"); + // LOG(info, "## NEW BATCHES"); for(auto&& batch : batchGenerator) { if(!scheduler->keepGoing()) break; - LOG(info, "### NEW BATCH"); + // LOG(info, "### NEW BATCH"); // Make an update step on the copy of the model auto lossNode = engine_->builder_->build(engine_->graph_, batch); engine_->graph_->forward(); From 324f69a1e14a5a6211d1e7d145398cf5d74573d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 26 Oct 2021 16:41:37 +0300 Subject: [PATCH 084/135] Remove a config option for swappable stuff that isn't used any more --- src/common/config_parser.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 26a4d6601..7d9163eff 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -727,8 +727,6 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { cli.add>("--output-approx-knn", "Use approximate knn search in output layer (currently only in transformer)") ->implicit_val("100 1024"); - cli.add("--swap-model", - "Path to model to swap to."); #if 0 // @TODO: Ask Hany if there are any decoding-time options // add ULR settings if(mode_ != cli::mode::selfadaptive) From 7f430741dafee1d5ba62aa03796e146d58d5ffb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 27 Oct 2021 11:49:41 +0300 Subject: [PATCH 085/135] Disable early stopping for self-adaptive training Fixes a crash due to the early-stopping-on option being required after the merge --- src/common/config_parser.cpp | 10 ---------- src/common/config_parser.h | 1 - src/translator/self_adaptive.h | 3 +++ 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp 
index 3de2caf95..937a86cca 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -98,7 +98,6 @@ ConfigParser::ConfigParser(cli::mode mode) addOptionsTraining(cli_); addOptionsTranslation(cli_); addOptionsServer(cli_); - addOptionsStupid(cli_); break; default: ABORT("wrong CLI mode"); @@ -109,15 +108,6 @@ ConfigParser::ConfigParser(cli::mode mode) // clang-format on } -void ConfigParser::addOptionsStupid(cli::CLIWrapper & cli) { - auto previous_group = cli.switchGroup("Server options"); - cli.add( - "--early-stopping", - "Stop if the first validation metric does not improve for arg consecutive validation steps", - 10); - cli.switchGroup(previous_group); -} - void ConfigParser::addOptionsGeneral(cli::CLIWrapper & cli) { int defaultWorkspace = (mode_ == cli::mode::translation) ? 512 : 2048; diff --git a/src/common/config_parser.h b/src/common/config_parser.h index 744656458..b6b825d7d 100644 --- a/src/common/config_parser.h +++ b/src/common/config_parser.h @@ -130,7 +130,6 @@ class ConfigParser { void addOptionsTranslation(cli::CLIWrapper&); void addOptionsScoring(cli::CLIWrapper&); void addOptionsEmbedding(cli::CLIWrapper&); - void addOptionsStupid(cli::CLIWrapper&); void addAliases(cli::CLIWrapper&); diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index a6c23f533..c26e3a8b5 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -26,6 +26,9 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { public: TrainSelfAdaptive(Ptr options) : options_(options) { options_->set("shuffle", "none"); + // Disable early stopping because typically training would happen for only a few iterations and + // and also it doesn't make much sense to run the validation metrics on the validation dataset here + options_->set("early-stopping", 0); // Set up translator options optionsTrans_ = New(options_->clone()); // We will only ever translate a single sentence at a time because dynamic From 
d7676bd6ef6cc7286083529074edc096b3957e28 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 27 Oct 2021 15:16:37 +0300 Subject: [PATCH 086/135] Forgot to remove a file that was used for debugging --- src/CMakeLists.txt | 5 -- src/command/bug_repro.cpp | 120 -------------------------------------- 2 files changed, 125 deletions(-) delete mode 100644 src/command/bug_repro.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index f5a6b2ee8..282d87be0 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -270,11 +270,6 @@ if (NOT COMPILE_LIBRARY_ONLY) add_custom_target(marian_tgz DEPENDS "${CMAKE_BINARY_DIR}/marian.tgz") add_custom_target(philly DEPENDS marian_tgz marian_zip) - add_executable(bug_repro command/bug_repro.cpp) - set_target_properties(bug_repro PROPERTIES OUTPUT_NAME bug_repro) - target_compile_options(bug_repro PRIVATE ${ALL_WARNINGS} -Wno-suggest-override) - set(EXECUTABLES ${EXECUTABLES} bug_repro) - if(COMPILE_SERVER) add_executable(marian_server command/marian_server.cpp) set_target_properties(marian_server PROPERTIES OUTPUT_NAME marian-server) diff --git a/src/command/bug_repro.cpp b/src/command/bug_repro.cpp deleted file mode 100644 index 86464ff77..000000000 --- a/src/command/bug_repro.cpp +++ /dev/null @@ -1,120 +0,0 @@ -#include "../common/config_parser.h" -#include "../common/options.h" -#include "../data/text_input.h" -#include "../models/model_factory.h" -#include "../models/model_task.h" -#include "../training/scheduler.h" -#include "marian.h" - -namespace marian { - -class ReproTask : public marian::ModelTask { -public: - ReproTask() { - } - void run() override { - auto parser = ConfigParser(cli::mode::training); - // i'm prob leaking memory at the end of run() but i don't care - const char* argseasy[] - = {"marian", - "-c", - "/home/rihards/exp/marian-adaptive-crash-repro/models/model.npz.repro.yml", - "-t", "dummy-value", "-t", "dummy-value", - "--after-batches", "20", - "--after-epochs", "4", - 
"--learn-rate", "0.1", - "--shuffle", "none", - "--mini-batch", "1"}; - int argc = sizeof(argseasy) / sizeof(char*); - // this is as close as i could get to initializing a char** in a sane manner - char** args = new char*[argc]; - for (int i = 0; i < argc; i++) { - args[i] = strdup(argseasy[i]); - } - auto options = parser.parseOptions(argc, args, false); - - // auto builder = models::createCriterionFunctionFromOptions(options, models::usage::training); - auto optimizer = Optimizer(New("optimizer", "adam", "learn-rate", 0.01)); - - std::vector vocabPaths - = {"/home/rihards/exp/marian-adaptive-crash-repro/models/train.1-to-1.bpe.en-lv.yml", - "/home/rihards/exp/marian-adaptive-crash-repro/models/train.1-to-1.bpe.en-lv.yml"}; - std::vector maxVocabs = {500, 500}; - - std::vector> vocabs; - for(size_t i = 0; i < vocabPaths.size(); i++) { - Ptr vocab = New(options, i); - vocab->load(vocabPaths[i], maxVocabs[i]); - vocabs.emplace_back(vocab); - } - std::string sources = "del@@ e@@ tions affecting 13 q 14 are also the most frequent structural genetic ab@@ " - "err@@ ations in chronic lym@@ pho@@ cy@@ tic leu@@ ka@@ emia ( C@@ ll ) 6,@@ 7 , 8 " - ".\nthis region is found to be heter@@ oz@@ y@@ g@@ ously deleted in 30 ¬ 60 % and hom@@ " - "oz@@ y@@ g@@ ously deleted in 10 ¬ 20 % of C@@ ll patien@@ ts@@ 9 ."; - std::string targets - = "del@@ ē@@ cijas , kas ietekmē 13 q 14 , arī ir visbiežāk sastopa@@ mās strukturālās " - "ģenē@@ tiskās ab@@ er@@ ācijas hron@@ iskā lim@@ foc@@ ī@@ tiskajā leik@@ ēm@@ ijā ( " - "H@@ LL ) 6,@@ 7 , 8 .\n30 –@@ 60 % H@@ LL pacientu ir konstatēta šī reģiona heter@@ " - "oz@@ ig@@ ota del@@ ē@@ cija , savukārt 10 –@@ 20 % H@@ LL pacientu ir konstatēta šī " - "reģiona hom@@ oz@@ ig@@ ota del@@ ē@@ c@@ ij@@ a@@ 9 ."; - // auto inputs = New(std::vector({sources, targets}), vocabs, options); - // auto batches = New>(inputs, options); - - for(size_t i = 0; i < 10; i++) { - LOG(info, "# NEW OUTER ITER"); - auto builder = 
models::createCriterionFunctionFromOptions(options, models::usage::training); - auto state = New(options->get("learn-rate")); - auto scheduler = New(options, state); - scheduler->registerTrainingObserver(scheduler); - scheduler->registerTrainingObserver(optimizer); - - Ptr graph; - - bool first = true; - scheduler->started(); - - graph = New(); - graph->setDevice({0, DeviceType::cpu}); - graph->reserveWorkspaceMB(128); - while(scheduler->keepGoing()) { - LOG(info, "## NEW INNER ITER"); - // if inputs aren't initialized for each epoch, their internal istringstreams get exhausted - auto inputs - = New(std::vector({sources, targets}), vocabs, options); - auto batches = New>(inputs, options); - // auto batches = New>(inputs, options); - batches->prepare(); - - for(auto batch : *batches) { - LOG(info, "### NEW BATCH"); - if(!scheduler->keepGoing()) { - break; - } - - auto lossNode = builder->build(graph, batch); - if (first) { - graph->graphviz("graph-" + std::to_string(i) + ".gv"); - first = false; - } - graph->forward(); - StaticLoss loss = *lossNode; - graph->backward(); - - optimizer->update(graph, 1); - scheduler->update(loss, batch); - } - - if(scheduler->keepGoing()) - scheduler->increaseEpoch(); - } - scheduler->finished(); - } - } -}; -} - -int main(int argc, char **argv) { - auto task = marian::ReproTask(); - task.run(); - return 0; -} From e48e737aafa3c2cfbc8f77961327de833b013b26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 27 Oct 2021 15:34:23 +0300 Subject: [PATCH 087/135] Update CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index e0b853144..915ef2560 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Added +- Adds a `marian-adaptive` executable to enable self-adaptive translation (a.k.a, runtime domain adaptation). 
- Adds option --add-lsh to marian-conv which allows the LSH to be memory-mapped. - Early stopping based on first, all, or any validation metrics via `--early-stopping-on` - Compute 8.6 support if using CUDA>=11.1 From 017b6c1a90d7ced1d3b9dd9f245466fb72674115 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 28 Oct 2021 14:50:39 +0300 Subject: [PATCH 088/135] Fix CPU-only compilation --- src/translator/swappable.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/translator/swappable.cpp b/src/translator/swappable.cpp index 2fda0e6c6..35c3cb3f1 100644 --- a/src/translator/swappable.cpp +++ b/src/translator/swappable.cpp @@ -25,7 +25,9 @@ namespace { // For debugging memory void get(std::vector &out, MemoryPiece::PtrType mem, Ptr backend) { out.resize(mem->size()); +#ifdef CUDA_FOUND gpu::copy(backend, mem->data(), mem->data() + mem->size(), out.data()); +#endif } GPUEngineTrain::GPUEngineTrain(Ptr options, size_t deviceIdx) From 1257a4540c078d5cc9f5829383681b45ab6a1da9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 28 Oct 2021 18:17:50 +0300 Subject: [PATCH 089/135] Add a virtual destructor to CollectorBase To fix a compilation error on MacOS --- src/translator/output_collector.cpp | 2 ++ src/translator/output_collector.h | 1 + 2 files changed, 3 insertions(+) diff --git a/src/translator/output_collector.cpp b/src/translator/output_collector.cpp index b74a5a54c..eec63ff8a 100644 --- a/src/translator/output_collector.cpp +++ b/src/translator/output_collector.cpp @@ -6,6 +6,8 @@ namespace marian { +CollectorBase::~CollectorBase(){}; + OutputCollector::OutputCollector() : nextId_(0), printing_(new DefaultPrinting()) {} diff --git a/src/translator/output_collector.h b/src/translator/output_collector.h index 4b0c48f13..106ecbf26 100644 --- a/src/translator/output_collector.h +++ b/src/translator/output_collector.h @@ -45,6 +45,7 @@ class GeometricPrinting : public PrintingStrategy { }; struct 
CollectorBase { + virtual ~CollectorBase() = 0; virtual void Write(long sourceId, const std::string& best1, const std::string& bestn, bool nbest) = 0; }; From 96115c8e658ba332b1b5bafe65ef1cc75824cfba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 12:51:25 +0200 Subject: [PATCH 090/135] Fix casing in the `COMPILE_ADAPTIVE` cmake option's description Co-authored-by: Roman Grundkiewicz --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f42b3d0b2..a7665874a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ option(COMPILE_CPU "Compile CPU version" ON) option(COMPILE_CUDA "Compile GPU version" ON) option(COMPILE_EXAMPLES "Compile examples" OFF) option(COMPILE_SERVER "Compile marian-server" OFF) -option(COMPILE_ADAPTIVE "Compile marian-ADAPTIVE" OFF) +option(COMPILE_ADAPTIVE "Compile marian-adaptive" OFF) option(COMPILE_TESTS "Compile tests" OFF) option(USE_APPLE_ACCELERATE "Compile with Apple Accelerate" OFF) option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF) From ba61acd758723cf8f4730389f32928435eef2a9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 14:40:09 +0200 Subject: [PATCH 091/135] Split out marian-adaptive server mode into a separate executable --- CMakeLists.txt | 9 +++--- src/CMakeLists.txt | 6 ++++ src/command/marian_adaptive.cpp | 53 ++------------------------------- src/common/config_parser.cpp | 15 ++++------ src/common/config_parser.h | 2 +- 5 files changed, 20 insertions(+), 65 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a7665874a..6e53f6d12 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,7 +15,7 @@ option(COMPILE_CPU "Compile CPU version" ON) option(COMPILE_CUDA "Compile GPU version" ON) option(COMPILE_EXAMPLES "Compile examples" OFF) option(COMPILE_SERVER "Compile marian-server" OFF) -option(COMPILE_ADAPTIVE "Compile 
marian-adaptive" OFF) +option(COMPILE_ADAPTIVE "Compile marian-adaptive. Set COMPILE_SERVER=ON to enable the server mode." OFF) option(COMPILE_TESTS "Compile tests" OFF) option(USE_APPLE_ACCELERATE "Compile with Apple Accelerate" OFF) option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF) @@ -541,7 +541,7 @@ endif(COMPILE_CPU) ############################################################################### # Find OpenSSL set(BOOST_COMPONENTS "") -if(COMPILE_SERVER OR COMPILE_ADAPTIVE) +if(COMPILE_SERVER) find_package(OpenSSL) if(OpenSSL_FOUND) message(STATUS "Found OpenSSL") @@ -556,11 +556,10 @@ if(COMPILE_SERVER OR COMPILE_ADAPTIVE) endif() set(BOOST_COMPONENTS ${BOOST_COMPONENTS} system) else(OpenSSL_FOUND) - message(WARNING "Cannot find OpenSSL library. Not compiling server or marian-adaptive.") + message(WARNING "Cannot find OpenSSL library. Not compiling server.") set(COMPILE_SERVER "off") - set(COMPILE_ADAPTIVE "off") endif(OpenSSL_FOUND) -endif(COMPILE_SERVER OR COMPILE_ADAPTIVE) +endif(COMPILE_SERVER) ############################################################################### # Undo static lib search and put non-static searches here: diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 282d87be0..8a4bac9f1 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -287,6 +287,12 @@ if (NOT COMPILE_LIBRARY_ONLY) add_executable(marian_adaptive command/marian_adaptive.cpp) set_target_properties(marian_adaptive PROPERTIES OUTPUT_NAME marian-adaptive) set(EXECUTABLES ${EXECUTABLES} marian_adaptive) + + if(COMPILE_SERVER) + add_executable(marian_adaptive_server command/marian_adaptive_server.cpp) + set_target_properties(marian_adaptive_server PROPERTIES OUTPUT_NAME marian-adaptive-server) + set(EXECUTABLES ${EXECUTABLES} marian_adaptive_server) + endif(COMPILE_SERVER) endif(COMPILE_ADAPTIVE) foreach(exec ${EXECUTABLES}) diff --git a/src/command/marian_adaptive.cpp b/src/command/marian_adaptive.cpp index 0f64a84ca..a21d04a7d 100644 --- 
a/src/command/marian_adaptive.cpp +++ b/src/command/marian_adaptive.cpp @@ -1,7 +1,5 @@ #include "marian.h" -#include "3rd_party/simple-websocket-server/server_ws.hpp" -#include "common/file_stream.h" #include "common/timer.h" #include "common/utils.h" #include "training/training.h" @@ -9,58 +7,13 @@ using namespace marian; -typedef SimpleWeb::SocketServer WSServer; - int main(int argc, char **argv) { auto options = parseOptions(argc, argv, cli::mode::selfadaptive); auto task = New(options); - if(options->has("port") && options->get("port") != 0) { - // Initialize web server - WSServer server; - server.config.port = options->get("port", 8080); - - auto &translate = server.endpoint["^/translate/?$"]; - - translate.on_message = [&task](Ptr connection, - Ptr message) { - auto sendStream = std::make_shared(); - - // Get input text - auto inputText = message->string(); - - // Translate - timer::Timer timer; - auto outputText = task->run(inputText); - LOG(info, "Best translation: {}", outputText); - *sendStream << outputText << std::endl; - LOG(info, "Translation took: {:.5f}s", timer.elapsed()); - - // Send translation back - connection->send(sendStream, [](const SimpleWeb::error_code &ec) { - if(ec) - LOG(error, "Error sending message: ({}) {}", ec.value(), ec.message()); - }); - }; - - // Error Codes for error code meanings - // http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html - translate.on_error = [](Ptr connection, const SimpleWeb::error_code &ec) { - LOG(error, "Connection error: ({}) {}", ec.value(), ec.message()); - }; - - // Start server thread - std::thread serverThread([&server]() { - LOG(info, "Server is listening on port {}", server.config.port); - server.start(); - }); - - serverThread.join(); - } else { - timer::Timer timer; - task->run(); - LOG(info, "Total time: {:.5f}s", timer.elapsed()); - } + timer::Timer timer; + task->run(); + LOG(info, "Total time: {:.5f}s", timer.elapsed()); return 0; } diff --git 
a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 937a86cca..e736c20f0 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -70,12 +70,13 @@ std::string const& ConfigParser::cmdLine() const { } ConfigParser::ConfigParser(cli::mode mode) - : cli_(config_,"Marian: Fast Neural Machine Translation in C++", - "General options", "", 40), - mode_(mode == cli::mode::server ? cli::mode::translation : mode) { + : cli_(config_, "Marian: Fast Neural Machine Translation in C++", "General options", "", 40), + mode_(mode == cli::mode::server + ? cli::mode::translation + : (mode == cli::mode::selfadaptiveServer ? cli::mode::selfadaptive : mode)) { addOptionsGeneral(cli_); - if (mode == cli::mode::server) + if (mode == cli::mode::server || mode == cli::mode::selfadaptiveServer) addOptionsServer(cli_); addOptionsModel(cli_); @@ -97,7 +98,6 @@ ConfigParser::ConfigParser(cli::mode mode) case cli::mode::selfadaptive: addOptionsTraining(cli_); addOptionsTranslation(cli_); - addOptionsServer(cli_); break; default: ABORT("wrong CLI mode"); @@ -165,10 +165,7 @@ void ConfigParser::addOptionsServer(cli::CLIWrapper& cli) { // clang-format off auto previous_group = cli.switchGroup("Server options"); // TODO why is this needed? - size_t defaultPort = mode_ == cli::mode::selfadaptive ? 
0 : 8080; - cli.add("--port,-p", - "Port number for web socket server", - defaultPort); + cli.add("--port,-p", "Port number for web socket server", 8080); cli.switchGroup(previous_group); // clang-format on } diff --git a/src/common/config_parser.h b/src/common/config_parser.h index b6b825d7d..5429f3d2c 100644 --- a/src/common/config_parser.h +++ b/src/common/config_parser.h @@ -14,7 +14,7 @@ namespace marian { namespace cli { -enum struct mode { training, translation, scoring, server, embedding, selfadaptive }; + enum struct mode { training, translation, scoring, server, embedding, selfadaptive, selfadaptiveServer }; } // namespace cli /** From 2e7e78f0c208a6660d545533ab3e0442578c5f92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 14:43:46 +0200 Subject: [PATCH 092/135] Remove marian-adaptive from the .zip and .tgz targets It was an oversight to include them there in a previous commit. As the comment suggests, the targets are for some MS internal needs. 
--- src/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8a4bac9f1..85747787e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -253,7 +253,6 @@ if (NOT COMPILE_LIBRARY_ONLY) "${CMAKE_BINARY_DIR}/marian-scorer" "${CMAKE_BINARY_DIR}/marian-vocab" "${CMAKE_BINARY_DIR}/marian-conv" - "${CMAKE_BINARY_DIR}/marian-adaptive" DEPENDS marian_train marian_decoder marian_scorer marian_vocab marian_conv) add_custom_target(marian_zip DEPENDS "${CMAKE_BINARY_DIR}/marian.zip") @@ -265,7 +264,6 @@ if (NOT COMPILE_LIBRARY_ONLY) "marian-scorer" "marian-vocab" "marian-conv" - "marian-adaptive" DEPENDS marian_train marian_decoder marian_scorer marian_vocab marian_conv) add_custom_target(marian_tgz DEPENDS "${CMAKE_BINARY_DIR}/marian.tgz") add_custom_target(philly DEPENDS marian_tgz marian_zip) From 0084a3ad00a8f9bb4081bc4ff3916ae6d81f1d7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 14:47:40 +0200 Subject: [PATCH 093/135] Remove a comment that was made obsolete by the grandparent commit (ba61acd7) --- src/common/config_parser.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index e736c20f0..fbcaf6ed9 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -164,7 +164,6 @@ void ConfigParser::addOptionsGeneral(cli::CLIWrapper & cli) { void ConfigParser::addOptionsServer(cli::CLIWrapper& cli) { // clang-format off auto previous_group = cli.switchGroup("Server options"); - // TODO why is this needed?
cli.add("--port,-p", "Port number for web socket server", 8080); cli.switchGroup(previous_group); // clang-format on From 30c040042eda425c1148c7847c58e6d34b5fc1f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 14:54:52 +0200 Subject: [PATCH 094/135] Change the defaultDispFreq option to use an unsigned value At least I think that's what this does Co-authored-by: Roman Grundkiewicz --- src/common/config_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index fbcaf6ed9..a732de3f1 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -382,7 +382,7 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { // In self-adaptive mode users would typically want less updates to happen than in regular training size_t defaultAfterEpochs = (mode_ == cli::mode::selfadaptive) ? 2 : 0; - std::string defaultDispFreq = (mode_ == cli::mode::selfadaptive) ? "1" : "1000u"; + std::string defaultDispFreq = (mode_ == cli::mode::selfadaptive) ? "1u" : "1000u"; // @TODO: these should be re-defined as aliases for `--after` but the current frame work matches on value, so not doable.
cli.add("--after-epochs,-e", From 2fbb6ec57aab90bd5f24cc32988249bad9162e93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 14:58:36 +0200 Subject: [PATCH 095/135] Fix indentation Co-authored-by: Roman Grundkiewicz --- src/common/config_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index a732de3f1..6a30c7016 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -742,7 +742,7 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { cli.switchGroup(previous_group); // clang-format on - } +} void ConfigParser::addOptionsScoring(cli::CLIWrapper& cli) { auto previous_group = cli.switchGroup("Scorer options"); From d09c021d627d3790250d9429134f4e295bd32ec7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 15:08:58 +0200 Subject: [PATCH 096/135] Fix indentation --- src/tensors/gpu/swap.cu | 14 ++++++++------ src/tensors/gpu/swap.h | 15 +++++++++------ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/src/tensors/gpu/swap.cu b/src/tensors/gpu/swap.cu index c16a71614..1528f3860 100644 --- a/src/tensors/gpu/swap.cu +++ b/src/tensors/gpu/swap.cu @@ -4,10 +4,12 @@ void copyCpuToGpu(const char * in, char * gpuOut); void copyGpuToGpu(const char * in, char * gpuOut); namespace marian { - namespace swapper { - void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { - CUDA_CHECK(cudaSetDevice(deviceId.no)); - CUDA_CHECK(cudaMemcpy(gpuOut, in, count, cudaMemcpyHostToDevice)); - } - } +namespace swapper { + +void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { + CUDA_CHECK(cudaSetDevice(deviceId.no)); + CUDA_CHECK(cudaMemcpy(gpuOut, in, count, cudaMemcpyHostToDevice)); +} + +} } diff --git a/src/tensors/gpu/swap.h b/src/tensors/gpu/swap.h index a020c8827..9de46e9e9 100644 
--- a/src/tensors/gpu/swap.h +++ b/src/tensors/gpu/swap.h @@ -2,14 +2,17 @@ #include #include "common/definitions.h" #include "common/logging.h" + namespace marian { - namespace swapper { +namespace swapper { + #ifdef CUDA_FOUND - void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId); +void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId); #else - inline void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { - ABORT("Copy from CPU to GPU memory is only available with CUDA."); - } +inline void copyCpuToGpu(char * gpuOut, const char * in, size_t count, const marian::DeviceId& deviceId) { + ABORT("Copy from CPU to GPU memory is only available with CUDA."); +} #endif - } + +} } From 10d5bffdfb9d5a9ea45190f03456a3ceb55ae124 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 15:11:27 +0200 Subject: [PATCH 097/135] Remove @brief from doc comments Co-authored-by: Roman Grundkiewicz --- src/translator/self_adaptive.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index c26e3a8b5..29b06a88b 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -15,8 +15,7 @@ namespace marian { using namespace data; /** - * @breif Implementation of the self-adaptive translation mode. - * + * Implementation of the self-adaptive translation mode. 
* Self-adaptive translation means optionally using a set of context sentences * (e.g., provided by a translation memory), that are similar to the * translatable sentence, to train the model for a few iterations to fine-tune From d41d81bb7cea58620c2c4bb318a79a3c2a54576f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 15:12:52 +0200 Subject: [PATCH 098/135] Remove commented out debugging code Co-authored-by: Roman Grundkiewicz --- src/translator/self_adaptive.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 29b06a88b..bf5a0fb00 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -64,8 +64,6 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { * @return JSON-encoded translations */ std::string run(const std::string& json) override { - //LOG(warn, "REMOVEME Received Json:\n{}", json); - // Check if input is in JSON YAML::Node yaml = YAML::Load(json); if(!yaml["input"]) { From fde22269358fb939518689dff79d0ac8c8fbc035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 15:13:29 +0200 Subject: [PATCH 099/135] Don't split the line here Co-authored-by: Roman Grundkiewicz --- src/translator/self_adaptive.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index bf5a0fb00..a05a29142 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -136,8 +136,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { template void adaptAndTranslate( - Ptr> - testBatches, + Ptr> testBatches, Iterator trainBegin, Iterator trainEnd, Ptr collector) { From e40758766096fd4a7d2fad9ca149d90406c46db8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 15:13:48 +0200 Subject: [PATCH 100/135] Fix indentation Co-authored-by: Roman 
Grundkiewicz --- src/translator/self_adaptive.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index a05a29142..fe8fbe186 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -176,9 +176,9 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { std::stringstream bestn; printer->print(history, best1, bestn); collector->Write(history->getLineNum(), - best1.str(), - bestn.str(), - options_->get("n-best")); + best1.str(), + bestn.str(), + options_->get("n-best")); } } }; From b869f683b9e9d4a6c2dd92d9801b0b036ad4da8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 29 Nov 2021 15:47:07 +0200 Subject: [PATCH 101/135] Make it clear that validation options are disabled --- src/translator/self_adaptive.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index fe8fbe186..fbde213c3 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -25,8 +25,12 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { public: TrainSelfAdaptive(Ptr options) : options_(options) { options_->set("shuffle", "none"); - // Disable early stopping because typically training would happen for only a few iterations and - // and also it doesn't make much sense to run the validation metrics on the validation dataset here + // Validation options are disabled for self-adaptive marian because + // typically training would happen for only a few iterations and it seems to + // not make much sense to run validation metrics on the validation dataset + // then (especially if you care about translation performance). However, we + // have to manually set the early-stopping option as disabled because the + // scheduler crashes if it's not present. 
options_->set("early-stopping", 0); // Set up translator options optionsTrans_ = New(options_->clone()); From e04b82938274c8c55e54b3a9103b33d2c2c1556e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 30 Nov 2021 11:31:59 +0200 Subject: [PATCH 102/135] Delete the pad_model_vocabulary.py script It came with the swappable code but isn't really needed for self-adaptive marian --- scripts/contrib/pad_model_vocabulary.py | 52 ------------------------- 1 file changed, 52 deletions(-) delete mode 100755 scripts/contrib/pad_model_vocabulary.py diff --git a/scripts/contrib/pad_model_vocabulary.py b/scripts/contrib/pad_model_vocabulary.py deleted file mode 100755 index eca73e34a..000000000 --- a/scripts/contrib/pad_model_vocabulary.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python3 -# Pads a Marian model's vocabulary to have greater size. The added tokens have -# zero probability. -# ./pad_model_vocabulary.py input.npz output.npz desired_vocab_size -# -# You'll also need to separately pad your vocabulary file like so: -# old=$(wc -l input.vocab |cut -d " " -f 1) -# (cat input.vocab; seq -f "" $((desired_vocab_size-old))) >output.vocab -# -# Warning: probably only works with shared vocabulary models. -import math -import numpy as np -import sys -import yaml - -# Amend the vocab size in a raw ["special:model.yml"] data from a Marian npz. -# Returns the raw data to use for ["special:model.yml"] -def substitute_vocab_config(raw, new_size): - print("Old yml: ", raw.tostring()) - raw_yaml = raw.tostring().decode("utf-8") - #Python yaml doesn't like null bytes. 
- if raw_yaml.endswith("\x00"): - raw_yaml = raw_yaml[:-1] - config = yaml.load(raw_yaml) - config['dim-vocabs'] = [new_size] * len(config['dim-vocabs']) - raw_yaml = yaml.dump(config) - if raw_yaml.endswith("\n"): - raw_yaml = raw_yaml[:-1] - raw_yaml += "\x00" - return np.array(bytearray(raw_yaml, 'utf-8')) - -if len(sys.argv) != 4: - print("Usage: " + sys.argv[0] + " input.npz output.npz desired_vocab_size") - sys.exit(1) - -resized_path = sys.argv[2] -new_size = int(sys.argv[3]) -old_model = np.load(sys.argv[1]) - -new_model = dict(old_model) -old_size = len(old_model["Wemb"]) -if old_size > new_size: - sys.stderr.write("New size is smaller than original. Cowardly refusing to clip vocab.\n") - sys.exit(2) -print("Before: ", new_model["decoder_ff_logit_out_b"].shape, new_model["Wemb"].shape) -bias = new_model["decoder_ff_logit_out_b"] -new_model["decoder_ff_logit_out_b"] = np.pad(bias, [(0,0),(0,new_size - bias.shape[1])], mode='constant', constant_values = -math.inf) -new_model["Wemb"] = np.pad(new_model["Wemb"], [(0,new_size - bias.shape[1]), (0,0)], mode='constant', constant_values = 0) -print("After: ", new_model["decoder_ff_logit_out_b"].shape, new_model["Wemb"].shape) -new_model["special:model.yml"] = substitute_vocab_config(new_model["special:model.yml"], new_size) -print("New yml: ", new_model["special:model.yml"].tostring()) -np.savez(resized_path, **new_model) From 939384bda8199cb6239ff2589052a90e93ee4c23 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 30 Nov 2021 14:15:57 +0200 Subject: [PATCH 103/135] Comment on why data management options are disabled for self-adaptive marian --- src/common/config_parser.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 6a30c7016..c91edf268 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -415,6 +415,13 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { 
addSuboptionsTSV(cli); // data management options + // + // These options are disabled for self-adaptive translation because they seem + // to not make much sense in that context, except for --shuffle, because they + // deal with the storage of training data but in self-adaptive translation + // training data sets are small and they typically change for each input + // sentence. --shuffle isn't currently supported because we use `TextInput` + // for training data and shuffle is a no-op in that class. if (mode_ != cli::mode::selfadaptive) { cli.add("--shuffle", "How to shuffle input data (data: shuffles data and sorted batches; batches: " From 92aaeeadb2815823d85d7339746679eac5d35ab6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 30 Nov 2021 15:58:32 +0200 Subject: [PATCH 104/135] Explain the max-length-translate option; fix the default for max-lengt The default was wrong for self-adaptive translation --- src/common/config_parser.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index c91edf268..be18149ec 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -699,15 +699,13 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { "Keep the output segmented into SentencePiece subwords"); #endif + // For self-adaptive translation these options are already added in + // `addOptionsTraining` if(mode_ != cli::mode::selfadaptive) { addSuboptionsInputLength(cli); addSuboptionsTSV(cli); addSuboptionsDevices(cli); addSuboptionsBatching(cli); - } else { - cli.add("--max-length-translate", - "Maximum input sentence length for translation", - 1000); } // for self-adaptive mode vocabs are already added via the training options @@ -937,13 +935,25 @@ void ConfigParser::addSuboptionsBatching(cli::CLIWrapper& cli) { } void ConfigParser::addSuboptionsInputLength(cli::CLIWrapper& cli) { - size_t defaultMaxLength = 
(mode_ == cli::mode::training) ? 50 : 1000; + size_t defaultMaxLength = + (mode_ == cli::mode::training || mode_ == cli::mode::selfadaptive) + ? 50 + : 1000; // clang-format off cli.add("--max-length", "Maximum length of a sentence in a training sentence pair", defaultMaxLength); cli.add("--max-length-crop", "Crop a sentence to max-length instead of omitting it if longer than max-length"); + // In self-adaptive translation, the user might want to be able to set + // different max lengths for training and translation. In that case, + // --max-length is assumed to be meant for training (as per the help message) + // and we add a --max-lenght-translate parameter for translation. + if (mode_ == cli::mode::selfadaptive) { + cli.add("--max-length-translate", + "Maximum input sentence length for translation", + 1000); + } // clang-format on } From 99553d59b7307a206d2a36356943fe6ebe3922ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 30 Nov 2021 16:01:54 +0200 Subject: [PATCH 105/135] Remove an obsolete comment --- src/common/config_parser.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index be18149ec..0da03706b 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -708,7 +708,6 @@ void ConfigParser::addOptionsTranslation(cli::CLIWrapper& cli) { addSuboptionsBatching(cli); } - // for self-adaptive mode vocabs are already added via the training options if(mode_ != cli::mode::selfadaptive) { cli.add("--fp16", "Shortcut for mixed precision inference with float16, corresponds to: --precision float16"); From 8aec3caa38b30d28c1bd34fa9cb8216639e4d362 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 30 Nov 2021 16:04:24 +0200 Subject: [PATCH 106/135] Remove excessive empty lines --- src/data/adaptive_context.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/src/data/adaptive_context.cpp 
b/src/data/adaptive_context.cpp index 9ac680e83..4e626ae88 100644 --- a/src/data/adaptive_context.cpp +++ b/src/data/adaptive_context.cpp @@ -3,7 +3,6 @@ namespace marian { namespace data { - AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSetReader) : trainSetReader_(trainSetReader) { if(trainSetReader) { @@ -11,17 +10,14 @@ AdaptiveContextIterator::AdaptiveContextIterator(AdaptiveContextReader* trainSet } } - bool AdaptiveContextIterator::equal(const AdaptiveContextIterator& other) const { return other.trainSetReader_ == trainSetReader_; } - const std::vector& AdaptiveContextIterator::dereference() const { return currentSamples_; } - void AdaptiveContextIterator::increment() { // If the previous increment has exhausted the file, we must indicate that the we've reached // the iterator's end @@ -37,29 +33,23 @@ void AdaptiveContextIterator::increment() { } - - AdaptiveContextReader::AdaptiveContextReader(std::vector paths) { for(auto& path : paths) files_.emplace_back(new io::InputFileStream(path)); } - AdaptiveContextIterator AdaptiveContextReader::begin() { return AdaptiveContextIterator(this); } - AdaptiveContextIterator AdaptiveContextReader::end() { return AdaptiveContextIterator(nullptr); } - bool AdaptiveContextReader::eof() { return eof_; } - std::vector AdaptiveContextReader::getSamples() { // extracted lines for source and target corpora std::vector samples; @@ -102,7 +92,5 @@ std::vector AdaptiveContextReader::getSamples() { return samples; } - - } // namespace data } // namespace marian From 971e1dc8eb4ffc6859426a1dda7c49935a98f7ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 30 Nov 2021 16:11:00 +0200 Subject: [PATCH 107/135] Split some long lines --- src/translator/swappable.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/src/translator/swappable.h b/src/translator/swappable.h index af3cffa4c..ce4c28b60 100644 --- a/src/translator/swappable.h +++ 
b/src/translator/swappable.h @@ -51,8 +51,10 @@ class GPUEngineTrain { public: /** * @param options The marian options object - * @param deviceNum The index of the device you want to use for this slot. Note that this is not the deviceID but the index of the device in the - * array of supplied devices. Eg if you provide -d 0 3 5 and you want the Slot to run on GPU 3, you provide deviceNum=1. + * @param deviceNum The index of the device you want to use for this slot. + * Note that this is not the deviceID but the index of the device in the + * array of supplied devices. Eg if you provide -d 0 3 5 and you want the + * Slot to run on GPU 3, you provide deviceNum=1. */ explicit GPUEngineTrain(Ptr options, size_t deviceNum); @@ -124,8 +126,10 @@ class GPUEngineTranslate { public: /** * @param options The marian options object - * @param deviceNum The index of the device you want to use for this slot. Note that this is not the deviceID but the index of the device in the - * array of supplied devices. Eg if you provide -d 0 3 5 and you want the Slot to run on GPU 3, you provide deviceNum=1. + * @param deviceNum The index of the device you want to use for this slot. + * Note that this is not the deviceID but the index of the device in the + * array of supplied devices. Eg if you provide -d 0 3 5 and you want the + * Slot to run on GPU 3, you provide deviceNum=1. */ explicit GPUEngineTranslate(Ptr options, size_t deviceNum); @@ -181,7 +185,10 @@ class CPULoadedModel { public: // The parts of Options that relate to model and vocab are ignored. The // files provided will be loaded. 
- CPULoadedModel(Ptr options, const std::string ¶meters, const std::vector &sourceVocabPaths, const std::string &targetVocabPath); + CPULoadedModel(Ptr options, + const std::string ¶meters, + const std::vector &sourceVocabPaths, + const std::string &targetVocabPath); const std::vector &Parameters() const { return parameters_; } From f3a085c6ccd0c9646130989b871b0334901b9146 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 1 Dec 2021 12:20:26 +0200 Subject: [PATCH 108/135] Forgot to add the marian_adaptive_server.cpp file to git --- src/command/marian_adaptive_server.cpp | 60 ++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 src/command/marian_adaptive_server.cpp diff --git a/src/command/marian_adaptive_server.cpp b/src/command/marian_adaptive_server.cpp new file mode 100644 index 000000000..26d6dee10 --- /dev/null +++ b/src/command/marian_adaptive_server.cpp @@ -0,0 +1,60 @@ +#include "marian.h" + +#include "3rd_party/simple-websocket-server/server_ws.hpp" +#include "common/file_stream.h" +#include "common/timer.h" +#include "common/utils.h" +#include "training/training.h" +#include "translator/self_adaptive.h" + +using namespace marian; + +typedef SimpleWeb::SocketServer WSServer; + +int main(int argc, char **argv) { + auto options = parseOptions(argc, argv, cli::mode::selfadaptiveServer); + auto task = New(options); + + // Initialize web server + WSServer server; + server.config.port = options->get("port", 8080); + + auto &translate = server.endpoint["^/translate/?$"]; + + translate.on_message = [&task](Ptr connection, + Ptr message) { + auto sendStream = std::make_shared(); + + // Get input text + auto inputText = message->string(); + + // Translate + timer::Timer timer; + auto outputText = task->run(inputText); + LOG(info, "Best translation: {}", outputText); + *sendStream << outputText << std::endl; + LOG(info, "Translation took: {:.5f}s", timer.elapsed()); + + // Send translation back + 
connection->send(sendStream, [](const SimpleWeb::error_code &ec) { + if(ec) + LOG(error, "Error sending message: ({}) {}", ec.value(), ec.message()); + }); + }; + + // Error Codes for error code meanings + // http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html + translate.on_error = [](Ptr connection, const SimpleWeb::error_code &ec) { + LOG(error, "Connection error: ({}) {}", ec.value(), ec.message()); + }; + + // Start server thread + std::thread serverThread([&server]() { + LOG(info, "Server is listening on port {}", server.config.port); + server.start(); + }); + + serverThread.join(); + + return 0; +} From bcbeb2d8f2fa306ed0a43aa87e7aa86cc93b062c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 2 Dec 2021 14:00:12 +0200 Subject: [PATCH 109/135] Document the toMemoryPieces method --- src/graph/parameters.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/graph/parameters.h b/src/graph/parameters.h index 8dd579af1..8aed11460 100644 --- a/src/graph/parameters.h +++ b/src/graph/parameters.h @@ -45,6 +45,11 @@ class Parameters { LOG(debug, "Destroyed parameter object of type {}", acceptedElementType_); } + /** + * @brief Retrieves the memory corresponding to the parameter values + * + * @return A vector of memorypieces each corresponding to a single parameter + */ std::vector toMemoryPieces() { std::vector res; res.reserve(params_.size()); From 2667ea90b5351fe0657799beb6d3c4ed4c492899 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 2 Dec 2021 17:13:39 +0200 Subject: [PATCH 110/135] Delete some more @briefs --- src/data/adaptive_context.h | 10 +++++----- src/graph/parameters.h | 4 ++-- src/translator/self_adaptive.h | 4 ++-- src/translator/swappable.h | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/data/adaptive_context.h b/src/data/adaptive_context.h index 80d2213da..dc7ebee5a 100644 --- a/src/data/adaptive_context.h +++ 
b/src/data/adaptive_context.h @@ -11,7 +11,7 @@ class AdaptiveContextReader; /** - * @brief An iterator for easier access of the context sentences produced by + * An iterator for easier access of the context sentences produced by * `AdaptiveContextReader::getSamples()` */ class AdaptiveContextIterator @@ -34,7 +34,7 @@ class AdaptiveContextIterator /** - * @brief Reads the context sentences, that are used for on-the-fly training in + * Reads the context sentences, that are used for on-the-fly training in * the self-adaptive translation mode, from files. */ class AdaptiveContextReader { @@ -45,7 +45,7 @@ class AdaptiveContextReader { public: /** - * @brief Initializes a new reader by supplying paths to the files with + * Initializes a new reader by supplying paths to the files with * context sentences * * @param paths paths to the input files. The input files contain @@ -58,7 +58,7 @@ class AdaptiveContextReader { AdaptiveContextReader(std::vector paths); /** - * @brief Returns an iterator over the sets of context sentences produced by + * Returns an iterator over the sets of context sentences produced by * `getSamples()` * * @return the beginning of the iterator. @@ -70,7 +70,7 @@ class AdaptiveContextReader { bool eof(); /** - * @brief Reads the next set of samples -- the context sentences -- for + * Reads the next set of samples -- the context sentences -- for * on-the-fly training in the self-adaptive translation mode. * * @details The input files contain newline-separated parallel sentence pairs diff --git a/src/graph/parameters.h b/src/graph/parameters.h index 8aed11460..e7f2efa19 100644 --- a/src/graph/parameters.h +++ b/src/graph/parameters.h @@ -21,7 +21,7 @@ class Parameters { protected: Type acceptedElementType_; // this parameter object only takes parameters of this type - /** @brief List of all parameter nodes of this expression graph. */ + /** List of all parameter nodes of this expression graph. 
*/ std::vector params_; std::unordered_map named_; @@ -46,7 +46,7 @@ class Parameters { } /** - * @brief Retrieves the memory corresponding to the parameter values + *01234 Retrieves the memory corresponding to the parameter values * * @return A vector of memorypieces each corresponding to a single parameter */ diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index fbde213c3..793c19c00 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -58,7 +58,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { } /** - * @brief Implementation for self-adaptive translation where data come from a + * Implementation for self-adaptive translation where data come from a * web request. * * @param json Input data in JSON. An "input" array of strings is expected to @@ -102,7 +102,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { } /** - * @brief Implementation for self-adaptive translation where inputs and + * Implementation for self-adaptive translation where inputs and * outputs are specified in CLI options. */ void run() override { diff --git a/src/translator/swappable.h b/src/translator/swappable.h index ce4c28b60..0ebda4389 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -62,7 +62,7 @@ class GPUEngineTrain { }; /** - * @brief Wraps a `GPUEngineTrain` and a `CPULoadedModel` and performs model + * Wraps a `GPUEngineTrain` and a `CPULoadedModel` and performs model * training. * * This class is created with self-adaptive translation in mind. Each invocation @@ -94,7 +94,7 @@ class SwappableModelTrainer { std::vector Parameters() const; /** - * @brief resets the training graph, reloads the model parameters and trains + * Resets the training graph, reloads the model parameters and trains * the model on the provided inputs. 
* * Intended to be used in the self-adaptive translation mode -- training is @@ -160,7 +160,7 @@ class GPULoadedModel { /// Overwrite this model with parameters from a different one. void Load(const CPULoadedModel &from); /** - * @brief Set the internal shared pointers to model parameters and + * Set the internal shared pointers to model parameters and * vocabularies to different ones * * The effect is similar to `Load()` but nothing is copied in the process. From 5b28786429fa709727e9d9bfe4b0b74c4be2e149 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 2 Dec 2021 17:26:58 +0200 Subject: [PATCH 111/135] Comment on a possibly missing "training-sets" option --- src/training/scheduler.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/training/scheduler.h b/src/training/scheduler.h index 6d1890116..5c91c477a 100644 --- a/src/training/scheduler.h +++ b/src/training/scheduler.h @@ -534,8 +534,11 @@ class Scheduler : public TrainingObserver { } void actAfterEpoch(TrainingState& state) override { - // stop if data streaming from STDIN is stopped for a TSV input + // When running self-adaptive marian in server mode the "training-sets" + // option isn't present because the training sentences are passed in via the + // request body if (options_->has("training-sets")) { + // Stop if data streaming from STDIN is stopped for a TSV input. 
auto trainingSets = options_->get>("train-sets"); if (trainingSets.size() > 0) { std::string firstPath = options_->get>("train-sets")[0]; From 097effa9a89572becd1f5c8e55357dba66d085ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 2 Dec 2021 17:38:15 +0200 Subject: [PATCH 112/135] Remove unneeded member variables and describe member var usage --- src/translator/self_adaptive.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 793c19c00..0a93d8061 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -51,10 +51,10 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { auto vocabPaths = options_->get>("vocabs"); std::vector srcVocabPaths(vocabPaths.begin(), vocabPaths.end() - 1); cpuModel_ = New(options_, modelFilename, srcVocabPaths, vocabPaths.back()); - translateEngine_ = New(optionsTrans_, 0); - translateSlot_ = New(translateEngine_); - trainEngine_ = New(options_, 0); - trainSlot_ = New(trainEngine_); + auto translateEngine = New(optionsTrans_, 0); + translateSlot_ = New(translateEngine); + auto trainEngine = New(options_, 0); + trainSlot_ = New(trainEngine); } /** @@ -129,14 +129,12 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { } private: - Ptr options_; // Options for training - Ptr optionsTrans_; // Options for translator - Ptr cpuModel_; - Ptr trainSlot_; - Ptr translateSlot_; - Ptr trainEngine_; - Ptr translateEngine_; - bool needsSwitching_ = true; + Ptr options_; // Options for training + Ptr optionsTrans_; // Options for translator + Ptr cpuModel_; // Holds model parameters and vocabularies + Ptr trainSlot_; // Performs model training + Ptr translateSlot_; // Performs translation with the model + bool needsSwitching_ = true; // Tracks whether translate slot's model needs to be reset template void adaptAndTranslate( From 
507f8ebd5f16c4bd15f06388245cf2bb3cc61d7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 3 Dec 2021 12:03:56 +0200 Subject: [PATCH 113/135] Document some methods --- src/graph/expression_graph.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index 5c375da01..abec87b09 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -184,6 +184,10 @@ class ExpressionGraph : public std::enable_shared_from_this { kvParams.second->clear(); } + + /** + * Call `clear()` on each of the parameters in the graph + */ void clearParams() { for(auto kvParams : paramsByElementType_) kvParams.second->clear(); @@ -235,6 +239,10 @@ class ExpressionGraph : public std::enable_shared_from_this { namespace_ = newNamespace; } + /** + * Extract graph parameters into a named map. + * @return A map with parameter names are keys and corresponding graph elements as values + */ const std::unordered_map & getParamsNamedMap() const { if (paramsByElementType_.size() != 1) { ABORT("Expected exactly one parameter datatype, got", paramsByElementType_.size()); From d797c906e05adb4840228e3841f449a51cbb4db1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 3 Dec 2021 12:11:42 +0200 Subject: [PATCH 114/135] Don't suggest looking at commits because they'll get squashed --- src/translator/swappable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/translator/swappable.h b/src/translator/swappable.h index 0ebda4389..e6db24280 100644 --- a/src/translator/swappable.h +++ b/src/translator/swappable.h @@ -12,8 +12,9 @@ * Originally this code was intended to allow multiple models to share a single * GPU for translation and be swapped into GPU memory only when needed. However, * parts of it, that weren't needed for self-adaptive translation, have been - * trimmed down since then. 
Look into the commit history if you want to revive - * this functionality. + * trimmed down since then. Look here + * https://github.com/kpu/marian-dev/blob/90e161fa9fcb3e3ba1467c76a10b1fc7f9390b6d/src/translator/swappable.h + * if you want to revive this functionality. */ #include "common/io.h" #include "data/vocab.h" From babf93d2904b1fe489dbccb994a4498ea75c06ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 3 Dec 2021 14:29:18 +0200 Subject: [PATCH 115/135] Add a comment on stdin handling in CorpusBase --- src/data/corpus_base.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/data/corpus_base.cpp b/src/data/corpus_base.cpp index f3b41f422..58a83a451 100644 --- a/src/data/corpus_base.cpp +++ b/src/data/corpus_base.cpp @@ -30,9 +30,9 @@ const SentenceTuple& CorpusIterator::dereference() const { return tup_; } -// These types of corpus constructors are used in in-training validators -// (only?), so do not load additional files for guided alignment or data -// weighting. +// These types of corpus constructors are used in in-training validators (only? +// (also in self-adaptive translation)), so do not load additional files for +// guided alignment or data weighting. 
CorpusBase::CorpusBase(const std::vector& paths, const std::vector>& vocabs, Ptr options, @@ -54,6 +54,8 @@ CorpusBase::CorpusBase(const std::vector& paths, } for(auto path : paths_) { + // This constructor is also used in self-adaptive translation and it needs + // support for reading translation inputs from stdin if(path == "stdin" || path == "-") files_.emplace_back(new std::istream(std::cin.rdbuf())); else { From 2d1ff231c5fb4b77f33c2a4100d080ba86fc1230 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 3 Dec 2021 14:31:40 +0200 Subject: [PATCH 116/135] Fix a typo --- src/graph/expression_graph.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index abec87b09..1a54d6a89 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -241,7 +241,7 @@ class ExpressionGraph : public std::enable_shared_from_this { /** * Extract graph parameters into a named map. - * @return A map with parameter names are keys and corresponding graph elements as values + * @return A map with parameter names as keys and the corresponding graph elements as values */ const std::unordered_map & getParamsNamedMap() const { if (paramsByElementType_.size() != 1) { From 6955a9ae989d4bb16399086683222a1b212f3ef7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 3 Dec 2021 15:11:59 +0200 Subject: [PATCH 117/135] Document the `dropF0prefix` flag --- src/graph/expression_graph.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/graph/expression_graph.h b/src/graph/expression_graph.h index 1a54d6a89..9aeb18d2d 100644 --- a/src/graph/expression_graph.h +++ b/src/graph/expression_graph.h @@ -763,7 +763,15 @@ class ExpressionGraph : public std::enable_shared_from_this { bool getThrowNaN() { return throwNaN_; } public: - /** Load model (mainly parameter objects) from array of io::Items */ + /** + * Load model (mainly 
parameter objects) from array of io::Items + * + * @param dropF0prefix modify the `io::Item` names upon loading by removing + * "F0::" prefixes. "F*::" prefixes are used to distinguish parameters from + * different scorers in the translation graph. This option is used by + * self-adaptive translation to support loading these `io::Item`s for + * training. + */ void load(const std::vector& ioItems, bool markReloaded = true, bool dropF0prefix = false) { setReloaded(false); for(auto& item : ioItems) { From 20cde2077be001e8af20813bc2b79887727f6abe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 3 Dec 2021 15:43:41 +0200 Subject: [PATCH 118/135] Enable option validation for adaptive marian --- src/common/config_validator.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/common/config_validator.cpp b/src/common/config_validator.cpp index 916ae14d7..7ab07d7e6 100644 --- a/src/common/config_validator.cpp +++ b/src/common/config_validator.cpp @@ -38,9 +38,9 @@ void ConfigValidator::validateOptions(cli::mode mode) const { validateOptionsTraining(); break; case cli::mode::selfadaptive: - // validateOptionsTranslation(); - // validateOptionsParallelData(); - // validateOptionsTraining(); + validateOptionsTranslation(); + validateOptionsParallelData(); + validateOptionsTraining(); break; default: ABORT("wrong CLI mode"); From bbe5196f70f400014957a740ba5234a37f93b34d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 6 Dec 2021 14:19:55 +0200 Subject: [PATCH 119/135] Add usage instructions to the adaptive/client_example.py script --- scripts/self-adaptive/client_example.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/self-adaptive/client_example.py b/scripts/self-adaptive/client_example.py index 6ef7757a7..72473c315 100644 --- a/scripts/self-adaptive/client_example.py +++ b/scripts/self-adaptive/client_example.py @@ -1,5 +1,16 @@ #!/usr/bin/env python +# This is an 
example for using self-adaptive translation in server mode. # # To run: # 1. Start self-adaptive Marian in server mode, e.g.: # ./build/marian-adaptive-server -p 8080 -m model.npz -v vocab.yaml vocab.yaml \ # --after-batches 10 --after-epochs 10 --learn-rate 0.1 --mini-batch 15 # other options # 2. In a new shell, run this script: # python3 ./scripts/self-adaptive/client_example.py -p 8080 # # For a more extensive example, see https://github.com/marian-cef/marian-examples/tree/master/adaptive + from __future__ import print_function, unicode_literals, division import sys From 85d831f2555e0453b8d7fbcbafae08ad9c3a630b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 6 Dec 2021 14:37:37 +0200 Subject: [PATCH 120/135] Mention the tutorial repo as well --- scripts/self-adaptive/client_example.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/self-adaptive/client_example.py b/scripts/self-adaptive/client_example.py index 72473c315..e1fa52d37 100644 --- a/scripts/self-adaptive/client_example.py +++ b/scripts/self-adaptive/client_example.py @@ -10,6 +10,7 @@ # python3 ./scripts/self-adaptive/client_example.py -p 8080 # # For a more extensive example, see https://github.com/marian-cef/marian-examples/tree/master/adaptive +# or https://github.com/tilde-nlp/runtime-domain-adaptation-tutorial from __future__ import print_function, unicode_literals, division From 7bb887afb864191a6b341e6986610f9ecbd2aa77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Mon, 6 Dec 2021 14:42:13 +0200 Subject: [PATCH 121/135] Add punctuation for clarity --- src/common/config_parser.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 0da03706b..b845bbceb 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -418,10 +418,11 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { // // These options are 
disabled for self-adaptive translation because they seem // to not make much sense in that context, except for --shuffle, because they - // deal with the storage of training data but in self-adaptive translation + // deal with the storage of training data, but, in self-adaptive translation, // training data sets are small and they typically change for each input // sentence. --shuffle isn't currently supported because we use `TextInput` - // for training data and shuffle is a no-op in that class. + // for training data and shuffle is a no-op in that class. This might get + // implement the future. if (mode_ != cli::mode::selfadaptive) { cli.add("--shuffle", "How to shuffle input data (data: shuffles data and sorted batches; batches: " From 9f0307083ba446c1043429a6eb1eab1d01b56c50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 9 Dec 2021 11:37:03 +0200 Subject: [PATCH 122/135] Fix a typo in a comment Co-authored-by: Roman Grundkiewicz --- src/common/config_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index b845bbceb..39b788511 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -422,7 +422,7 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { // training data sets are small and they typically change for each input // sentence. --shuffle isn't currently supported because we use `TextInput` // for training data and shuffle is a no-op in that class. This might get - // implement the future. + // implemented in the future. 
if (mode_ != cli::mode::selfadaptive) { cli.add("--shuffle", "How to shuffle input data (data: shuffles data and sorted batches; batches: " From 96615e7a4162c1a5696db69f0d757725b38ec864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 9 Dec 2021 11:37:27 +0200 Subject: [PATCH 123/135] Fix a typo in a comment Co-authored-by: Roman Grundkiewicz --- src/common/config_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 39b788511..41880bea8 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -948,7 +948,7 @@ void ConfigParser::addSuboptionsInputLength(cli::CLIWrapper& cli) { // In self-adaptive translation, the user might want to be able to set // different max lengths for training and translation. In that case, // --max-length is assumed to be meant for training (as per the help message) - // and we add a --max-lenght-translate parameter for translation. + // and we add a --max-length-translate parameter for translation. if (mode_ == cli::mode::selfadaptive) { cli.add("--max-length-translate", "Maximum input sentence length for translation", From d4a77bae17463a8cdd23ea2ecfbde5ee7251510e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 9 Dec 2021 11:38:16 +0200 Subject: [PATCH 124/135] Fix a typo in a comment Co-authored-by: Roman Grundkiewicz --- src/graph/parameters.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/graph/parameters.h b/src/graph/parameters.h index e7f2efa19..1d7808c92 100644 --- a/src/graph/parameters.h +++ b/src/graph/parameters.h @@ -46,8 +46,7 @@ class Parameters { } /** - *01234 Retrieves the memory corresponding to the parameter values - * + * Retrieves the memory corresponding to the parameter values. 
* @return A vector of memorypieces each corresponding to a single parameter */ std::vector toMemoryPieces() { From 379418b180ae677034ce33741d6bd5726ac7c989 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 9 Dec 2021 11:51:50 +0200 Subject: [PATCH 125/135] Revert an added space Wasn't intentional --- src/common/config_parser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 41880bea8..3bd16b8fa 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -108,7 +108,7 @@ ConfigParser::ConfigParser(cli::mode mode) // clang-format on } -void ConfigParser::addOptionsGeneral(cli::CLIWrapper & cli) { +void ConfigParser::addOptionsGeneral(cli::CLIWrapper& cli) { int defaultWorkspace = (mode_ == cli::mode::translation) ? 512 : 2048; cli.switchGroup("General options"); From 4bb6f5c6a9aca2020544f04ea8e8db3d90ecf8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 9 Dec 2021 12:12:08 +0200 Subject: [PATCH 126/135] Clarify the server mode handling in ConfigParser --- src/common/config_parser.cpp | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 3bd16b8fa..26db51e45 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -69,12 +69,27 @@ std::string const& ConfigParser::cmdLine() const { return cmdLine_; } +/** + * Convert some special modes (currently, server-like modes) to their non-special counterparts. 
+ */ +cli::mode convertSpecialModes(cli::mode mode) { + switch(mode) { + case cli::mode::server: + return cli::mode::translation; + case cli::mode::selfadaptiveServer: + return cli::mode::selfadaptive; + default: + return mode; + } +} + ConfigParser::ConfigParser(cli::mode mode) : cli_(config_, "Marian: Fast Neural Machine Translation in C++", "General options", "", 40), - mode_(mode == cli::mode::server - ? cli::mode::translation - : (mode == cli::mode::selfadaptiveServer ? cli::mode::selfadaptive : mode)) { - + // Server-like modes should mostly act like their non-server counterparts + // when parsing options. We keep all special handling in the constructor + // but in the rest of the parsing code we just pretend that we have a + // non-server mode. + mode_(convertSpecialModes(mode)) { addOptionsGeneral(cli_); if (mode == cli::mode::server || mode == cli::mode::selfadaptiveServer) addOptionsServer(cli_); From c41a56b9ec01eba1ca4a7924312375698566bd34 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 9 Dec 2021 18:11:04 +0200 Subject: [PATCH 127/135] Remove TSV options from self-adaptive translation --- src/common/config_parser.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 26db51e45..edfbb2140 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -427,7 +427,11 @@ void ConfigParser::addOptionsTraining(cli::CLIWrapper& cli) { {"1e", "0"}); addSuboptionsInputLength(cli); - addSuboptionsTSV(cli); + // TSV inputs aren't currently supported for self-adaptive translation because + // self-adaptive translation uses a custom training data reader + // (`AdaptiveContextReader`) which doesn't yet support TSV. 
+ if (mode_ != cli::mode::selfadaptive) + addSuboptionsTSV(cli); // data management options // From 6c97f825017aa79f6492c171929b373f3a264009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Fri, 10 Dec 2021 13:39:37 +0200 Subject: [PATCH 128/135] Share code between marian-server and marian-adaptive-server --- src/command/marian_adaptive_server.cpp | 57 ++---------------------- src/command/marian_server.cpp | 55 +---------------------- src/translator/server_common.h | 60 ++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 106 deletions(-) create mode 100644 src/translator/server_common.h diff --git a/src/command/marian_adaptive_server.cpp b/src/command/marian_adaptive_server.cpp index 26d6dee10..e2f03d999 100644 --- a/src/command/marian_adaptive_server.cpp +++ b/src/command/marian_adaptive_server.cpp @@ -1,60 +1,11 @@ -#include "marian.h" - -#include "3rd_party/simple-websocket-server/server_ws.hpp" -#include "common/file_stream.h" -#include "common/timer.h" -#include "common/utils.h" -#include "training/training.h" #include "translator/self_adaptive.h" - -using namespace marian; - -typedef SimpleWeb::SocketServer WSServer; +#include "translator/server_common.h" int main(int argc, char **argv) { + using namespace marian; + auto options = parseOptions(argc, argv, cli::mode::selfadaptiveServer); auto task = New(options); - // Initialize web server - WSServer server; - server.config.port = options->get("port", 8080); - - auto &translate = server.endpoint["^/translate/?$"]; - - translate.on_message = [&task](Ptr connection, - Ptr message) { - auto sendStream = std::make_shared(); - - // Get input text - auto inputText = message->string(); - - // Translate - timer::Timer timer; - auto outputText = task->run(inputText); - LOG(info, "Best translation: {}", outputText); - *sendStream << outputText << std::endl; - LOG(info, "Translation took: {:.5f}s", timer.elapsed()); - - // Send translation back - connection->send(sendStream, [](const 
SimpleWeb::error_code &ec) { - if(ec) - LOG(error, "Error sending message: ({}) {}", ec.value(), ec.message()); - }); - }; - - // Error Codes for error code meanings - // http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html - translate.on_error = [](Ptr connection, const SimpleWeb::error_code &ec) { - LOG(error, "Connection error: ({}) {}", ec.value(), ec.message()); - }; - - // Start server thread - std::thread serverThread([&server]() { - LOG(info, "Server is listening on port {}", server.config.port); - server.start(); - }); - - serverThread.join(); - - return 0; + return runServer(task, options); } diff --git a/src/command/marian_server.cpp b/src/command/marian_server.cpp index d712e8389..ef62320b8 100644 --- a/src/command/marian_server.cpp +++ b/src/command/marian_server.cpp @@ -1,62 +1,11 @@ -#include "marian.h" -#include "translator/beam_search.h" +#include "translator/server_common.h" #include "translator/translator.h" -#include "common/timer.h" -#include "common/utils.h" - -#include "3rd_party/simple-websocket-server/server_ws.hpp" - -typedef SimpleWeb::SocketServer WSServer; int main(int argc, char **argv) { using namespace marian; - // Initialize translation task auto options = parseOptions(argc, argv, cli::mode::server, true); auto task = New>(options); - auto quiet = options->get("quiet-translation"); - - // Initialize web server - WSServer server; - server.config.port = (short)options->get("port", 8080); - - auto &translate = server.endpoint["^/translate/?$"]; - - translate.on_message = [&task, quiet](Ptr connection, - Ptr message) { - // Get input text - auto inputText = message->string(); - auto sendStream = std::make_shared(); - - // Translate - timer::Timer timer; - auto outputText = task->run(inputText); - *sendStream << outputText << std::endl; - if(!quiet) - LOG(info, "Translation took: {:.5f}s", timer.elapsed()); - - // Send translation back - connection->send(sendStream, [](const SimpleWeb::error_code &ec) { - if(ec) - 
LOG(error, "Error sending message: ({}) {}", ec.value(), ec.message()); - }); - }; - - // Error Codes for error code meanings - // http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html - translate.on_error = [](Ptr /*connection*/, - const SimpleWeb::error_code &ec) { - LOG(error, "Connection error: ({}) {}", ec.value(), ec.message()); - }; - - // Start server thread - std::thread serverThread([&server]() { - server.start([](unsigned short port) { - LOG(info, "Server is listening on port {}", port); - }); - }); - - serverThread.join(); - return 0; + return runServer(task, options); } diff --git a/src/translator/server_common.h b/src/translator/server_common.h new file mode 100644 index 000000000..94cec33f4 --- /dev/null +++ b/src/translator/server_common.h @@ -0,0 +1,60 @@ +#include "marian.h" +#include "translator/beam_search.h" +#include "translator/translator.h" +#include "common/timer.h" +#include "common/utils.h" + +#include "3rd_party/simple-websocket-server/server_ws.hpp" + +typedef SimpleWeb::SocketServer WSServer; + +namespace marian { + +int runServer(Ptr task, Ptr options) { + auto quiet = options->get("quiet-translation"); + + // Initialize web server + WSServer server; + server.config.port = (short)options->get("port", 8080); + + auto &translate = server.endpoint["^/translate/?$"]; + + translate.on_message = [&task, quiet](Ptr connection, + Ptr message) { + // Get input text + auto inputText = message->string(); + auto sendStream = std::make_shared(); + + // Translate + timer::Timer timer; + auto outputText = task->run(inputText); + *sendStream << outputText << std::endl; + if(!quiet) + LOG(info, "Translation took: {:.5f}s", timer.elapsed()); + + // Send translation back + connection->send(sendStream, [](const SimpleWeb::error_code &ec) { + if(ec) + LOG(error, "Error sending message: ({}) {}", ec.value(), ec.message()); + }); + }; + + // Error Codes for error code meanings + // 
http://www.boost.org/doc/libs/1_55_0/doc/html/boost_asio/reference.html + translate.on_error = [](Ptr /*connection*/, + const SimpleWeb::error_code &ec) { + LOG(error, "Connection error: ({}) {}", ec.value(), ec.message()); + }; + + // Start server thread + std::thread serverThread([&server]() { + server.start([](unsigned short port) { + LOG(info, "Server is listening on port {}", port); + }); + }); + + serverThread.join(); + + return 0; +} +} // namespace marian From 88308a7e2026598f5e1fa75f6126ebdd28e4d265 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 15 Dec 2021 13:27:13 +0200 Subject: [PATCH 129/135] Don't require a "models" option for self-adaptive translation --- src/common/config_validator.cpp | 8 +++++++- src/common/config_validator.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/common/config_validator.cpp b/src/common/config_validator.cpp index 7ab07d7e6..e8c192acd 100644 --- a/src/common/config_validator.cpp +++ b/src/common/config_validator.cpp @@ -38,7 +38,7 @@ void ConfigValidator::validateOptions(cli::mode mode) const { validateOptionsTraining(); break; case cli::mode::selfadaptive: - validateOptionsTranslation(); + validateOptionsVocabularies(); validateOptionsParallelData(); validateOptionsTraining(); break; @@ -64,6 +64,12 @@ void ConfigValidator::validateOptionsTranslation() const { ABORT_IF(!filesystem::exists(modelPath), "Model file does not exist: " + modelFile); } + validateOptionsVocabularies(); +} + +// Other validation methods already do vocabulary validation but we need this +// functionality separately for self-adaptive translation option validation +void ConfigValidator::validateOptionsVocabularies() const { auto vocabs = get>("vocabs"); ABORT_IF(vocabs.empty(), "Translating, but vocabularies are not given"); diff --git a/src/common/config_validator.h b/src/common/config_validator.h index 0e73a9e39..c16a62726 100644 --- a/src/common/config_validator.h +++ 
b/src/common/config_validator.h @@ -20,6 +20,7 @@ class ConfigValidator { bool dumpConfigOnly_{false}; void validateOptionsTranslation() const; + void validateOptionsVocabularies() const; void validateOptionsParallelData() const; void validateOptionsScoring() const; void validateOptionsTraining() const; From 08d20d5264bf9625f4606275fd0d7140bdb64e0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 16 Dec 2021 12:16:03 +0200 Subject: [PATCH 130/135] Fix crashes introduced by removing some options from self-adaptive marian --- src/common/config.cpp | 2 +- src/common/config_parser.cpp | 2 +- src/common/config_parser.h | 10 ++++++++++ src/common/config_validator.cpp | 22 +++++++++++++++------- src/common/config_validator.h | 9 +++++++++ 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/common/config.cpp b/src/common/config.cpp index 9878c70b0..3e03f8a6d 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -73,7 +73,7 @@ void Config::initialize(ConfigParser const& cp) { } // guess --tsv-fields, i.e. the number of fields in a TSV input, if not set - if(get("tsv") && get("tsv-fields") == 0) { + if(get("tsv", false) && get("tsv-fields") == 0) { size_t tsvFields = 0; // use the length of --input-types if given diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index edfbb2140..0e10eb2c9 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -1118,7 +1118,7 @@ Ptr ConfigParser::parseOptions(int argc, char** argv, bool doValidate) // (or --data-weighting and 'weight'). // // Note: this may modify the config, so it is safer to do it after --dump-config. 
- if(mode_ == cli::mode::training || get("tsv")) { + if(mode_ == cli::mode::training || get("tsv", false)) { auto inputTypes = get>("input-types"); if(!inputTypes.empty()) { bool seenAligns = false; diff --git a/src/common/config_parser.h b/src/common/config_parser.h index 5429f3d2c..b0b4f9386 100644 --- a/src/common/config_parser.h +++ b/src/common/config_parser.h @@ -122,6 +122,16 @@ class ConfigParser { return config_[key].as(); } + // Return value for given option key cast to given type. Return the supplied + // default value if option is not set. + template + T get(const std::string& key, T defaultValue) const { + if(has(key)) + return config_[key].as(); + else + return defaultValue; + } + void addOptionsGeneral(cli::CLIWrapper&); void addOptionsServer(cli::CLIWrapper&); void addOptionsModel(cli::CLIWrapper&); diff --git a/src/common/config_validator.cpp b/src/common/config_validator.cpp index e8c192acd..cc14bcb13 100644 --- a/src/common/config_validator.cpp +++ b/src/common/config_validator.cpp @@ -88,11 +88,14 @@ void ConfigValidator::validateOptionsParallelData() const { ABORT_IF(trainSets.empty(), "No train sets given in config file or on command line"); auto numVocabs = get>("vocabs").size(); - ABORT_IF(!get("tsv") && numVocabs > 0 && numVocabs != trainSets.size(), + // The "tsv" option isn't present in self-adaptive translation options so we + // have to explicitly default to false for the option + auto tsv = get("tsv", false); + ABORT_IF(!tsv && numVocabs > 0 && numVocabs != trainSets.size(), "There should be as many vocabularies as training files"); // disallow, for example --tsv --train-sets file1.tsv file2.tsv - ABORT_IF(get("tsv") && trainSets.size() != 1, + ABORT_IF(tsv && trainSets.size() != 1, "A single file must be provided with --train-sets (or stdin) for a tab-separated input"); // disallow, for example --train-sets stdin stdin or --train-sets stdin file.tsv @@ -134,7 +137,9 @@ void ConfigValidator::validateOptionsTraining() const { "Model 
directory does not exist"); std::string errorMsg = "There should be as many validation files as training files"; - if(get("tsv")) + // The "tsv" option isn't present in self-adaptive translation options so we + // have to explicitly default to false for the option + if(get("tsv", false)) errorMsg += ". If the training set is in the TSV format, validation sets have to also be a single TSV file"; ABORT_IF(has("valid-sets") @@ -142,10 +147,13 @@ void ConfigValidator::validateOptionsTraining() const { && !get>("valid-sets").empty(), errorMsg); - // check if --early-stopping-on has proper value - std::set supportedStops = {"first", "all", "any"}; - ABORT_IF(supportedStops.find(get("early-stopping-on")) == supportedStops.end(), - "Supported options for --early-stopping-on are: first, all, any"); + // "early-stopping" also isn't present for self-adaptive translation + if (has("early-stopping")) { + // check if --early-stopping-on has proper value + std::set supportedStops = {"first", "all", "any"}; + ABORT_IF(supportedStops.find(get("early-stopping-on")) == supportedStops.end(), + "Supported options for --early-stopping-on are: first, all, any"); + } // validations for learning rate decaying ABORT_IF(get("lr-decay") > 1.f, "Learning rate decay factor greater than 1.0 is unusual"); diff --git a/src/common/config_validator.h b/src/common/config_validator.h index c16a62726..e31188532 100644 --- a/src/common/config_validator.h +++ b/src/common/config_validator.h @@ -14,6 +14,15 @@ class ConfigValidator { T get(const std::string& key) const { return config_[key].as(); } + // Return value for given option key cast to given type. Return the supplied + // default value if option is not set. + template + T get(const std::string& key, T defaultValue) const { + if(has(key)) + return config_[key].as(); + else + return defaultValue; + } // The option --dump-config is used, so alleviate some constraints, e.g. 
we don't want to require // --train-sets or --vocabs From 1326bb1094c471d8ce23083606a95ee50db5a8c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Thu, 16 Dec 2021 16:03:34 +0200 Subject: [PATCH 131/135] Disable parallel data validation for self-adaptive server mode --- src/common/config_validator.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/common/config_validator.cpp b/src/common/config_validator.cpp index cc14bcb13..119525cb8 100644 --- a/src/common/config_validator.cpp +++ b/src/common/config_validator.cpp @@ -39,7 +39,10 @@ void ConfigValidator::validateOptions(cli::mode mode) const { break; case cli::mode::selfadaptive: validateOptionsVocabularies(); - validateOptionsParallelData(); + // Check that we're not running in server mode. In server mode, training + // data are passed in via the request not CLI options + if (!has("port")) + validateOptionsParallelData(); validateOptionsTraining(); break; default: From 56cfb374ca0310e61a29c4c26d3067b62cea38ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 28 Dec 2021 14:06:46 +0200 Subject: [PATCH 132/135] Introduce a separate workspace size option for the translation graph --- src/common/config_parser.cpp | 9 +++++++++ src/translator/self_adaptive.h | 2 ++ 2 files changed, 11 insertions(+) diff --git a/src/common/config_parser.cpp b/src/common/config_parser.cpp index 0e10eb2c9..a20d69c77 100644 --- a/src/common/config_parser.cpp +++ b/src/common/config_parser.cpp @@ -141,6 +141,15 @@ void ConfigParser::addOptionsGeneral(cli::CLIWrapper& cli) { cli.add("--workspace,-w", "Preallocate arg MB of work space", defaultWorkspace); + // Self-adaptive translation uses a training graph and a translation graph. We + // want to be able to prealocate different amounts of memory for both (because + // translation usually needs less) so we add a dedicated opiton for + // translation if self-adaptive translation is used. 
+ if (mode_ == cli::mode::selfadaptive) { + cli.add("--workspace-translate", + "Preallocate arg MB of work space for translation", + 512); + } cli.add("--log", "Log training process information to file given by arg"); cli.add("--log-level", diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 0a93d8061..45c66139a 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -40,6 +40,8 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { optionsTrans_->set("maxi-batch", 1); auto maxTranslationInput = options_->get("max-length-translate"); optionsTrans_->set("max-length", maxTranslationInput); + auto translationWorkspace = options_->get("workspace-translate"); + optionsTrans_->set("workspace", translationWorkspace); optionsTrans_->set("shuffle", "none"); auto modelFilename = options_->get("model"); From d9cddf41b8d434a8a98cbf7a0850a748d6dde142 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Wed, 29 Dec 2021 14:47:24 +0200 Subject: [PATCH 133/135] Fix alignment printing during translation --- src/translator/self_adaptive.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/translator/self_adaptive.h b/src/translator/self_adaptive.h index 45c66139a..85b4f3041 100644 --- a/src/translator/self_adaptive.h +++ b/src/translator/self_adaptive.h @@ -50,6 +50,19 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { // have to adapt translation options manually. optionsTrans_->set>("models", {modelFilename}); + // We mask the alignment option for training so that the alignment loss + // nodes (self-attention heads) don't get added to the graph (for + // transformers). Adding the alignment loss nodes and not supplying guided + // alignments during training results in a crash with "There are more (n) + // than one top most nodes for the backward pass". 
In self-adaptive + // translation we don't support training the alignments because they are + // likely to remain good enough after the few self-adaptive updates. + // + // TODO: regarding the above, make the alignment heads non-trainable; afaik, + // they are treated like regular attantion heads currently which might + // decrease alignment precision. + options_->set("alignment", ""); + auto vocabPaths = options_->get>("vocabs"); std::vector srcVocabPaths(vocabPaths.begin(), vocabPaths.end() - 1); cpuModel_ = New(options_, modelFilename, srcVocabPaths, vocabPaths.back()); @@ -144,7 +157,7 @@ class TrainSelfAdaptive : public ModelTask, public ModelServiceTask { Iterator trainBegin, Iterator trainEnd, Ptr collector) { - auto printer = New(options_, cpuModel_->TrgVocab()); + auto printer = New(optionsTrans_, cpuModel_->TrgVocab()); for(auto testBatch : *testBatches) { ABORT_IF(trainBegin == trainEnd, "Context batches ran out before test batches"); From 3359bb7a831a583a8821051e85c6d8966fe6f4e2 Mon Sep 17 00:00:00 2001 From: Roman Grundkiewicz Date: Mon, 31 Jan 2022 17:13:36 +0000 Subject: [PATCH 134/135] Change "training-sets" to "train-sets" --- src/training/scheduler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/training/scheduler.h b/src/training/scheduler.h index 5c91c477a..96dd31467 100644 --- a/src/training/scheduler.h +++ b/src/training/scheduler.h @@ -534,10 +534,10 @@ class Scheduler : public TrainingObserver { } void actAfterEpoch(TrainingState& state) override { - // When running self-adaptive marian in server mode the "training-sets" + // When running self-adaptive marian in server mode the "train-sets" // option isn't present because the training sentences are passed in via the // request body - if (options_->has("training-sets")) { + if (options_->has("train-sets")) { // Stop if data streaming from STDIN is stopped for a TSV input. 
auto trainingSets = options_->get>("train-sets"); if (trainingSets.size() > 0) { From a274dfbe0f356294ee092315ebd9a9df4dd16c5e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rihards=20Kri=C5=A1lauks?= Date: Tue, 22 Feb 2022 13:31:36 +0200 Subject: [PATCH 135/135] Mention marian-adaptive-server in the changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4927abbd..e343fd828 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] ### Added -- Adds a `marian-adaptive` executable to enable self-adaptive translation (a.k.a, runtime domain adaptation). +- Adds `marian-adaptive` and `marian-adaptive-server` executables to enable self-adaptive translation (a.k.a, runtime domain adaptation). ### Fixed - Scripts using PyYAML now use `safe_load`; see https://msg.pyyaml.org/load