From 30323c8ee7585f829d1a1779b15c19be9b8d44d6 Mon Sep 17 00:00:00 2001 From: yirongjie Date: Thu, 14 Mar 2024 15:12:52 +0800 Subject: [PATCH 1/6] fix: merge TENSOR_STATIC_SHAPED+TENSOR_STATIC_ALLOCED ->TENSOR_STATIC_READY --- include/Types.hpp | 3 +- src/Layer.hpp | 243 ++++++++++--------------- src/Module.hpp | 24 +-- src/Tensor.cpp | 75 +++----- src/Tensor.hpp | 15 +- src/backends/cpu/CPUTensorFunction.hpp | 19 ++ 6 files changed, 157 insertions(+), 222 deletions(-) diff --git a/include/Types.hpp b/include/Types.hpp index 1545d850..5e8602af 100644 --- a/include/Types.hpp +++ b/include/Types.hpp @@ -27,8 +27,7 @@ typedef enum { enum TensorStatus { TENSOR_DYNAMIC, TENSOR_STATIC_INIT , - TENSOR_STATIC_SHAPED , - TENSOR_STATIC_ALLOCED , + TENSOR_STATIC_READY , }; enum ErrorCode { diff --git a/src/Layer.hpp b/src/Layer.hpp index 4c934853..d8fb9ccb 100644 --- a/src/Layer.hpp +++ b/src/Layer.hpp @@ -17,7 +17,6 @@ #include #include - namespace mllm { class Layer { @@ -45,20 +44,21 @@ class Layer { } static int cpu_thread; + private: - std::string name_num_to_X(const std::string& input_string) { - std::regex pattern(R"(\.\d{1,3}\.)"); // Matches any number between 1 and 100 between two dots - std::string replacement = ".X."; // The string to replace the matched pattern with + std::string name_num_to_X(const std::string &input_string) { + std::regex pattern(R"(\.\d{1,3}\.)"); // Matches any number between 1 and 100 between two dots + std::string replacement = ".X."; // The string to replace the matched pattern with std::string output_string = std::regex_replace(input_string, pattern, replacement); return output_string; } - std::string name_X_to_num(const std::string& input_string, int in_idx) { - std::regex pattern(".X."); // Matches any number between 1 and 100 between two dots - std::string replacement = "."+std::to_string(in_idx)+"."; // The string to replace the matched pattern with + std::string name_X_to_num(const std::string &input_string, int in_idx) { + std::regex pattern(".X."); // Matches any number between 1 and 100 between two dots + std::string replacement = "." + std::to_string(in_idx) + "."; // The string to replace the matched pattern with std::string output_string = std::regex_replace(input_string, pattern, replacement); return output_string; } - void reset_KVCache(string input_name, string layer_next_name) { + void reset_KVCache(string input_name) { vector renameX_names; renameX_names.push_back(input_name); const vector suffixs = {"-view", ".split-0", ".split-1", ".split-2"}; @@ -73,10 +73,15 @@ class Layer { auto name = name_X_to_num(x_name, saved_list_idx); vector shape = {Tensor::gph_[x_name].batch(), Tensor::gph_[x_name].head(), Tensor::gph_[x_name].sequence(), Tensor::gph_[x_name].dimension()}; layername_2_tensorname[name] = name; - if (Tensor::gph_.find(name) == Tensor::gph_.end()) { - Tensor::gph_[name] = Tensor(backend_); - Tensor::gph_[name].setName(name); + Tensor::gph_[name] = Tensor(backend_); + Tensor::gph_[name].initFrom(Tensor::gph_[x_name]); + Tensor::gph_[name].setName(name); + vector new_chd_tensors = {}; + for (auto child : Tensor::gph_[x_name].childTensors()) { + new_chd_tensors.push_back(&Tensor::gph_[name_X_to_num(child->name(), saved_list_idx)]); } + Tensor::gph_[name].childTensors().clear(); + Tensor::gph_[name].childTensors() = new_chd_tensors; if (Tensor::gph_[x_name].aggregated() == true) { vector> new_aggregated_tensors = {}; for (const auto &aggregated_tensor : Tensor::gph_[x_name].aggregated_tensors()) { @@ -85,7 +90,6 @@ class Layer { } Tensor::gph_[name].addTensors(new_aggregated_tensors, Tensor::gph_[x_name].aggregated_dim()); } - Tensor::gph_[name].reshape(shape[0], shape[1], shape[2], shape[3]); } } @@ -106,14 +110,16 @@ class Layer { if (Tensor::gph_.find(input.name()) == Tensor::gph_.end()) { Tensor::gph_[input.name()] = input; Tensor::gph_[input.name()].setName(input.name()); - }else if(input.count() != Tensor::gph_[input.name()].count()) { + } else if (input.count() != Tensor::gph_[input.name()].count()) { Tensor::gph_[input.name()] = input; Tensor::gph_[input.name()].setName(input.name()); } - if(layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { - if(param_["type"] == KVCACHE) { + auto in_name = input.name(); + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + if (param_["type"] == KVCACHE) { layername_2_tensorname[layer_next_name] = layer_next_name; - reset_KVCache(input.name(), layer_next_name); + reset_KVCache(input.name()); + in_name = name_X_to_num(in_name, saved_list_idx); } else { layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); } @@ -123,32 +129,24 @@ class Layer { Tensor::gph_[next_name] = Tensor(backend_); Tensor::gph_[next_name].setName(next_name); } - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor*){})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; + vector> shared_inputs{std::shared_ptr(&Tensor::gph_[in_name], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->reshape(shared_inputs, shared_outputs); - break; - } - case TENSOR_STATIC_SHAPED: { - auto next_name = layername_2_tensorname[layer_next_name]; - assert(Tensor::gph_[input.name()].hostPtr() != nullptr); - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor*){})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; op_->setUp(shared_inputs, shared_outputs); - if(Tensor::gph_[next_name].aggregated() == false) { + if (Tensor::gph_[next_name].aggregated() == false) { assert(Tensor::gph_[next_name].hostPtr() != nullptr); } break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { auto next_name = layername_2_tensorname[layer_next_name]; assert(Tensor::gph_[input.name()].hostPtr() != nullptr); - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor*){})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; + vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->execute(shared_inputs, shared_outputs); - if(Tensor::gph_[next_name].aggregated() == false) { + if (Tensor::gph_[next_name].aggregated() == false) { assert(Tensor::gph_[next_name].hostPtr() != nullptr); } - // Tensor::gph_[next_name].saveNData(layer_next_name); break; } default: { @@ -157,6 +155,7 @@ class Layer { } auto next_name = layername_2_tensorname[layer_next_name]; Tensor::gph_[next_name].status() = Tensor::gph_[input.name()].status(); + // Tensor::gph_[next_name].saveNData(layer_next_name); return Tensor::gph_[next_name]; } Tensor &_2I1O_OP(Tensor &input0, Tensor &input1) { @@ -174,8 +173,7 @@ class Layer { if (Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { Tensor::gph_[input1.name()].status() = input0.status(); } - if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && - Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { + if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { assert(input0.status() == input1.status()); } switch (input0.status()) { @@ -188,7 +186,7 @@ class Layer { Tensor::gph_[input1.name()] = input1; Tensor::gph_[input1.name()].setName(input1.name()); } - if(layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); } auto next_name = layername_2_tensorname[layer_next_name]; @@ -197,33 +195,22 @@ class Layer { Tensor::gph_[next_name].setName(next_name); } vector> shared_inputs{ - std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor*){}), - std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor*){})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; + std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), + std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->reshape(shared_inputs, shared_outputs); - break; - } - case TENSOR_STATIC_SHAPED: { - auto next_name = layername_2_tensorname[layer_next_name]; - vector> shared_inputs{ - std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor*){}), - std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor*){})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; op_->setUp(shared_inputs, shared_outputs); assert(Tensor::gph_[next_name].hostPtr() != nullptr); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { auto next_name = layername_2_tensorname[layer_next_name]; vector> shared_inputs{ - std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor*){}), - std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor*){})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; + std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), + std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->execute(shared_inputs, shared_outputs); assert(Tensor::gph_[next_name].hostPtr() != nullptr); - // Tensor::gph_[input0.name()].saveNData(input0.name()); - // Tensor::gph_[input1.name()].saveNData(input1.name()); - // Tensor::gph_[next_name].saveNData(layer_next_name); break; } default: { @@ -232,6 +219,9 @@ class Layer { } auto next_name = layername_2_tensorname[layer_next_name]; Tensor::gph_[next_name].status() = Tensor::gph_[input0.name()].status(); + // Tensor::gph_[input0.name()].saveNData(input0.name()); + // Tensor::gph_[input1.name()].saveNData(input1.name()); + // Tensor::gph_[next_name].saveNData(layer_next_name); return Tensor::gph_[next_name]; } Tensor &_3I1O_OP(Tensor &input0, Tensor &input1, Tensor &input2) { @@ -251,14 +241,12 @@ class Layer { if (Tensor::gph_.find(input2.name()) != Tensor::gph_.end()) { Tensor::gph_[input2.name()].status() = input0.status(); } - if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && - Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { + if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { assert(input0.status() == input1.status()); - } - if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && - Tensor::gph_.find(input2.name()) != Tensor::gph_.end()) { + } + if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input2.name()) != Tensor::gph_.end()) { assert(input0.status() == input2.status()); - } + } switch (input0.status()) { case TENSOR_STATIC_INIT: { if (Tensor::gph_.find(input0.name()) == Tensor::gph_.end()) { @@ -273,7 +261,7 @@ class Layer { Tensor::gph_[input2.name()] = input2; Tensor::gph_[input2.name()].setName(input2.name()); } - if(layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); } auto next_name = layername_2_tensorname[layer_next_name]; @@ -287,20 +275,11 @@ class Layer { std::shared_ptr(&Tensor::gph_[input2.name()], [](Tensor *) {})}; vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->reshape(shared_inputs, shared_outputs); - break; - } - case TENSOR_STATIC_SHAPED: { - auto next_name = layername_2_tensorname[layer_next_name]; - vector> shared_inputs{ - std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), - std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {}), - std::shared_ptr(&Tensor::gph_[input2.name()], [](Tensor *) {})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->setUp(shared_inputs, shared_outputs); assert(Tensor::gph_[next_name].hostPtr() != nullptr); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { auto next_name = layername_2_tensorname[layer_next_name]; vector> shared_inputs{ std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), @@ -309,7 +288,6 @@ class Layer { vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->execute(shared_inputs, shared_outputs); assert(Tensor::gph_[next_name].hostPtr() != nullptr); - // Tensor::gph_[next_name].saveNData(layer_next_name); break; } default: { @@ -318,6 +296,7 @@ class Layer { } auto next_name = layername_2_tensorname[layer_next_name]; Tensor::gph_[next_name].status() = Tensor::gph_[input0.name()].status(); + // Tensor::gph_[next_name].saveNData(layer_next_name); return Tensor::gph_[next_name]; } Tensor &_0I1O_OP() { @@ -329,8 +308,8 @@ class Layer { string layer_next_name = "param-" + op_->name(); switch (Module::tensor_status) { case TENSOR_STATIC_INIT: { - if(layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { - layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); } auto next_name = layername_2_tensorname[layer_next_name]; if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { @@ -338,29 +317,22 @@ class Layer { Tensor::gph_[next_name].setName(next_name); } vector> shared_inputs{}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->reshape(shared_inputs, shared_outputs); - break; - } - case TENSOR_STATIC_SHAPED: { - auto next_name = layername_2_tensorname[layer_next_name]; - vector> shared_inputs{}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; op_->setUp(shared_inputs, shared_outputs); - if(Tensor::gph_[next_name].aggregated() == false) { + if (Tensor::gph_[next_name].aggregated() == false) { assert(Tensor::gph_[next_name].hostPtr() != nullptr); } break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { auto next_name = layername_2_tensorname[layer_next_name]; vector> shared_inputs{}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; op_->execute(shared_inputs, shared_outputs); - if(Tensor::gph_[next_name].aggregated() == false) { + if (Tensor::gph_[next_name].aggregated() == false) { assert(Tensor::gph_[next_name].hostPtr() != nullptr); } - // Tensor::gph_[next_name].saveData(); break; } default: { @@ -369,6 +341,7 @@ class Layer { } auto next_name = layername_2_tensorname[layer_next_name]; Tensor::gph_[next_name].status() = Module::tensor_status; + // Tensor::gph_[next_name].saveNData(layer_next_name); return Tensor::gph_[next_name]; } vector _1INO_OP(Tensor &input, int N) { @@ -390,14 +363,14 @@ class Layer { if (Tensor::gph_.find(input.name()) == Tensor::gph_.end()) { Tensor::gph_[input.name()] = input; Tensor::gph_[input.name()].setName(input.name()); - }else if(input.count() != Tensor::gph_[input.name()].count()) { + } else if (input.count() != Tensor::gph_[input.name()].count()) { Tensor::gph_[input.name()] = input; Tensor::gph_[input.name()].setName(input.name()); } vector> shared_outputs = {}; vector next_names = {}; - for (const auto& layer_next_name : layer_next_names) { - if(layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + for (const auto &layer_next_name : layer_next_names) { + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); } auto next_name = layername_2_tensorname[layer_next_name]; @@ -406,44 +379,28 @@ class Layer { Tensor::gph_[next_name].setName(next_name); } next_names.push_back(next_name); - shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})); + shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})); } - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor*){})}; + vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; op_->reshape(shared_inputs, shared_outputs); - break; - } - case TENSOR_STATIC_SHAPED: { - // auto next_name = layername_2_tensorname[layer_next_name]; - vector> shared_outputs = {}; - vector next_names = {}; - for (const auto& layer_next_name : layer_next_names) { - auto next_name = layername_2_tensorname[layer_next_name]; - next_names.push_back(next_name); - shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})); - } - if(Tensor::gph_[input.name()].aggregated() == false) { - assert(Tensor::gph_[input.name()].hostPtr() != nullptr); - } - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor*){})}; op_->setUp(shared_inputs, shared_outputs); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { vector> shared_outputs = {}; vector next_names = {}; - for (const auto& layer_next_name : layer_next_names) { + for (const auto &layer_next_name : layer_next_names) { auto next_name = layername_2_tensorname[layer_next_name]; next_names.push_back(next_name); - shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor*){})); + shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})); } - if(Tensor::gph_[input.name()].aggregated() == false) { + if (Tensor::gph_[input.name()].aggregated() == false) { assert(Tensor::gph_[input.name()].hostPtr() != nullptr); } - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor*){})}; + vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; op_->execute(shared_inputs, shared_outputs); for (int i = 0; i < shared_outputs.size(); ++i) { assert(Tensor::gph_[next_names[i]].hostPtr() != nullptr); - //Tensor::gph_[next_names[i]].saveNData(layer_next_names[i]); } break; } @@ -452,9 +409,10 @@ class Layer { } } vector output_result = {}; - for (const auto& layer_next_name : layer_next_names) { + for (const auto &layer_next_name : layer_next_names) { auto next_name = layername_2_tensorname[layer_next_name]; Tensor::gph_[next_name].status() = Tensor::gph_[input.name()].status(); + // Tensor::gph_[next_name].saveNData(layer_next_name); output_result.push_back(Tensor::gph_[next_name]); } return output_result; @@ -466,7 +424,6 @@ class Layer { OpParam param_; bool init_ = false; int saved_list_idx; - }; class Linear final : public Layer { @@ -537,13 +494,13 @@ class QuickGELU final : public Layer { } }; -using ActFnConstructor = std::function; +using ActFnConstructor = std::function; inline std::map ACT_FN = { - {"SiLU", []( const std::string& name) { return SiLU( name); }}, - {"ReLU", []( const std::string& name) { return ReLU( name); }}, - {"ReLU2", [](const std::string& name) { return ReLUSquaredActivation( name); }}, - {"GELU", [](const std::string& name) { return GELU( name); }}, - {"QuickGELU", []( const std::string& name) { return QuickGELU( name); }}, + {"SiLU", [](const std::string &name) { return SiLU(name); }}, + {"ReLU", [](const std::string &name) { return ReLU(name); }}, + {"ReLU2", [](const std::string &name) { return ReLUSquaredActivation(name); }}, + {"GELU", [](const std::string &name) { return GELU(name); }}, + {"QuickGELU", [](const std::string &name) { return QuickGELU(name); }}, }; class Softmax final : public Layer { @@ -609,7 +566,7 @@ class KVCache final : public Layer { class LayerNorm final : public Layer { public: - explicit LayerNorm(int norm_size, bool bias, float epsilon,std::string name) { + explicit LayerNorm(int norm_size, bool bias, float epsilon, std::string name) { param_["norm_size"] = norm_size; param_["epsilon"] = epsilon; param_["bias"] = (float)bias; @@ -632,10 +589,9 @@ class RMSNorm final : public Layer { } }; - class Matmul final : public Layer { public: - explicit Matmul(bool transpose0, bool transpose1, std::string name) { + explicit Matmul(bool transpose0, bool transpose1, std::string name) { param_["transpose0"] = transpose0; param_["transpose1"] = transpose1; init(std::move(name), OpType::MATMUL); @@ -645,14 +601,13 @@ class Matmul final : public Layer { } }; - class Split final : public Layer { public: Split() = default; explicit Split(int split_num, Chl split_dim, int split_dim_size, std::string name) { - param_["split_num"] =(float) split_num; - param_["split_dim"] =(float) split_dim; - param_["split_dim_size"] =(float) split_dim_size; + param_["split_num"] = (float)split_num; + param_["split_dim"] = (float)split_dim; + param_["split_dim_size"] = (float)split_dim_size; init(std::move(name), OpType::SPLIT); } vector operator()(Tensor &input) { @@ -663,14 +618,14 @@ class Split final : public Layer { class Convolution2D final : public Layer { public: explicit Convolution2D(int in_channel, int out_channel, vector kernal, vector stride, PaddingType padding, bool bias, std::string name) { - param_["in_channel"] =(float) in_channel; - param_["out_channel"] =(float) out_channel; - param_["kernal_h"] =(float) kernal[0]; - param_["kernal_w"] =(float) kernal[1]; - param_["stride_h"] =(float) stride[0]; - param_["stride_w"] =(float) stride[1]; - param_["padding"] =(float) padding; - param_["bias"] =(float) bias; + param_["in_channel"] = (float)in_channel; + param_["out_channel"] = (float)out_channel; + param_["kernal_h"] = (float)kernal[0]; + param_["kernal_w"] = (float)kernal[1]; + param_["stride_h"] = (float)stride[0]; + param_["stride_w"] = (float)stride[1]; + param_["padding"] = (float)padding; + param_["bias"] = (float)bias; init(std::move(name), OpType::CONVOLUTION2D); } Tensor &operator()(Tensor &input) { @@ -681,16 +636,16 @@ class Convolution2D final : public Layer { class Convolution3D final : public Layer { public: explicit Convolution3D(int in_channel, int out_channel, vector kernal, vector stride, PaddingType padding, bool bias, std::string name) { - param_["in_channel"] =(float) in_channel; - param_["out_channel"] =(float) out_channel; - param_["kernal_t"] =(float) kernal[0]; - param_["kernal_h"] =(float) kernal[1]; - param_["kernal_w"] =(float) kernal[2]; - param_["stride_t"] =(float) stride[0]; - param_["stride_h"] =(float) stride[1]; - param_["stride_w"] =(float) stride[2]; - param_["padding"] =(float) padding; - param_["bias"] =(float) bias; + param_["in_channel"] = (float)in_channel; + param_["out_channel"] = (float)out_channel; + param_["kernal_t"] = (float)kernal[0]; + param_["kernal_h"] = (float)kernal[1]; + param_["kernal_w"] = (float)kernal[2]; + param_["stride_t"] = (float)stride[0]; + param_["stride_h"] = (float)stride[1]; + param_["stride_w"] = (float)stride[2]; + param_["padding"] = (float)padding; + param_["bias"] = (float)bias; init(std::move(name), OpType::CONVOLUTION3D); } Tensor &operator()(Tensor &input) { @@ -701,7 +656,7 @@ class Convolution3D final : public Layer { class Concat final : public Layer { public: explicit Concat(Chl axis, std::string name) { - param_["axis"] =(float)axis; + param_["axis"] = (float)axis; init(std::move(name), OpType::CAT); } Tensor &operator()(Tensor &input0, Tensor &input1) { diff --git a/src/Module.hpp b/src/Module.hpp index fa17b2b6..8b06a230 100644 --- a/src/Module.hpp +++ b/src/Module.hpp @@ -68,15 +68,9 @@ class Module { Forward(inputs, anyArgs); for (auto &input : inputs) { - input.status() = TENSOR_STATIC_SHAPED; + input.status() = TENSOR_STATIC_READY; } - tensor_status = TENSOR_STATIC_SHAPED; - - Forward(inputs, anyArgs); - for (auto &input : inputs) { - input.status() = TENSOR_STATIC_ALLOCED; - } - tensor_status = TENSOR_STATIC_ALLOCED; + tensor_status = TENSOR_STATIC_READY; return Forward(inputs, anyArgs); } else { @@ -84,20 +78,6 @@ class Module { } } - // vector call(vector inputs, vector args) { - // return operator()(inputs, args); - // } - - // template - // static vector List(int n) { - // static_assert(std::is_base_of::value, "T must be a subclass of Module"); - // - // vector modules; - // for (int i = 0; i < n; i++) { - // modules.push_back(new T()); - // } - // return modules; - // } static int listIdx; static int runlistIdx; diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 81ee3033..527d367a 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -148,13 +148,11 @@ Tensor &Tensor::binaryCompute(Func operation, string append_s, float data) { gph_[next_name].setName(next_name); } CPUbinaryFunction::reshape(gph_[name_], gph_[next_name]); - break; - } - case TENSOR_STATIC_SHAPED: { + CPUbinaryFunction::setup(gph_[name_], gph_[next_name]); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUbinaryFunction::execute(gph_[name_], gph_[next_name], operation, data); break; } @@ -207,13 +205,11 @@ Tensor &Tensor::binaryTwoCompute(Func operation, string append_s, Tensor& other) gph_[next_name].setName(next_name); } CPUbinaryTwoFunction::reshape(gph_[name_], gph_[other.name_], gph_[next_name]); - break; - } - case TENSOR_STATIC_SHAPED: { + CPUbinaryTwoFunction::setup(gph_[name_], gph_[other.name_], gph_[next_name]); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUbinaryTwoFunction::execute(gph_[name_], gph_[other.name_], gph_[next_name], operation); break; } @@ -253,13 +249,11 @@ Tensor& Tensor::mean(Chl axis) { gph_[next_name].setName(next_name); } CPUmeanFunction::reshape(gph_[name_], gph_[next_name], axis); - break; - } - case TENSOR_STATIC_SHAPED: { + CPUmeanFunction::setup(gph_[name_], gph_[next_name], axis); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUmeanFunction::execute(gph_[name_], gph_[next_name], axis); break; } @@ -287,13 +281,11 @@ Tensor& Tensor::view(int b, int h, int s, int d) { gph_[next_name].setName(next_name); } CPUviewFunction::reshape(gph_[name_], gph_[next_name], b, h, s, d); - break; - } - case TENSOR_STATIC_SHAPED: { + CPUviewFunction::setup(gph_[name_], gph_[next_name], b, h, s, d); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUviewFunction::execute(gph_[name_], gph_[next_name]); break; } @@ -321,13 +313,11 @@ Tensor& Tensor::flatten(Chl axis_start, Chl axis_end) { gph_[next_name].setName(next_name); } CPUflattenFunction::reshape(gph_[name_], gph_[next_name], axis_start, axis_end); - break; - } - case TENSOR_STATIC_SHAPED: { + CPUflattenFunction::setup(gph_[name_], gph_[next_name], axis_start, axis_end); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUflattenFunction::execute(gph_[name_], gph_[next_name]); break; } @@ -376,9 +366,9 @@ Tensor &Tensor::transpose(vector> axiss) { gph_[next_name].changeCtype(gph_[name_].shape().size()); gph_[next_name].undiffusion_ = true; } - break; - } - case TENSOR_STATIC_SHAPED: { + // break; + // } + // case TENSOR_STATIC_SHAPED: { if(gph_[name_].masterTensor() != nullptr) { if (gph_[next_name].master_tensor_ == nullptr) { gph_[next_name].setDtype(gph_[name_].dtype()); @@ -396,7 +386,7 @@ Tensor &Tensor::transpose(vector> axiss) { } break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { break; } default: { @@ -424,13 +414,11 @@ Tensor &Tensor::clip(vector b, vector h, vector s, vector d) gph_[next_name].setName(next_name); } CPUclipFunction::reshape(gph_[name_], gph_[next_name], b, h, s, d); - break; - } - case TENSOR_STATIC_SHAPED: { + CPUclipFunction::setup(gph_[name_], gph_[next_name], b, h, s, d); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUclipFunction::execute(gph_[name_], gph_[next_name], b, h, s, d); break; } @@ -459,13 +447,11 @@ Tensor &Tensor::clip(Chl keep_axis, vector b, vector h, vector s, gph_[next_name].setName(next_name); } CPUclipaxisFunction::reshape(gph_[name_], gph_[next_name], keep_axis, b, h, s, d); - break; - } - case TENSOR_STATIC_SHAPED: { + CPUclipaxisFunction::setup(gph_[name_], gph_[next_name], keep_axis, b, h, s, d); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUclipaxisFunction::execute(gph_[name_], gph_[next_name], keep_axis, b, h, s, d); break; } @@ -502,13 +488,10 @@ Tensor &Tensor::cat(vector input_tensors, Chl axis) { gph_[next_name].setName(next_name); } CPUcatFunction::reshape(inputs, gph_[next_name], axis, expd_batch_, expd_batch_input_idx); - break; - } - case TENSOR_STATIC_SHAPED: { CPUcatFunction::setup(inputs, gph_[next_name], axis, expd_batch_, expd_batch_input_idx); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUcatFunction::execute(inputs, gph_[next_name], axis, expd_batch_, expd_batch_input_idx); break; } @@ -536,13 +519,10 @@ Tensor &Tensor::mm(Tensor& input0, Tensor& input1) { } else { CPUmmFunction::reshape(gph_[input0.name()], gph_[input1.name()], gph_[next_name]); } - break; - } - case TENSOR_STATIC_SHAPED: { CPUmmFunction::setup(gph_[input0.name()], gph_[input1.name()], gph_[next_name]); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUmmFunction::execute(gph_[input0.name()], gph_[input1.name()], gph_[next_name]); break; } @@ -571,13 +551,10 @@ Tensor& Tensor::norm(int L_n) { gph_[next_name].setName(next_name); } CPUnormFunction::reshape(gph_[name_], gph_[next_name], L_n); - break; - } - case TENSOR_STATIC_SHAPED: { CPUnormFunction::setup(gph_[name_], gph_[next_name], L_n); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUnormFunction::execute(gph_[name_], gph_[next_name], L_n); break; } @@ -604,13 +581,10 @@ Tensor& Tensor::where(float value, Chl axis) { gph_[next_name].setName(next_name); } CPUwhereFunction::reshape(gph_[name_], gph_[next_name], value, axis); - break; - } - case TENSOR_STATIC_SHAPED: { CPUwhereFunction::setup(gph_[name_], gph_[next_name], value, axis); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPUwhereFunction::execute(gph_[name_], gph_[next_name], value, axis); break; } @@ -635,13 +609,10 @@ Tensor& Tensor::range(int start, int end) { gph_[next_name].setName(next_name); } CPURangeFunction::reshape(gph_[next_name], start, end); - break; - } - case TENSOR_STATIC_SHAPED: { CPURangeFunction::setup(gph_[next_name], start, end); break; } - case TENSOR_STATIC_ALLOCED: { + case TENSOR_STATIC_READY: { CPURangeFunction::execute(gph_[next_name], start, end); range_name_idx++; break; diff --git a/src/Tensor.hpp b/src/Tensor.hpp index 3e791217..4dc270e9 100644 --- a/src/Tensor.hpp +++ b/src/Tensor.hpp @@ -646,6 +646,16 @@ class Tensor { assert(source.count() == count()); memcpy(host_ptr_, source.host_ptr_, cntSize()); } + void initFrom(const Tensor &source) { + dtype_ = source.dtype(); + chls_ = source.chls_; + ctype_ = source.ctype_; + shape_ = source.shape_; + count_ = source.count_; + if(source.host_ptr_!= nullptr) { + alloc(); + } + } void copyFrom(const shared_ptr &source) { assert(masterTensor() == nullptr); assert(source->dtype() == dtype()); @@ -866,7 +876,7 @@ class Tensor { master_tensor_ = master_tensor; } - vector childTensors() { + vector &childTensors() { return child_tensors_; } void addChildTensor(Tensor *child) { @@ -1201,7 +1211,8 @@ class Tensor { template void saveNData(string new_name = "", string ex = "") { - if (status() == TENSOR_STATIC_ALLOCED || (TENSOR_STATIC_SHAPED == status()&& shape().size()>0)) { + // if (status() == TENSOR_STATIC_ALLOCED || (TENSOR_STATIC_SHAPED == status()&& shape().size()>0)) { + if (status() == TENSOR_STATIC_READY && shape().size()>0) { if (ctype() == BTHWC || ctype() == BCTHW) { save5Data(ex); return; diff --git a/src/backends/cpu/CPUTensorFunction.hpp b/src/backends/cpu/CPUTensorFunction.hpp index 301f115a..e6a4f077 100644 --- a/src/backends/cpu/CPUTensorFunction.hpp +++ b/src/backends/cpu/CPUTensorFunction.hpp @@ -29,6 +29,25 @@ class CPUmmFunction { input.reshape(b, h, s, d); input.transed() = true; input.undiffusion() = false; + // if no TENSOR_STATIC_SHAPED + if (input.masterTensor() != nullptr) { + auto b = input.masterTensor()->batch(); + auto h = input.masterTensor()->head(); + auto d = input.masterTensor()->dimension(); + auto s = input.masterTensor()->sequence(); + input.masterTensor()->chls_ = input.chls_; + input.masterTensor()->changeCtype(); + input.masterTensor()->reshape(b, h, s, d); + for (auto child : input.masterTensor()->childTensors()) { + auto b = child->batch(); + auto h = child->head(); + auto d = child->dimension(); + auto s = child->sequence(); + child->chls_ = input.chls_; + child->changeCtype(); + child->reshape(b, h, s, d); + } + } } public: static void reshape(Tensor &input0, Tensor &input1, Tensor &output) { From 32cceab829bc894b2c1d2f2ab8557bf15c95cdfe Mon Sep 17 00:00:00 2001 From: yirongjie Date: Thu, 14 Mar 2024 19:46:16 +0800 Subject: [PATCH 2/6] fix: static load --- src/Layer.hpp | 556 +++++++++++++------------ src/Module.cpp | 2 +- src/Module.hpp | 14 + src/Tensor.cpp | 49 ++- src/Tensor.hpp | 139 ++++--- src/backends/cpu/CPUTensorFunction.hpp | 20 +- 6 files changed, 405 insertions(+), 375 deletions(-) diff --git a/src/Layer.hpp b/src/Layer.hpp index d8fb9ccb..ba6b7fe6 100644 --- a/src/Layer.hpp +++ b/src/Layer.hpp @@ -29,10 +29,6 @@ class Layer { backend_ = Module::backends[MLLM_CPU]; saved_list_idx = Module::listIdx; init_ = true; - // std::cout<opCreate(param_, std::move(name), threadCount); - // op_->load(*Module::loader); } bool ready() { return init_; @@ -94,282 +90,225 @@ class Layer { } protected: - Tensor &_1I1O_OP(Tensor &input) { - Module::runlistIdx = saved_list_idx; + bool INIT_OP() { if (op_ == nullptr) { op_ = backend_->opCreate(param_, name_, cpu_thread); - op_->load(*Module::loader); } - - string layer_next_name = "out-" + op_->name(); - if (Tensor::gph_.find(input.name()) != Tensor::gph_.end()) { - Tensor::gph_[input.name()].status() = input.status(); + if (Module::doLoad) { + op_->load(*Module::loader); } - switch (input.status()) { - case TENSOR_STATIC_INIT: { - if (Tensor::gph_.find(input.name()) == Tensor::gph_.end()) { - Tensor::gph_[input.name()] = input; - Tensor::gph_[input.name()].setName(input.name()); - } else if (input.count() != Tensor::gph_[input.name()].count()) { - Tensor::gph_[input.name()] = input; - Tensor::gph_[input.name()].setName(input.name()); - } - auto in_name = input.name(); - if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { - if (param_["type"] == KVCACHE) { - layername_2_tensorname[layer_next_name] = layer_next_name; - reset_KVCache(input.name()); - in_name = name_X_to_num(in_name, saved_list_idx); - } else { - layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); + return Module::doLoad; + } + Tensor &_1I1O_OP(Tensor &input) { + Module::runlistIdx = saved_list_idx; + if (INIT_OP()) { + return input; + } else { + string layer_next_name = "out-" + op_->name(); + if (Tensor::gph_.find(input.name()) != Tensor::gph_.end()) { + Tensor::gph_[input.name()].status() = input.status(); + } + switch (input.status()) { + case TENSOR_STATIC_INIT: { + if (Tensor::gph_.find(input.name()) == Tensor::gph_.end()) { + Tensor::gph_[input.name()] = input; + Tensor::gph_[input.name()].setName(input.name()); + } else if (input.count() != Tensor::gph_[input.name()].count()) { + Tensor::gph_[input.name()] = input; + Tensor::gph_[input.name()].setName(input.name()); } + auto in_name = input.name(); + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + if (param_["type"] == KVCACHE) { + layername_2_tensorname[layer_next_name] = layer_next_name; + reset_KVCache(input.name()); + in_name = name_X_to_num(in_name, saved_list_idx); + } else { + layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); + } + } + auto next_name = layername_2_tensorname[layer_next_name]; + if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { + Tensor::gph_[next_name] = Tensor(backend_); + Tensor::gph_[next_name].setName(next_name); + } + vector> shared_inputs{std::shared_ptr(&Tensor::gph_[in_name], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; + op_->reshape(shared_inputs, shared_outputs); + op_->setUp(shared_inputs, shared_outputs); + if (Tensor::gph_[next_name].aggregated() == false) { + assert(Tensor::gph_[next_name].hostPtr() != nullptr); + } + break; } - auto next_name = layername_2_tensorname[layer_next_name]; - if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { - Tensor::gph_[next_name] = Tensor(backend_); - Tensor::gph_[next_name].setName(next_name); - } - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[in_name], [](Tensor *) {})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; - op_->reshape(shared_inputs, shared_outputs); - op_->setUp(shared_inputs, shared_outputs); - if (Tensor::gph_[next_name].aggregated() == false) { - assert(Tensor::gph_[next_name].hostPtr() != nullptr); + case TENSOR_STATIC_READY: { + auto next_name = layername_2_tensorname[layer_next_name]; + assert(Tensor::gph_[input.name()].hostPtr() != nullptr); + vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; + op_->execute(shared_inputs, shared_outputs); + if (Tensor::gph_[next_name].aggregated() == false) { + assert(Tensor::gph_[next_name].hostPtr() != nullptr); + } + break; } - break; - } - case TENSOR_STATIC_READY: { - auto next_name = layername_2_tensorname[layer_next_name]; - assert(Tensor::gph_[input.name()].hostPtr() != nullptr); - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; - op_->execute(shared_inputs, shared_outputs); - if (Tensor::gph_[next_name].aggregated() == false) { - assert(Tensor::gph_[next_name].hostPtr() != nullptr); + default: { + break; } - break; - } - default: { - break; - } + } + auto next_name = layername_2_tensorname[layer_next_name]; + Tensor::gph_[next_name].status() = Tensor::gph_[input.name()].status(); + // Tensor::gph_[next_name].saveNData(layer_next_name); + return Tensor::gph_[next_name]; } - auto next_name = layername_2_tensorname[layer_next_name]; - Tensor::gph_[next_name].status() = Tensor::gph_[input.name()].status(); - // Tensor::gph_[next_name].saveNData(layer_next_name); - return Tensor::gph_[next_name]; } Tensor &_2I1O_OP(Tensor &input0, Tensor &input1) { Module::runlistIdx = saved_list_idx; - if (op_ == nullptr) { - op_ = backend_->opCreate(param_, name_, cpu_thread); - op_->load(*Module::loader); - } - - string layer_next_name = "out-" + op_->name(); - if (Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) { - Tensor::gph_[input0.name()].status() = input0.status(); - } + if (INIT_OP()) { + return input0; + } else { + string layer_next_name = "out-" + op_->name(); + if (Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) { + Tensor::gph_[input0.name()].status() = input0.status(); + } - if (Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { - Tensor::gph_[input1.name()].status() = input0.status(); - } - if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { - assert(input0.status() == input1.status()); - } - switch (input0.status()) { - case TENSOR_STATIC_INIT: { - if (Tensor::gph_.find(input0.name()) == Tensor::gph_.end()) { - Tensor::gph_[input0.name()] = input0; - Tensor::gph_[input0.name()].setName(input0.name()); + if (Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { + Tensor::gph_[input1.name()].status() = input0.status(); } - if (Tensor::gph_.find(input1.name()) == Tensor::gph_.end()) { - Tensor::gph_[input1.name()] = input1; - Tensor::gph_[input1.name()].setName(input1.name()); + if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { + assert(input0.status() == input1.status()); + } + switch (input0.status()) { + case TENSOR_STATIC_INIT: { + if (Tensor::gph_.find(input0.name()) == Tensor::gph_.end()) { + Tensor::gph_[input0.name()] = input0; + Tensor::gph_[input0.name()].setName(input0.name()); + } + if (Tensor::gph_.find(input1.name()) == Tensor::gph_.end()) { + Tensor::gph_[input1.name()] = input1; + Tensor::gph_[input1.name()].setName(input1.name()); + } + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); + } + auto next_name = layername_2_tensorname[layer_next_name]; + if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { + Tensor::gph_[next_name] = Tensor(backend_); + Tensor::gph_[next_name].setName(next_name); + } + vector> shared_inputs{ + std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), + std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; + op_->reshape(shared_inputs, shared_outputs); + op_->setUp(shared_inputs, shared_outputs); + assert(Tensor::gph_[next_name].hostPtr() != nullptr); + break; + } + case TENSOR_STATIC_READY: { + auto next_name = layername_2_tensorname[layer_next_name]; + vector> shared_inputs{ + std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), + std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; + op_->execute(shared_inputs, shared_outputs); + assert(Tensor::gph_[next_name].hostPtr() != nullptr); + break; + } + default: { + break; } - if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { - layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); } auto next_name = layername_2_tensorname[layer_next_name]; - if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { - Tensor::gph_[next_name] = Tensor(backend_); - Tensor::gph_[next_name].setName(next_name); - } - vector> shared_inputs{ - std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), - std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; - op_->reshape(shared_inputs, shared_outputs); - op_->setUp(shared_inputs, shared_outputs); - assert(Tensor::gph_[next_name].hostPtr() != nullptr); - break; - } - case TENSOR_STATIC_READY: { - auto next_name = layername_2_tensorname[layer_next_name]; - vector> shared_inputs{ - std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), - std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; - op_->execute(shared_inputs, shared_outputs); - assert(Tensor::gph_[next_name].hostPtr() != nullptr); - break; - } - default: { - break; - } + Tensor::gph_[next_name].status() = Tensor::gph_[input0.name()].status(); + // Tensor::gph_[input0.name()].saveNData(input0.name()); + // Tensor::gph_[input1.name()].saveNData(input1.name()); + // Tensor::gph_[next_name].saveNData(layer_next_name); + return Tensor::gph_[next_name]; } - auto next_name = layername_2_tensorname[layer_next_name]; - Tensor::gph_[next_name].status() = Tensor::gph_[input0.name()].status(); - // Tensor::gph_[input0.name()].saveNData(input0.name()); - // Tensor::gph_[input1.name()].saveNData(input1.name()); - // Tensor::gph_[next_name].saveNData(layer_next_name); - return Tensor::gph_[next_name]; } Tensor &_3I1O_OP(Tensor &input0, Tensor &input1, Tensor &input2) { Module::runlistIdx = saved_list_idx; - if (op_ == nullptr) { - op_ = backend_->opCreate(param_, name_, cpu_thread); - op_->load(*Module::loader); - } - - string layer_next_name = "out-" + op_->name(); - if (Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) { - Tensor::gph_[input0.name()].status() = input0.status(); - } - if (Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { - Tensor::gph_[input1.name()].status() = input0.status(); - } - if (Tensor::gph_.find(input2.name()) != Tensor::gph_.end()) { - Tensor::gph_[input2.name()].status() = input0.status(); - } - if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { - assert(input0.status() == input1.status()); - } - if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input2.name()) != Tensor::gph_.end()) { - assert(input0.status() == input2.status()); - } - switch (input0.status()) { - case TENSOR_STATIC_INIT: { - if (Tensor::gph_.find(input0.name()) == Tensor::gph_.end()) { - Tensor::gph_[input0.name()] = input0; - Tensor::gph_[input0.name()].setName(input0.name()); + if (INIT_OP()) { + return input0; + } else { + string layer_next_name = "out-" + op_->name(); + if (Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) { + Tensor::gph_[input0.name()].status() = input0.status(); } - if (Tensor::gph_.find(input1.name()) == Tensor::gph_.end()) { - Tensor::gph_[input1.name()] = input1; - Tensor::gph_[input1.name()].setName(input1.name()); + if (Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { + Tensor::gph_[input1.name()].status() = input0.status(); } - if (Tensor::gph_.find(input2.name()) == Tensor::gph_.end()) { - Tensor::gph_[input2.name()] = input2; - Tensor::gph_[input2.name()].setName(input2.name()); + if (Tensor::gph_.find(input2.name()) != Tensor::gph_.end()) { + Tensor::gph_[input2.name()].status() = input0.status(); } - if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { - layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); + if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input1.name()) != Tensor::gph_.end()) { + assert(input0.status() == input1.status()); } - auto next_name = layername_2_tensorname[layer_next_name]; - if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { - Tensor::gph_[next_name] = Tensor(backend_); - Tensor::gph_[next_name].setName(next_name); - } - vector> shared_inputs{ - std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), - std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {}), - std::shared_ptr(&Tensor::gph_[input2.name()], [](Tensor *) {})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; - op_->reshape(shared_inputs, shared_outputs); - op_->setUp(shared_inputs, shared_outputs); - assert(Tensor::gph_[next_name].hostPtr() != nullptr); - break; - } - case TENSOR_STATIC_READY: { - auto next_name = layername_2_tensorname[layer_next_name]; - vector> shared_inputs{ - std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), - std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {}), - std::shared_ptr(&Tensor::gph_[input2.name()], [](Tensor *) {})}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; - op_->execute(shared_inputs, shared_outputs); - assert(Tensor::gph_[next_name].hostPtr() != nullptr); - break; - } - default: { - break; - } - } - auto next_name = layername_2_tensorname[layer_next_name]; - Tensor::gph_[next_name].status() = Tensor::gph_[input0.name()].status(); - // Tensor::gph_[next_name].saveNData(layer_next_name); - return Tensor::gph_[next_name]; - } - Tensor &_0I1O_OP() { - Module::runlistIdx = saved_list_idx; - if (op_ == nullptr) { - op_ = backend_->opCreate(param_, name_, cpu_thread); - op_->load(*Module::loader); - } - string layer_next_name = "param-" + op_->name(); - switch (Module::tensor_status) { - case TENSOR_STATIC_INIT: { - if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { - layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); + if ((Tensor::gph_.find(input0.name()) != Tensor::gph_.end()) && Tensor::gph_.find(input2.name()) != Tensor::gph_.end()) { + assert(input0.status() == input2.status()); } - auto next_name = layername_2_tensorname[layer_next_name]; - if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { - Tensor::gph_[next_name] = Tensor(backend_); - Tensor::gph_[next_name].setName(next_name); - } - vector> shared_inputs{}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; - op_->reshape(shared_inputs, shared_outputs); - op_->setUp(shared_inputs, shared_outputs); - if (Tensor::gph_[next_name].aggregated() == false) { + switch (input0.status()) { + case TENSOR_STATIC_INIT: { + if (Tensor::gph_.find(input0.name()) == Tensor::gph_.end()) { + Tensor::gph_[input0.name()] = input0; + Tensor::gph_[input0.name()].setName(input0.name()); + } + if (Tensor::gph_.find(input1.name()) == Tensor::gph_.end()) { + Tensor::gph_[input1.name()] = input1; + Tensor::gph_[input1.name()].setName(input1.name()); + } + if (Tensor::gph_.find(input2.name()) == Tensor::gph_.end()) { + Tensor::gph_[input2.name()] = input2; + Tensor::gph_[input2.name()].setName(input2.name()); + } + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); + } + auto next_name = layername_2_tensorname[layer_next_name]; + if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { + Tensor::gph_[next_name] = Tensor(backend_); + Tensor::gph_[next_name].setName(next_name); + } + vector> shared_inputs{ + std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), + std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {}), + std::shared_ptr(&Tensor::gph_[input2.name()], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; + op_->reshape(shared_inputs, shared_outputs); + op_->setUp(shared_inputs, shared_outputs); assert(Tensor::gph_[next_name].hostPtr() != nullptr); + break; } - break; - } - case TENSOR_STATIC_READY: { - auto next_name = layername_2_tensorname[layer_next_name]; - vector> shared_inputs{}; - vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; - op_->execute(shared_inputs, shared_outputs); - if (Tensor::gph_[next_name].aggregated() == false) { + case TENSOR_STATIC_READY: { + auto next_name = layername_2_tensorname[layer_next_name]; + vector> shared_inputs{ + std::shared_ptr(&Tensor::gph_[input0.name()], [](Tensor *) {}), + std::shared_ptr(&Tensor::gph_[input1.name()], [](Tensor *) {}), + std::shared_ptr(&Tensor::gph_[input2.name()], [](Tensor *) {})}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; + op_->execute(shared_inputs, shared_outputs); assert(Tensor::gph_[next_name].hostPtr() != nullptr); + break; } - break; - } - default: { - break; - } + default: { + break; + } + } + auto next_name = layername_2_tensorname[layer_next_name]; + Tensor::gph_[next_name].status() = Tensor::gph_[input0.name()].status(); + // Tensor::gph_[next_name].saveNData(layer_next_name); + return Tensor::gph_[next_name]; } - auto next_name = layername_2_tensorname[layer_next_name]; - Tensor::gph_[next_name].status() = Module::tensor_status; - // Tensor::gph_[next_name].saveNData(layer_next_name); - return Tensor::gph_[next_name]; } - vector _1INO_OP(Tensor &input, int N) { + Tensor &_0I1O_OP() { Module::runlistIdx = saved_list_idx; - if (op_ == nullptr) { - op_ = backend_->opCreate(param_, name_, cpu_thread); - op_->load(*Module::loader); - } - if (Tensor::gph_.find(input.name()) != Tensor::gph_.end()) { - Tensor::gph_[input.name()].status() = input.status(); - } - - vector layer_next_names = {}; - for (int i = 0; i < N; ++i) { - layer_next_names.push_back("out-" + op_->name() + "-" + std::to_string(i)); - } - switch (input.status()) { - case TENSOR_STATIC_INIT: { - if (Tensor::gph_.find(input.name()) == Tensor::gph_.end()) { - Tensor::gph_[input.name()] = input; - Tensor::gph_[input.name()].setName(input.name()); - } else if (input.count() != Tensor::gph_[input.name()].count()) { - Tensor::gph_[input.name()] = input; - Tensor::gph_[input.name()].setName(input.name()); - } - vector> shared_outputs = {}; - vector next_names = {}; - for (const auto &layer_next_name : layer_next_names) { + if (INIT_OP()) { + return Tensor::gph_["0"]; + } else { + string layer_next_name = "param-" + op_->name(); + switch (Module::tensor_status) { + case TENSOR_STATIC_INIT: { if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); } @@ -378,44 +317,107 @@ class Layer { Tensor::gph_[next_name] = Tensor(backend_); Tensor::gph_[next_name].setName(next_name); } - next_names.push_back(next_name); - shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})); + vector> shared_inputs{}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; + op_->reshape(shared_inputs, shared_outputs); + op_->setUp(shared_inputs, shared_outputs); + if (Tensor::gph_[next_name].aggregated() == false) { + assert(Tensor::gph_[next_name].hostPtr() != nullptr); + } + break; } - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; - op_->reshape(shared_inputs, shared_outputs); - op_->setUp(shared_inputs, shared_outputs); - break; - } - case TENSOR_STATIC_READY: { - vector> shared_outputs = {}; - vector next_names = {}; - for (const auto &layer_next_name : layer_next_names) { + case TENSOR_STATIC_READY: { auto next_name = layername_2_tensorname[layer_next_name]; - next_names.push_back(next_name); - shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})); + vector> shared_inputs{}; + vector> shared_outputs{std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})}; + op_->execute(shared_inputs, shared_outputs); + if (Tensor::gph_[next_name].aggregated() == false) { + assert(Tensor::gph_[next_name].hostPtr() != nullptr); + } + break; } - if (Tensor::gph_[input.name()].aggregated() == false) { - assert(Tensor::gph_[input.name()].hostPtr() != nullptr); + default: { + break; } - vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; - op_->execute(shared_inputs, shared_outputs); - for (int i = 0; i < shared_outputs.size(); ++i) { - assert(Tensor::gph_[next_names[i]].hostPtr() != nullptr); } - break; - } - default: { - break; - } - } - vector output_result = {}; - for (const auto &layer_next_name : layer_next_names) { auto next_name = layername_2_tensorname[layer_next_name]; - Tensor::gph_[next_name].status() = Tensor::gph_[input.name()].status(); + Tensor::gph_[next_name].status() = Module::tensor_status; // Tensor::gph_[next_name].saveNData(layer_next_name); - output_result.push_back(Tensor::gph_[next_name]); + return Tensor::gph_[next_name]; + } + } + vector _1INO_OP(Tensor &input, int N) { + Module::runlistIdx = saved_list_idx; + if (INIT_OP()) { + return {input}; + } else { + if (Tensor::gph_.find(input.name()) != Tensor::gph_.end()) { + Tensor::gph_[input.name()].status() = input.status(); + } + + vector layer_next_names = {}; + for (int i = 0; i < N; ++i) { + layer_next_names.push_back("out-" + op_->name() + "-" + std::to_string(i)); + } + switch (input.status()) { + case TENSOR_STATIC_INIT: { + if (Tensor::gph_.find(input.name()) == Tensor::gph_.end()) { + Tensor::gph_[input.name()] = input; + Tensor::gph_[input.name()].setName(input.name()); + } else if (input.count() != Tensor::gph_[input.name()].count()) { + Tensor::gph_[input.name()] = input; + Tensor::gph_[input.name()].setName(input.name()); + } + vector> shared_outputs = {}; + vector next_names = {}; + for (const auto &layer_next_name : layer_next_names) { + if (layername_2_tensorname.find(layer_next_name) == layername_2_tensorname.end()) { + layername_2_tensorname[layer_next_name] = name_num_to_X(layer_next_name); + } + auto next_name = layername_2_tensorname[layer_next_name]; + if (Tensor::gph_.find(next_name) == Tensor::gph_.end()) { + Tensor::gph_[next_name] = Tensor(backend_); + Tensor::gph_[next_name].setName(next_name); + } + next_names.push_back(next_name); + shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})); + } + vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; + op_->reshape(shared_inputs, shared_outputs); + op_->setUp(shared_inputs, shared_outputs); + break; + } + case TENSOR_STATIC_READY: { + vector> shared_outputs = {}; + vector next_names = {}; + for (const auto &layer_next_name : layer_next_names) { + auto next_name = layername_2_tensorname[layer_next_name]; + next_names.push_back(next_name); + shared_outputs.push_back(std::shared_ptr(&Tensor::gph_[next_name], [](Tensor *) {})); + } + if (Tensor::gph_[input.name()].aggregated() == false) { + assert(Tensor::gph_[input.name()].hostPtr() != nullptr); + } + vector> shared_inputs{std::shared_ptr(&Tensor::gph_[input.name()], [](Tensor *) {})}; + op_->execute(shared_inputs, shared_outputs); + for (int i = 0; i < shared_outputs.size(); ++i) { + assert(Tensor::gph_[next_names[i]].hostPtr() != nullptr); + } + break; + } + default: { + break; + } + } + vector output_result = {}; + for (const auto &layer_next_name : layer_next_names) { + auto next_name = layername_2_tensorname[layer_next_name]; + Tensor::gph_[next_name].status() = Tensor::gph_[input.name()].status(); + // Tensor::gph_[next_name].saveNData(layer_next_name); + output_result.push_back(Tensor::gph_[next_name]); + } + return output_result; } - return output_result; } std::string name_; diff --git a/src/Module.cpp b/src/Module.cpp index c902f630..f6d489b8 100644 --- a/src/Module.cpp +++ b/src/Module.cpp @@ -11,5 +11,5 @@ ParamLoader *Module::loader; int Module::listIdx; int Module::runlistIdx; TensorStatus Module::tensor_status; - +bool Module::doLoad = false; } // namespace mllm \ No newline at end of file diff --git a/src/Module.hpp b/src/Module.hpp index 8b06a230..e85f7624 100644 --- a/src/Module.hpp +++ b/src/Module.hpp @@ -21,6 +21,7 @@ class Module { static map backends; static ParamLoader *loader; static TensorStatus tensor_status; + static bool doLoad; Module() = default; virtual ~Module() = default; @@ -48,6 +49,16 @@ class Module { void load(string path) { initLoader(path); + Module::doLoad = true; + vector tmps; + int max_in_size = 5; + for (int i = 0; i < max_in_size; ++i) { + Tensor::gph_[std::to_string(i)] = Tensor(); + tmps.push_back(Tensor::gph_[std::to_string(i)]); + } + operator()(tmps, 0); + Module::doLoad = false; + Tensor::gph_.clear(); } virtual vector Forward(vector inputs, vector args) = 0; @@ -58,6 +69,9 @@ class Module { } template vector operator()(vector inputs, Args... args) { + if(doLoad) { + return Forward(inputs, {}); + } vector anyArgs = convertArgsToAnyVector(args...); if (inputs[0].ttype() == TensorType::INPUT_TENSOR) { for (auto &input : inputs) { diff --git a/src/Tensor.cpp b/src/Tensor.cpp index 527d367a..fdbbcb72 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -27,10 +27,10 @@ Tensor::Tensor(const vector &shape) : bool Tensor::reshape(const int batch, const int head, const int sequence, const int dimension) { vector shape(4); - shape[chls_[BATCH]] = batch; - shape[chls_[HEAD]] = head; - shape[chls_[SEQUENCE]] = sequence; - shape[chls_[DIMENSION]] = dimension; + shape[chls()[BATCH]] = batch; + shape[chls()[HEAD]] = head; + shape[chls()[SEQUENCE]] = sequence; + shape[chls()[DIMENSION]] = dimension; // shape[0] = batch; // switch (ctype_) { @@ -54,10 +54,10 @@ bool Tensor::reshape(const int batch, const int head, const int sequence, const // } // vector shape1(4); - // shape1[chls_[BATCH]] = batch; - // shape1[chls_[HEAD]] = head; - // shape1[chls_[SEQUENCE]] = sequence; - // shape1[chls_[DIMENSION]] = dimension; + // shape1[chls()[BATCH]] = batch; + // shape1[chls()[HEAD]] = head; + // shape1[chls()[SEQUENCE]] = sequence; + // shape1[chls()[DIMENSION]] = dimension; // bool isSame = std::equal(shape.begin(), shape.end(), shape1.begin()); // if(!isSame) { // std::cout<<""; @@ -92,11 +92,11 @@ bool Tensor::reshape(const int batch, const int channel, const int time, const i ctype_ = BCTHW; } vector shape(5); - shape[chls_[BATCH]] = batch; - shape[chls_[CHANNLE]] = channel; - shape[chls_[TIME]] = time; - shape[chls_[HEIGHT]] = height; - shape[chls_[WIDTH]] = width; + shape[chls()[BATCH]] = batch; + shape[chls()[CHANNLE]] = channel; + shape[chls()[TIME]] = time; + shape[chls()[HEIGHT]] = height; + shape[chls()[WIDTH]] = width; return reshape(shape); // if (ctype_ != BTHWC) { // ctype_ = BCTHW; @@ -122,6 +122,7 @@ map Tensor::gph_; template Tensor &Tensor::binaryCompute(Func operation, string append_s, float data) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + append_s; switch (status_) { case TENSOR_DYNAMIC: { @@ -179,6 +180,7 @@ Tensor &Tensor::operator/(double data) { } template Tensor &Tensor::binaryTwoCompute(Func operation, string append_s, Tensor& other) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + append_s; switch (status_) { case TENSOR_DYNAMIC: { @@ -233,6 +235,7 @@ Tensor& Tensor::operator/(Tensor& other){ } Tensor& Tensor::mean(Chl axis) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + "-mean"; switch (status_) { case TENSOR_DYNAMIC: { @@ -265,6 +268,7 @@ Tensor& Tensor::mean(Chl axis) { } Tensor& Tensor::view(int b, int h, int s, int d) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + "-view"; switch (status_) { case TENSOR_DYNAMIC: { @@ -297,6 +301,7 @@ Tensor& Tensor::view(int b, int h, int s, int d) { } Tensor& Tensor::flatten(Chl axis_start, Chl axis_end) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + "-flatten"; switch (status_) { case TENSOR_DYNAMIC: { @@ -332,6 +337,7 @@ Tensor &Tensor::transpose(Chl axis0, Chl axis1) { return transpose({{axis0, axis1}}); } Tensor &Tensor::transpose(vector> axiss) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + "-transpose"; if (next_name.find(".X.") != std::string::npos && Module::runlistIdx > 0) {} else { @@ -358,10 +364,10 @@ Tensor &Tensor::transpose(vector> axiss) { for (auto axis : axiss) { auto axis0 = axis.first; auto axis1 = axis.second; - auto ori_0_idx = gph_[next_name].chls_[axis0]; - auto ori_1_idx = gph_[next_name].chls_[axis1]; - gph_[next_name].chls_[axis0] = ori_1_idx; - gph_[next_name].chls_[axis1] = ori_0_idx; + auto ori_0_idx = gph_[next_name].chls()[axis0]; + auto ori_1_idx = gph_[next_name].chls()[axis1]; + gph_[next_name].chls()[axis0] = ori_1_idx; + gph_[next_name].chls()[axis1] = ori_0_idx; } gph_[next_name].changeCtype(gph_[name_].shape().size()); gph_[next_name].undiffusion_ = true; @@ -398,6 +404,7 @@ Tensor &Tensor::transpose(vector> axiss) { } Tensor &Tensor::clip(vector b, vector h, vector s, vector d) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + "-clip"; switch (status_) { case TENSOR_DYNAMIC: { @@ -429,8 +436,8 @@ Tensor &Tensor::clip(vector b, vector h, vector s, vector d) return gph_[next_name]; } - Tensor &Tensor::clip(Chl keep_axis, vector b, vector h, vector s, vector d) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + "-clip"; switch (status_) { case TENSOR_DYNAMIC: { @@ -463,6 +470,7 @@ Tensor &Tensor::clip(Chl keep_axis, vector b, vector h, vector s, } Tensor &Tensor::cat(vector input_tensors, Chl axis) { + if(Module::doLoad){return Tensor::gph_["0"];} const std::string next_name = input_tensors[0].name() + "-cat"; int expd_batch_ = input_tensors[0].batch(); int expd_batch_input_idx = 0; @@ -503,6 +511,7 @@ Tensor &Tensor::cat(vector input_tensors, Chl axis) { } Tensor &Tensor::mm(Tensor& input0, Tensor& input1) { + if(Module::doLoad){return Tensor::gph_["0"];} const std::string next_name = input0.name() + "-mm-" + input1.name(); switch (input0.status()) { case TENSOR_DYNAMIC: { @@ -534,6 +543,7 @@ Tensor &Tensor::mm(Tensor& input0, Tensor& input1) { } Tensor& Tensor::norm(int L_n) { + if(Module::doLoad){return *this;} assert(L_n ==1 || L_n ==2); const std::string next_name = name_ + "-norm"; switch (status_) { @@ -564,7 +574,9 @@ Tensor& Tensor::norm(int L_n) { gph_[next_name].status() = status_; return gph_[next_name]; } + Tensor& Tensor::where(float value, Chl axis) { + if(Module::doLoad){return *this;} const std::string next_name = name_ + "-where"; switch (status_) { case TENSOR_DYNAMIC: { @@ -596,6 +608,7 @@ Tensor& Tensor::where(float value, Chl axis) { } Tensor& Tensor::range(int start, int end) { + if(Module::doLoad){return Tensor::gph_["0"];} static int range_name_idx = 0; const std::string next_name = "range" + std::to_string(range_name_idx); switch (Module::tensor_status) { diff --git a/src/Tensor.hpp b/src/Tensor.hpp index 4dc270e9..c2b2b75b 100644 --- a/src/Tensor.hpp +++ b/src/Tensor.hpp @@ -68,11 +68,12 @@ class Tensor { } } static map gph_; - - std::map chls_ = {{BATCH, 0}, {SEQUENCE, 1}, {HEAD, 2}, {DIMENSION, 3}, - {CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; - + std::map& chls() { + return chls_; + } private: + std::map chls_={{BATCH, 0}, {SEQUENCE, 1}, {HEAD, 2}, {DIMENSION, 3}, + {CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; string name_; DataType dtype_; ChlType ctype_ = BSHD; @@ -220,16 +221,16 @@ class Tensor { */ int batch() { - return legacyShape(chls_[BATCH]); + return legacyShape(chls()[BATCH]); } int head() { - return legacyShape(chls_[HEAD]); + return legacyShape(chls()[HEAD]); } int sequence() { - return legacyShape(chls_[SEQUENCE]); + return legacyShape(chls()[SEQUENCE]); } int dimension() { - return legacyShape(chls_[DIMENSION]); + return legacyShape(chls()[DIMENSION]); } /** @@ -499,36 +500,36 @@ class Tensor { ctype_ = type; switch (ctype_) { case BSHD: - chls_[BATCH] = 0; - chls_[SEQUENCE] = 1; - chls_[HEAD] = 2; - chls_[DIMENSION] = 3; + chls()[BATCH] = 0; + chls()[SEQUENCE] = 1; + chls()[HEAD] = 2; + chls()[DIMENSION] = 3; break; case BHDS: - chls_[BATCH] = 0; - chls_[HEAD] = 1; - chls_[DIMENSION] = 2; - chls_[SEQUENCE] = 3; + chls()[BATCH] = 0; + chls()[HEAD] = 1; + chls()[DIMENSION] = 2; + chls()[SEQUENCE] = 3; break; case SBHD: - chls_[SEQUENCE] = 0; - chls_[BATCH] = 1; - chls_[HEAD] = 2; - chls_[DIMENSION] = 3; + chls()[SEQUENCE] = 0; + chls()[BATCH] = 1; + chls()[HEAD] = 2; + chls()[DIMENSION] = 3; break; case BTHWC: - chls_[BATCH] = 0; - chls_[TIME] = 1; - chls_[HEIGHT] = 2; - chls_[WIDTH] = 3; - chls_[CHANNLE] = 3; + chls()[BATCH] = 0; + chls()[TIME] = 1; + chls()[HEIGHT] = 2; + chls()[WIDTH] = 3; + chls()[CHANNLE] = 3; break; case BCTHW: - chls_[BATCH] = 0; - chls_[CHANNLE] = 1; - chls_[TIME] = 2; - chls_[HEIGHT] = 3; - chls_[WIDTH] = 3; + chls()[BATCH] = 0; + chls()[CHANNLE] = 1; + chls()[TIME] = 2; + chls()[HEIGHT] = 3; + chls()[WIDTH] = 3; break; default: break; @@ -577,12 +578,12 @@ class Tensor { auto d = dimension(); auto s = sequence(); ctype_ = BHDS; - auto ori_seq_idx = chls_[SEQUENCE]; - auto ori_head_idx = chls_[HEAD]; - auto ori_dim_idx = chls_[DIMENSION]; - chls_[HEAD] = ori_seq_idx; - chls_[DIMENSION] = ori_head_idx; - chls_[SEQUENCE] = ori_dim_idx; + auto ori_seq_idx = chls()[SEQUENCE]; + auto ori_head_idx = chls()[HEAD]; + auto ori_dim_idx = chls()[DIMENSION]; + chls()[HEAD] = ori_seq_idx; + chls()[DIMENSION] = ori_head_idx; + chls()[SEQUENCE] = ori_dim_idx; reshape(b, h, s, d); transed_ = true; undiffusion_ = undiffusion; @@ -592,12 +593,12 @@ class Tensor { auto d = dimension(); auto s = sequence(); ctype_ = BSHD; - auto ori_seq_idx = chls_[SEQUENCE]; - auto ori_head_idx = chls_[HEAD]; - auto ori_dim_idx = chls_[DIMENSION]; - chls_[SEQUENCE] = ori_head_idx; - chls_[HEAD] = ori_dim_idx; - chls_[DIMENSION] = ori_seq_idx; + auto ori_seq_idx = chls()[SEQUENCE]; + auto ori_head_idx = chls()[HEAD]; + auto ori_dim_idx = chls()[DIMENSION]; + chls()[SEQUENCE] = ori_head_idx; + chls()[HEAD] = ori_dim_idx; + chls()[DIMENSION] = ori_seq_idx; reshape(b, h, s, d); transed_ = false; undiffusion_ = undiffusion; @@ -608,14 +609,14 @@ class Tensor { auto h = height(); auto w = width(); ctype_ = BTHWC; - auto ori_chl_idx = chls_[CHANNLE]; - auto ori_time_idx = chls_[TIME]; - auto ori_height_idx = chls_[HEIGHT]; - auto ori_width_idx = chls_[WIDTH]; - chls_[TIME] = ori_chl_idx; - chls_[HEIGHT] = ori_time_idx; - chls_[WIDTH] = ori_height_idx; - chls_[CHANNLE] = ori_width_idx; + auto ori_chl_idx = chls()[CHANNLE]; + auto ori_time_idx = chls()[TIME]; + auto ori_height_idx = chls()[HEIGHT]; + auto ori_width_idx = chls()[WIDTH]; + chls()[TIME] = ori_chl_idx; + chls()[HEIGHT] = ori_time_idx; + chls()[WIDTH] = ori_height_idx; + chls()[CHANNLE] = ori_width_idx; reshape(b, c, t, h, w); transed_ = true; undiffusion_ = undiffusion; @@ -625,10 +626,10 @@ class Tensor { auto d = dimension(); auto s = sequence(); ctype_ = SBHD; - auto ori_batch_idx = chls_[BATCH]; - auto ori_seq_idx = chls_[SEQUENCE]; - chls_[SEQUENCE] = ori_batch_idx; - chls_[BATCH] = ori_seq_idx; + auto ori_batch_idx = chls()[BATCH]; + auto ori_seq_idx = chls()[SEQUENCE]; + chls()[SEQUENCE] = ori_batch_idx; + chls()[BATCH] = ori_seq_idx; reshape(b, h, s, d); transed_ = true; undiffusion_ = undiffusion; @@ -675,10 +676,10 @@ class Tensor { size = shape().size(); } if(size == 4) { - vector a = {chls_[BATCH] , chls_[HEAD] , chls_[SEQUENCE] , chls_[DIMENSION]}; + vector a = {chls()[BATCH] , chls()[HEAD] , chls()[SEQUENCE] , chls()[DIMENSION]}; ctype_ = Chls2Type[a]; }else { - vector a = {chls_[BATCH] , chls_[TIME] , chls_[HEIGHT] , chls_[WIDTH] , chls_[CHANNLE]}; + vector a = {chls()[BATCH] , chls()[TIME] , chls()[HEIGHT] , chls()[WIDTH] , chls()[CHANNLE]}; ctype_ = Chls2Type[a]; } } @@ -782,10 +783,10 @@ class Tensor { auto tf = trans_from_[i]; auto axis0 = tf.first; auto axis1 = tf.second; - auto ori_0_idx = child_tensors_[0]->chls_[axis0]; - auto ori_1_idx = child_tensors_[0]->chls_[axis1]; - child_tensors_[0]->chls_[axis0] = ori_1_idx; - child_tensors_[0]->chls_[axis1] = ori_0_idx; + auto ori_0_idx = child_tensors_[0]->chls()[axis0]; + auto ori_1_idx = child_tensors_[0]->chls()[axis1]; + child_tensors_[0]->chls()[axis0] = ori_1_idx; + child_tensors_[0]->chls()[axis1] = ori_0_idx; } changeCtype(); child_tensors_[0]->changeCtype(); @@ -808,10 +809,10 @@ class Tensor { auto tf = trans_from_[i]; auto axis0 = tf.first; auto axis1 = tf.second; - auto ori_0_idx = child_tensors_[0]->chls_[axis0]; - auto ori_1_idx = child_tensors_[0]->chls_[axis1]; - child_tensors_[0]->chls_[axis0] = ori_1_idx; - child_tensors_[0]->chls_[axis1] = ori_0_idx; + auto ori_0_idx = child_tensors_[0]->chls()[axis0]; + auto ori_1_idx = child_tensors_[0]->chls()[axis1]; + child_tensors_[0]->chls()[axis0] = ori_1_idx; + child_tensors_[0]->chls()[axis1] = ori_0_idx; } // chls_ ={{BATCH, 0}, {CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; // child_tensors_[0]->chls_ = {{BATCH, 0}, {CHANNLE, 4}, {TIME, 1}, {HEIGHT, 2}, {WIDTH, 3}}; @@ -833,7 +834,7 @@ class Tensor { shape_offset_ = shape_offset; shape_master_ = {source->batch(), source->head(), source->sequence(), source->dimension()}; if (!std::equal(source->chls_.begin(), source->chls_.end(), chls_.begin())) { - if(chls_[SEQUENCE] == source->chls_[DIMENSION] && source->chls_[SEQUENCE] == chls_[DIMENSION]) { + if(chls()[SEQUENCE] == source->chls()[DIMENSION] && source->chls()[SEQUENCE] == chls()[DIMENSION]) { shape_master_ = {source->batch(), source->head(), source->dimension(), source->sequence()}; shape_offset_ = {shape_offset[0], shape_offset[1], shape_offset[3], shape_offset[2]}; } else { @@ -984,7 +985,7 @@ class Tensor { */ int channel() { assert(shape().size() == 5); - return legacyShape(chls_[CHANNLE]); + return legacyShape(chls()[CHANNLE]); // switch (ctype_) { // case BCTHW: // return legacyShape(1); @@ -995,7 +996,7 @@ class Tensor { } int time() { assert(shape().size() == 5); - return legacyShape(chls_[TIME]); + return legacyShape(chls()[TIME]); switch (ctype_) { case BCTHW: return legacyShape(2); @@ -1006,7 +1007,7 @@ class Tensor { } int height() { assert(shape().size() == 5); - return legacyShape(chls_[HEIGHT]); + return legacyShape(chls()[HEIGHT]); // switch (ctype_) { // case BCTHW: // return legacyShape(3); @@ -1017,7 +1018,7 @@ class Tensor { } int width() { assert(shape().size() == 5); - return legacyShape(chls_[WIDTH]); + return legacyShape(chls()[WIDTH]); // switch (ctype_) { // case BCTHW: // return legacyShape(4); diff --git a/src/backends/cpu/CPUTensorFunction.hpp b/src/backends/cpu/CPUTensorFunction.hpp index e6a4f077..caf74d97 100644 --- a/src/backends/cpu/CPUTensorFunction.hpp +++ b/src/backends/cpu/CPUTensorFunction.hpp @@ -19,12 +19,12 @@ class CPUmmFunction { auto h = input.head(); auto d = input.dimension(); auto s = input.sequence(); - auto ori_seq_idx = input.chls_[SEQUENCE]; - auto ori_head_idx = input.chls_[HEAD]; - auto ori_dim_idx = input.chls_[DIMENSION]; - input.chls_[HEAD] = ori_seq_idx; - input.chls_[DIMENSION] = ori_head_idx; - input.chls_[SEQUENCE] = ori_dim_idx; + auto ori_seq_idx = input.chls()[SEQUENCE]; + auto ori_head_idx = input.chls()[HEAD]; + auto ori_dim_idx = input.chls()[DIMENSION]; + input.chls()[HEAD] = ori_seq_idx; + input.chls()[DIMENSION] = ori_head_idx; + input.chls()[SEQUENCE] = ori_dim_idx; input.changeCtype(); input.reshape(b, h, s, d); input.transed() = true; @@ -35,7 +35,7 @@ class CPUmmFunction { auto h = input.masterTensor()->head(); auto d = input.masterTensor()->dimension(); auto s = input.masterTensor()->sequence(); - input.masterTensor()->chls_ = input.chls_; + input.masterTensor()->chls() = input.chls(); input.masterTensor()->changeCtype(); input.masterTensor()->reshape(b, h, s, d); for (auto child : input.masterTensor()->childTensors()) { @@ -43,7 +43,7 @@ class CPUmmFunction { auto h = child->head(); auto d = child->dimension(); auto s = child->sequence(); - child->chls_ = input.chls_; + child->chls() = input.chls(); child->changeCtype(); child->reshape(b, h, s, d); } @@ -51,7 +51,7 @@ class CPUmmFunction { } public: static void reshape(Tensor &input0, Tensor &input1, Tensor &output) { - if(input1.chls_[SEQUENCE] != 3) { + if(input1.chls()[SEQUENCE] != 3) { tranTensorChl(input1); } assert(input0.dimension() == input1.sequence()); @@ -64,7 +64,7 @@ class CPUmmFunction { output.alloc(); } static void execute(Tensor &input0, Tensor &input1, Tensor &output) { - bool isSame = std::equal(input0.chls_.begin(), input0.chls_.end(), input1.chls_.begin()); + bool isSame = std::equal(input0.chls().begin(), input0.chls().end(), input1.chls().begin()); assert(input0.dtype() == MLLM_TYPE_F32); switch (input1.dtype()) { case MLLM_TYPE_F32: { From 9157339b080387ed16fb8f1e170b527fba24871e Mon Sep 17 00:00:00 2001 From: yirongjie Date: Fri, 15 Mar 2024 01:46:22 +0000 Subject: [PATCH 3/6] fix: load inputs --- src/Layer.hpp | 6 +++++- src/Module.hpp | 7 ++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/Layer.hpp b/src/Layer.hpp index ba6b7fe6..7a1e03dd 100644 --- a/src/Layer.hpp +++ b/src/Layer.hpp @@ -349,7 +349,11 @@ class Layer { vector _1INO_OP(Tensor &input, int N) { Module::runlistIdx = saved_list_idx; if (INIT_OP()) { - return {input}; + vector out; + for (int i = 0; i < N; ++i) { + out.push_back(input); + } + return out; } else { if (Tensor::gph_.find(input.name()) != Tensor::gph_.end()) { Tensor::gph_[input.name()].status() = input.status(); diff --git a/src/Module.hpp b/src/Module.hpp index e85f7624..16581295 100644 --- a/src/Module.hpp +++ b/src/Module.hpp @@ -56,7 +56,8 @@ class Module { Tensor::gph_[std::to_string(i)] = Tensor(); tmps.push_back(Tensor::gph_[std::to_string(i)]); } - operator()(tmps, 0); + vector tmpt = {0, 0}; + operator()(tmps, tmpt); Module::doLoad = false; Tensor::gph_.clear(); } @@ -69,10 +70,10 @@ class Module { } template vector operator()(vector inputs, Args... args) { + vector anyArgs = convertArgsToAnyVector(args...); if(doLoad) { - return Forward(inputs, {}); + return Forward(inputs, anyArgs); } - vector anyArgs = convertArgsToAnyVector(args...); if (inputs[0].ttype() == TensorType::INPUT_TENSOR) { for (auto &input : inputs) { input.setTtype(TensorType::NORMAL_TENSOR); From 9ce646a2e86e09d39bbb4970c1017d9ab6046d93 Mon Sep 17 00:00:00 2001 From: yirongjie Date: Fri, 15 Mar 2024 02:23:49 +0000 Subject: [PATCH 4/6] fix: cjls() error in testloader --- .gitignore | 1 + examples/demo_vit.cpp | 1 - test/TestLoader.cpp | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.gitignore b/.gitignore index 655f4a28..97d630c7 100644 --- a/.gitignore +++ b/.gitignore @@ -25,3 +25,4 @@ Makefile models/* /.devcontainer/ /.vscode/ +models diff --git a/examples/demo_vit.cpp b/examples/demo_vit.cpp index dba88d3e..361e4006 100644 --- a/examples/demo_vit.cpp +++ b/examples/demo_vit.cpp @@ -1,5 +1,4 @@ #include -#include #include "cmdline.h" #include "models/vit/modeling_vit.hpp" #include "models/vit/labels_vit.hpp" diff --git a/test/TestLoader.cpp b/test/TestLoader.cpp index 4ad9bafa..4d0cfd05 100644 --- a/test/TestLoader.cpp +++ b/test/TestLoader.cpp @@ -71,13 +71,13 @@ bool TestLoader::load(Tensor *tensor, bool strict) { } } if(index->dims.size() == 5) { - tensor->chls_ = {{BATCH, 0},{CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; + tensor->chls() = {{BATCH, 0},{CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; tensor->setCtype(BCTHW); } if (tensor->shape().empty()) { // Get shape from TensorIndex if(index->dims.size() == 5) { - tensor->chls_ = {{BATCH, 0},{CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; + tensor->chls() = {{BATCH, 0},{CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; tensor->reshape(index->dims[0], index->dims[1], index->dims[2], index->dims[3], index->dims[4]); }else { tensor->reshape(index->dims[0], index->dims[1], index->dims[2], index->dims[3]); From 6048dae88bfc1c3e5db0c5d872dd46145992804b Mon Sep 17 00:00:00 2001 From: yirongjie Date: Fri, 15 Mar 2024 10:04:39 +0000 Subject: [PATCH 5/6] fix: merge Tensor CPU Functions --- src/Tensor.cpp | 546 +++--------------- src/Tensor.hpp | 21 +- src/backends/cpu/CPUTensorFunction.hpp | 276 ++++----- .../transformer/configuration_transformer.hpp | 2 + 4 files changed, 201 insertions(+), 644 deletions(-) diff --git a/src/Tensor.cpp b/src/Tensor.cpp index fdbbcb72..f1926a50 100644 --- a/src/Tensor.cpp +++ b/src/Tensor.cpp @@ -4,6 +4,7 @@ #include "backends/cpu/CPUTensorFunction.hpp" #include +#include namespace mllm { @@ -31,37 +32,6 @@ bool Tensor::reshape(const int batch, const int head, const int sequence, const shape[chls()[HEAD]] = head; shape[chls()[SEQUENCE]] = sequence; shape[chls()[DIMENSION]] = dimension; - - // shape[0] = batch; - // switch (ctype_) { - // case BSHD: - // shape[1] = sequence; - // shape[2] = head; - // shape[3] = dimension; - // break; - // case BHDS: - // shape[1] = head; - // shape[2] = dimension; - // shape[3] = sequence; - // break; - // case SBHD: - // shape[0] = sequence; - // shape[1] = batch; - // shape[2] = head; - // shape[3] = dimension; - // default: - // break; - // } - - // vector shape1(4); - // shape1[chls()[BATCH]] = batch; - // shape1[chls()[HEAD]] = head; - // shape1[chls()[SEQUENCE]] = sequence; - // shape1[chls()[DIMENSION]] = dimension; - // bool isSame = std::equal(shape.begin(), shape.end(), shape1.begin()); - // if(!isSame) { - // std::cout<<""; - // } return reshape(shape); } @@ -87,7 +57,6 @@ void Tensor::alloc() { } bool Tensor::reshape(const int batch, const int channel, const int time, const int height, const int width) { - if (ctype_ != BTHWC) { ctype_ = BCTHW; } @@ -98,47 +67,15 @@ bool Tensor::reshape(const int batch, const int channel, const int time, const i shape[chls()[HEIGHT]] = height; shape[chls()[WIDTH]] = width; return reshape(shape); - // if (ctype_ != BTHWC) { - // ctype_ = BCTHW; - // vector shape(5); - // shape[0] = batch; - // shape[1] = channel; - // shape[2] = time; - // shape[3] = height; - // shape[4] = width; - // return reshape(shape); - // } else { - // vector shape(5); - // shape[0] = batch; - // shape[1] = time; - // shape[2] = height; - // shape[3] = width; - // shape[4] = channel; - // return reshape(shape); - // } } map Tensor::gph_; -template -Tensor &Tensor::binaryCompute(Func operation, string append_s, float data) { - if(Module::doLoad){return *this;} - const std::string next_name = name_ + append_s; +template +Tensor &Tensor::applyFunc(const std::string &suffix, Func func, Args... args) { + if (Module::doLoad) { return *this; } + const std::string next_name = name_ + "-" + suffix; switch (status_) { - case TENSOR_DYNAMIC: { - if (gph_.find(name_) == gph_.end()) { - gph_[name_] = *this; - gph_[name_].status() = status_; - } - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(backend_); - gph_[next_name].setName(next_name); - } - CPUbinaryFunction::reshape(gph_[name_], gph_[next_name]); - CPUbinaryFunction::setup(gph_[name_], gph_[next_name]); - CPUbinaryFunction::execute(gph_[name_], gph_[next_name], operation, data); - break; - } case TENSOR_STATIC_INIT: { if (gph_.find(name_) == gph_.end()) { gph_[name_] = *this; @@ -148,13 +85,11 @@ Tensor &Tensor::binaryCompute(Func operation, string append_s, float data) { gph_[next_name] = Tensor(backend_); gph_[next_name].setName(next_name); } - CPUbinaryFunction::reshape(gph_[name_], gph_[next_name]); - - CPUbinaryFunction::setup(gph_[name_], gph_[next_name]); + func.setup(gph_[name_], gph_[next_name], args...); break; } case TENSOR_STATIC_READY: { - CPUbinaryFunction::execute(gph_[name_], gph_[next_name], operation, data); + func.execute(gph_[name_], gph_[next_name], args...); break; } default: { @@ -163,477 +98,126 @@ Tensor &Tensor::binaryCompute(Func operation, string append_s, float data) { gph_[next_name].status() = status_; return gph_[next_name]; } + +template +Tensor &Tensor::binaryCompute(Func operation, string append_s, float data) { + return applyFunc(append_s, CPUbinaryFunction(), operation, data); +} + Tensor &Tensor::operator+(float data) { - return binaryCompute(std::plus(), "-TDadd", data); + return binaryCompute(std::plus(), "-TDadd", data); } Tensor &Tensor::operator-(float data) { - return binaryCompute(std::minus(), "-TDsub", data); + return binaryCompute(std::minus(), "-TDsub", data); } Tensor &Tensor::operator*(float data) { - return binaryCompute(std::multiplies(), "-TDmul", data); + return binaryCompute(std::multiplies(), "-TDmul", data); } Tensor &Tensor::operator/(float data) { - return binaryCompute(std::divides(), "-TDdiv", data); + return binaryCompute(std::divides(), "-TDdiv", data); } Tensor &Tensor::operator/(double data) { - return binaryCompute(std::divides(), "-TDdiv", static_cast(data)); + return binaryCompute(std::divides(), "-TDdiv", static_cast(data)); } -template -Tensor &Tensor::binaryTwoCompute(Func operation, string append_s, Tensor& other) { - if(Module::doLoad){return *this;} - const std::string next_name = name_ + append_s; - switch (status_) { - case TENSOR_DYNAMIC: { - if (gph_.find(name_) == gph_.end()) { - gph_[name_] = *this; - gph_[name_].status() = status_; - } - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(backend_); - gph_[next_name].setName(next_name); - } - CPUbinaryTwoFunction::reshape(gph_[name_], gph_[other.name_], gph_[next_name]); - CPUbinaryTwoFunction::setup(gph_[name_], gph_[other.name_], gph_[next_name]); - CPUbinaryTwoFunction::execute(gph_[name_], gph_[other.name_], gph_[next_name], operation); - break; - } - case TENSOR_STATIC_INIT: { - if (gph_.find(name_) == gph_.end()) { - gph_[name_] = *this; - gph_[name_].status() = status_; - } - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(backend_); - gph_[next_name].setName(next_name); - } - CPUbinaryTwoFunction::reshape(gph_[name_], gph_[other.name_], gph_[next_name]); - CPUbinaryTwoFunction::setup(gph_[name_], gph_[other.name_], gph_[next_name]); - break; - } - case TENSOR_STATIC_READY: { - CPUbinaryTwoFunction::execute(gph_[name_], gph_[other.name_], gph_[next_name], operation); - break; - } - default: { - } - } - gph_[next_name].status() = status_; - return gph_[next_name]; +template +Tensor &Tensor::binaryTwoCompute(Func operation, string append_s, Tensor &other) { + return applyFunc(append_s, CPUbinaryTwoFunction(), other, operation); } -Tensor& Tensor::operator+(Tensor& other) { + +Tensor &Tensor::operator+(Tensor &other) { return binaryTwoCompute(std::plus(), "-TTadd", other); } -Tensor& Tensor::operator-(Tensor& other){ +Tensor &Tensor::operator-(Tensor &other) { return binaryTwoCompute(std::minus(), "-TTsub", other); } -Tensor& Tensor::operator*(Tensor& other){ +Tensor &Tensor::operator*(Tensor &other) { return binaryTwoCompute(std::multiplies(), "-TTmul", other); } -Tensor& Tensor::operator/(Tensor& other){ +Tensor &Tensor::operator/(Tensor &other) { return binaryTwoCompute(std::divides(), "-TTdiv", other); } -Tensor& Tensor::mean(Chl axis) { - if(Module::doLoad){return *this;} - const std::string next_name = name_ + "-mean"; - switch (status_) { - case TENSOR_DYNAMIC: { - std::cout<<"[TODO] not support dynamic tensor view"<> axiss) { - if(Module::doLoad){return *this;} - const std::string next_name = name_ + "-transpose"; - if (next_name.find(".X.") != std::string::npos && Module::runlistIdx > 0) {} - else { - switch (status_) { - case TENSOR_DYNAMIC: { - std::cout << "[TODO] not support dynamic tensor view" << std::endl; - break; - } - case TENSOR_STATIC_INIT: { - if (gph_.find(name_) == gph_.end()) { - gph_[name_] = *this; - gph_[name_].status() = status_; - } - // reshape - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(backend_); - gph_[next_name].setName(next_name); - } - gph_[next_name].trans_copy_shape(gph_[name_].shape()); - std::map origin_chls = {{BATCH, 0}, {SEQUENCE, 1}, {HEAD, 2}, {DIMENSION, 3}, - {CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; - if(std::equal(gph_[next_name].chls_.begin(), gph_[next_name].chls_.end(), origin_chls.begin())) { - gph_[next_name].chls_ = gph_[name_].chls_; - for (auto axis : axiss) { - auto axis0 = axis.first; - auto axis1 = axis.second; - auto ori_0_idx = gph_[next_name].chls()[axis0]; - auto ori_1_idx = gph_[next_name].chls()[axis1]; - gph_[next_name].chls()[axis0] = ori_1_idx; - gph_[next_name].chls()[axis1] = ori_0_idx; - } - gph_[next_name].changeCtype(gph_[name_].shape().size()); - gph_[next_name].undiffusion_ = true; - } - // break; - // } - // case TENSOR_STATIC_SHAPED: { - if(gph_[name_].masterTensor() != nullptr) { - if (gph_[next_name].master_tensor_ == nullptr) { - gph_[next_name].setDtype(gph_[name_].dtype()); - gph_[next_name].deepCopyFrom(gph_[name_], false); - } - }else { - if(gph_[name_].masterTensor() == nullptr) { - gph_[name_].free(); - } - gph_[next_name].setDtype(gph_[name_].dtype()); - gph_[next_name].alloc(); - gph_[name_].undiffusion_ = true; - gph_[name_].deepCopyFrom(gph_[next_name], false); - gph_[next_name].trans_from_ = axiss; - } - break; - } - case TENSOR_STATIC_READY: { - break; - } - default: { - } - } - } - gph_[next_name].status() = status_; - return gph_[next_name]; + return applyFunc("transpose", CPUtransposeFunction(), axiss); } Tensor &Tensor::clip(vector b, vector h, vector s, vector d) { - if(Module::doLoad){return *this;} - const std::string next_name = name_ + "-clip"; - switch (status_) { - case TENSOR_DYNAMIC: { - std::cout << "[TODO] not support dynamic tensor view" << std::endl; - break; - } - case TENSOR_STATIC_INIT: { - if (gph_.find(name_) == gph_.end()) { - gph_[name_] = *this; - gph_[name_].status() = status_; - } - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(backend_); - gph_[next_name].setName(next_name); - } - CPUclipFunction::reshape(gph_[name_], gph_[next_name], b, h, s, d); - - CPUclipFunction::setup(gph_[name_], gph_[next_name], b, h, s, d); - break; - } - case TENSOR_STATIC_READY: { - CPUclipFunction::execute(gph_[name_], gph_[next_name], b, h, s, d); - break; - } - default: { - } - } - gph_[next_name].status() = status_; - return gph_[next_name]; + return applyFunc("clip", CPUclipFunction(), b, h, s, d); } Tensor &Tensor::clip(Chl keep_axis, vector b, vector h, vector s, vector d) { - if(Module::doLoad){return *this;} - const std::string next_name = name_ + "-clip"; - switch (status_) { - case TENSOR_DYNAMIC: { - std::cout << "[TODO] not support dynamic tensor view" << std::endl; - break; - } - case TENSOR_STATIC_INIT: { - if (gph_.find(name_) == gph_.end()) { - gph_[name_] = *this; - gph_[name_].status() = status_; - } - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(backend_); - gph_[next_name].setName(next_name); - } - CPUclipaxisFunction::reshape(gph_[name_], gph_[next_name], keep_axis, b, h, s, d); + return applyFunc("clip", CPUclipaxisFunction(), keep_axis, b, h, s, d); +} - CPUclipaxisFunction::setup(gph_[name_], gph_[next_name], keep_axis, b, h, s, d); - break; - } - case TENSOR_STATIC_READY: { - CPUclipaxisFunction::execute(gph_[name_], gph_[next_name], keep_axis, b, h, s, d); - break; - } - default: { - } - } - gph_[next_name].status() = status_; - return gph_[next_name]; +Tensor &Tensor::norm(int L_n) { + return applyFunc("norm", CPUnormFunction(), L_n); } -Tensor &Tensor::cat(vector input_tensors, Chl axis) { - if(Module::doLoad){return Tensor::gph_["0"];} - const std::string next_name = input_tensors[0].name() + "-cat"; - int expd_batch_ = input_tensors[0].batch(); - int expd_batch_input_idx = 0; - for (int ii = 0; ii < input_tensors.size(); ++ii) { - auto input = input_tensors[ii]; - if (input.batch() > expd_batch_) { - expd_batch_ = input.batch(); - expd_batch_input_idx = ii; - } - } - vector inputs = {}; - for (const auto& input_tensor : input_tensors) { - inputs.push_back(&gph_[input_tensor.name()]); - } - switch (input_tensors[0].status()) { - case TENSOR_DYNAMIC: { - std::cout << "[TODO] not support dynamic tensor view" << std::endl; - break; - } - case TENSOR_STATIC_INIT: { - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(input_tensors[0].backend()); - gph_[next_name].setName(next_name); - } - CPUcatFunction::reshape(inputs, gph_[next_name], axis, expd_batch_, expd_batch_input_idx); - CPUcatFunction::setup(inputs, gph_[next_name], axis, expd_batch_, expd_batch_input_idx); - break; - } - case TENSOR_STATIC_READY: { - CPUcatFunction::execute(inputs, gph_[next_name], axis, expd_batch_, expd_batch_input_idx); - break; - } - default: { - } - } - gph_[next_name].status() = input_tensors[0].status(); - return gph_[next_name]; +Tensor &Tensor::where(float value, Chl axis) { + return applyFunc("where", CPUwhereFunction(), value, axis); } +/** + * static function + */ -Tensor &Tensor::mm(Tensor& input0, Tensor& input1) { - if(Module::doLoad){return Tensor::gph_["0"];} - const std::string next_name = input0.name() + "-mm-" + input1.name(); - switch (input0.status()) { +template +Tensor &Tensor::applyStaticFunc(const std::string &suffix, Func func, Args... args) { + if (Module::doLoad) { return Tensor::gph_["0"]; } + const std::string next_name = suffix; + switch (Module::tensor_status) { case TENSOR_DYNAMIC: { std::cout << "[TODO] not support dynamic tensor view" << std::endl; break; } case TENSOR_STATIC_INIT: { if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(input0.backend()); + gph_[next_name] = Tensor(Module::backends[MLLM_CPU]); gph_[next_name].setName(next_name); } - if (input0.name().find(".X.") != std::string::npos && input1.name().find(".X.") != std::string::npos && next_name.find(".X.") != std::string::npos - && Module::runlistIdx > 0) { - } else { - CPUmmFunction::reshape(gph_[input0.name()], gph_[input1.name()], gph_[next_name]); - } - CPUmmFunction::setup(gph_[input0.name()], gph_[input1.name()], gph_[next_name]); + func.setup(gph_[next_name], args...); break; } case TENSOR_STATIC_READY: { - CPUmmFunction::execute(gph_[input0.name()], gph_[input1.name()], gph_[next_name]); + func.execute(gph_[next_name], args...); break; } default: { } } - gph_[next_name].status() = input0.status(); + gph_[next_name].status() = Module::tensor_status; return gph_[next_name]; } -Tensor& Tensor::norm(int L_n) { - if(Module::doLoad){return *this;} - assert(L_n ==1 || L_n ==2); - const std::string next_name = name_ + "-norm"; - switch (status_) { - case TENSOR_DYNAMIC: { - std::cout << "[TODO] not support dynamic tensor view" << std::endl; - break; - } - case TENSOR_STATIC_INIT: { - if (gph_.find(name_) == gph_.end()) { - gph_[name_] = *this; - gph_[name_].status() = status_; - } - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(backend_); - gph_[next_name].setName(next_name); - } - CPUnormFunction::reshape(gph_[name_], gph_[next_name], L_n); - CPUnormFunction::setup(gph_[name_], gph_[next_name], L_n); - break; - } - case TENSOR_STATIC_READY: { - CPUnormFunction::execute(gph_[name_], gph_[next_name], L_n); - break; - } - default: { - } +Tensor &Tensor::cat(vector input_tensors, Chl axis) { + vector inputs = {}; + for (const auto &input_tensor : input_tensors) { + inputs.push_back(&gph_[input_tensor.name()]); } - gph_[next_name].status() = status_; - return gph_[next_name]; + const std::string next_name = input_tensors[0].name() + "-cat"; + return applyStaticFunc(next_name, CPUcatFunction(), inputs, axis); } -Tensor& Tensor::where(float value, Chl axis) { - if(Module::doLoad){return *this;} - const std::string next_name = name_ + "-where"; - switch (status_) { - case TENSOR_DYNAMIC: { - std::cout << "[TODO] not support dynamic tensor view" << std::endl; - break; - } - case TENSOR_STATIC_INIT: { - if (gph_.find(name_) == gph_.end()) { - gph_[name_] = *this; - gph_[name_].status() = status_; - } - if (gph_.find(next_name) == gph_.end()) { - gph_[next_name] = Tensor(backend_); - gph_[next_name].setName(next_name); - } - CPUwhereFunction::reshape(gph_[name_], gph_[next_name], value, axis); - CPUwhereFunction::setup(gph_[name_], gph_[next_name], value, axis); - break; - } - case TENSOR_STATIC_READY: { - CPUwhereFunction::execute(gph_[name_], gph_[next_name], value, axis); - break; - } - default: { - } - } - gph_[next_name].status() = status_; - return gph_[next_name]; +Tensor &Tensor::mm(Tensor &input0, Tensor &input1) { + const std::string next_name = input0.name() + "-mm-" + input1.name(); + return applyStaticFunc(next_name, CPUmmFunction(), gph_[input0.name()], gph_[input1.name()]); } -Tensor& Tensor::range(int start, int end) { - if(Module::doLoad){return Tensor::gph_["0"];} - static int range_name_idx = 0; - const std::string next_name = "range" + std::to_string(range_name_idx); - switch (Module::tensor_status) { - case TENSOR_DYNAMIC: { - std::cout<<"[TODO] not support dynamic tensor view"<>& transFrom() { + return trans_from_; + } + /** * \brief Overload the operators. * \param data binary data @@ -702,6 +706,8 @@ class Tensor { Tensor& operator/(float data); Tensor& operator/(double data); + + /** * \brief Overload the operators. * \param other The Other Tensor @@ -711,16 +717,16 @@ class Tensor { Tensor& operator-(Tensor& other); Tensor& operator*(Tensor& other); Tensor& operator/(Tensor& other); + Tensor& mean(Chl axis); Tensor& view(int b, int h, int s, int d); Tensor& flatten(Chl axis_start, Chl axis_end); - Tensor& transpose(Chl axis0, Chl axis1); + Tensor& transpose(Chl axis0, Chl axis1){ + return transpose({{axis0, axis1}}); + } Tensor& transpose(vector> axiss); - // Tensor& transpose(vector dims); - // Tensor& transpose_(Chl axis0, Chl axis1); - // Tensor& transpose(vector axis); Tensor& clip(vector b, vector h, vector s, vector d); Tensor &clip(Chl keep_axis, vector b, vector h, vector s, vector d); static Tensor& cat(vector input_tensors, Chl dims);; @@ -1512,6 +1518,13 @@ class Tensor { template Tensor& binaryTwoCompute(Func operation, string append_s, Tensor& other) ; + + template + Tensor& applyFunc(const std::string& suffix, Func func, Args... args); + + template + static Tensor& applyStaticFunc(const std::string& suffix, Func func, Args... args); + }; } // namespace mllm #endif // MLLM_TENSOR_H \ No newline at end of file diff --git a/src/backends/cpu/CPUTensorFunction.hpp b/src/backends/cpu/CPUTensorFunction.hpp index caf74d97..7e579d7d 100644 --- a/src/backends/cpu/CPUTensorFunction.hpp +++ b/src/backends/cpu/CPUTensorFunction.hpp @@ -49,21 +49,18 @@ class CPUmmFunction { } } } + public: - static void reshape(Tensor &input0, Tensor &input1, Tensor &output) { - if(input1.chls()[SEQUENCE] != 3) { + static void setup(Tensor &output, Tensor &input0, Tensor &input1) { + if (input1.chls()[SEQUENCE] != 3) { tranTensorChl(input1); } assert(input0.dimension() == input1.sequence()); - if (input0.dimension() == input1.sequence()) { - output.reshape(input0.batch(), input0.head(), input0.sequence(), input1.dimension()); - } - } - static void setup(Tensor &input0, Tensor &input1, Tensor &output) { + output.reshape(input0.batch(), input0.head(), input0.sequence(), input1.dimension()); output.setDtype(input0.dtype()); output.alloc(); } - static void execute(Tensor &input0, Tensor &input1, Tensor &output) { + static void execute(Tensor &output, Tensor &input0, Tensor &input1) { bool isSame = std::equal(input0.chls().begin(), input0.chls().end(), input1.chls().begin()); assert(input0.dtype() == MLLM_TYPE_F32); switch (input1.dtype()) { @@ -83,52 +80,46 @@ class CPUmmFunction { class CPUnormFunction { public: - static void reshape(Tensor &input, Tensor &output, int L_n) { + static void setup(Tensor &input, Tensor &output, int L_n) { output.reshape(input.batch(), input.head(), input.sequence(), input.dimension()); - } - static void setup(Tensor &input, Tensor &output, int L_n) { output.setDtype(input.dtype()); output.alloc(); } - static void execute(Tensor &input, Tensor &output, int L_n) { + static void execute(Tensor &input, Tensor &output, int L_n) { for (int h = 0; h < input.head(); h++) { for (int n = 0; n < input.batch(); n++) { for (int s = 0; s < input.sequence(); s++) { if (L_n == 2) { float sum_of_squares = 0.0f; for (int d = 0; d < input.dimension(); ++d) { - sum_of_squares += input.dataAt(n, h, s,d) * input.dataAt(n, h, s,d); + sum_of_squares += input.dataAt(n, h, s, d) * input.dataAt(n, h, s, d); } float l2_norm = std::sqrt(sum_of_squares); #pragma omp parallel for num_threads(Layer::cpu_thread) for (int d = 0; d < input.dimension(); d++) { - output.setDataAt(n, h, s,d, l2_norm); + output.setDataAt(n, h, s, d, l2_norm); } } else { float sum_of_abs_values = 0.0f; for (int d = 0; d < input.dimension(); ++d) { - sum_of_abs_values += std::abs(input.dataAt(n, h, s,d)); + sum_of_abs_values += std::abs(input.dataAt(n, h, s, d)); } #pragma omp parallel for num_threads(Layer::cpu_thread) for (int d = 0; d < input.dimension(); d++) { - output.setDataAt(n, h, s,d, sum_of_abs_values); + output.setDataAt(n, h, s, d, sum_of_abs_values); } - } } } } } - }; - class CPUbinaryFunction { public: - static void reshape(Tensor &input, Tensor &output) { + template + static void setup(Tensor &input, Tensor &output, Func operation, float data) { output.reshape(input.batch(), input.head(), input.sequence(), input.dimension()); - } - static void setup(Tensor &input, Tensor &output) { output.setDtype(input.dtype()); output.alloc(); } @@ -158,15 +149,14 @@ class CPUbinaryFunction { class CPUbinaryTwoFunction { public: - static void reshape(Tensor &input0, Tensor &input1, Tensor &output) { + template + static void setup(Tensor &input0, Tensor &output, Tensor &input1, Func operation) { output.reshape(std::max(input0.batch(), input1.batch()), input0.head(), input0.sequence(), input0.dimension()); - } - static void setup(Tensor &input0, Tensor &input1, Tensor &output) { output.setDtype(input0.dtype()); output.alloc(); } template - static void execute(Tensor &input0, Tensor &input1, Tensor &output, Func operation) { + static void execute(Tensor &input0, Tensor &output, Tensor &input1, Func operation) { int batch_ = std::max(input0.batch(), input1.batch()); if (input0.masterTensor() == nullptr && output.masterTensor() == nullptr && input0.ctype() == output.ctype()) { for (int n = 0; n < batch_; ++n) { @@ -199,7 +189,7 @@ class CPUbinaryTwoFunction { }; class CPUmeanFunction { public: - static void reshape(Tensor &input, Tensor &output, Chl axis) { + static void setup(Tensor &input, Tensor &output, Chl axis) { int batch = input.batch(); int head = input.head(); int sequence = input.sequence(); @@ -221,8 +211,6 @@ class CPUmeanFunction { break; } output.reshape(batch, head, sequence, dimension); - } - static void setup(Tensor &input, Tensor &output, Chl axis) { output.setDtype(input.dtype()); output.alloc(); } @@ -296,7 +284,7 @@ class CPUmeanFunction { class CPUviewFunction { public: - static void reshape(Tensor &input, Tensor &output, int b, int h, int s, int d) { + static void setup(Tensor &input, Tensor &output, int b, int h, int s, int d) { int dim_b = input.batch(); int dim_h = input.head(); int dim_s = input.sequence(); @@ -347,8 +335,6 @@ class CPUviewFunction { std::cout << "[TODO]Tensor.View not support!!!!" << std::endl; } output.reshape(dim_b, dim_h, dim_s, dim_d); - } - static void setup(Tensor &input, Tensor &output, int b, int h, int s, int d) { if ((b == -1 && s == -1 && input.ctype() != BCTHW) // head & dimension || (b == -1 && d == -1 && input.ctype() == BSHD) // head & sequence || (h == -1 && d == -1 && input.ctype() == BSHD) // batch & sequence @@ -363,96 +349,33 @@ class CPUviewFunction { std::cout << "[TODO]Tensor.View not support!!!!" << std::endl; } } - static void execute(Tensor &input, Tensor &output) { + static void execute(Tensor &input, Tensor &output, int b, int h, int s, int d) { } }; class CPUflattenFunction { public: - static void reshape(Tensor &input, Tensor &output, Chl axis_start, Chl axis_end) { + static void setup(Tensor &input, Tensor &output, Chl axis_start, Chl axis_end) { int dim_b = input.batch(); int dim_h = 0; int dim_s = 0; int dim_d = 0; - /* - if (input.ctype() == BSHD) { + if (input.shape().size() == 4) { dim_h = input.head(); dim_s = input.sequence(); dim_d = input.dimension(); if (axis_start == BATCH & axis_end == SEQUENCE) { - // data_dims = {-1, HEAD, BATCH + SEQUENCE, DIMENSION}; dim_b = 1; dim_s = input.sequence() * input.batch(); } else if (axis_start == HEAD & axis_end == SEQUENCE) { - // data_dims = {BATCH, -1, HEAD + SEQUENCE, DIMENSION}; dim_h = 1; dim_s = input.sequence() * input.head(); } else if (axis_start == HEAD & axis_end == DIMENSION) { - // data_dims = {BATCH, HEAD, -1, SEQUENCE + DIMENSION}; dim_h = 1; dim_d = input.dimension() * input.head(); } else { std::cout << "ERROR: flatten " << axis_start << "&" << axis_end << std::endl; } - } else if (input.ctype() == BHDS) { - dim_h = input.head(); - dim_s = input.dimension(); - dim_d = input.sequence(); - if (axis_start == BATCH & axis_end == SEQUENCE) { - // data_dims = {-1, HEAD, BATCH + SEQUENCE, DIMENSION}; - dim_b = 1; - dim_s = dim_s * input.batch(); - } else if (axis_start == HEAD & axis_end == SEQUENCE) { - // data_dims = {BATCH, -1, HEAD + SEQUENCE, DIMENSION}; - dim_h = 1; - dim_s = dim_s * input.head(); - } else if (axis_start == HEAD & axis_end == DIMENSION) { - // data_dims = {BATCH, HEAD, -1, SEQUENCE + DIMENSION}; - dim_h = 1; - dim_d = dim_d * input.head(); - } else { - std::cout << "ERROR: flatten " << axis_start << "&" << axis_end << std::endl; - } - } else if (input.ctype() == BDHS) { - dim_h = input.head(); - dim_s = input.sequence(); - dim_d = input.dimension(); - if (axis_start == HEAD & axis_end == SEQUENCE) { - dim_h = 1; - dim_s = input.sequence() * input.head(); - } - }else { - if (axis_start == TIME & axis_end == CHANNLE) { - // data_dims = {BATCH, -1, TIME + HEIGHT + WIDTH, CHANNLE}; - if (input.ctype() == BTHWC) { - dim_h = 1; - dim_s = input.time() * input.height() * input.width(); - dim_d = input.channel(); - } else if (input.ctype() == BCTHW) { - dim_h = 1; - dim_s = input.time() * input.height() * input.channel(); - dim_d = input.width(); - } else { - std::cout << "ERROR: flatten " << axis_start << "&" << axis_end << std::endl; - } - } - }*/ - if(input.shape().size() == 4) { - dim_h = input.head(); - dim_s = input.sequence(); - dim_d = input.dimension(); - if (axis_start == BATCH & axis_end == SEQUENCE) { - dim_b = 1; - dim_s = input.sequence() * input.batch(); - } else if (axis_start == HEAD & axis_end == SEQUENCE) { - dim_h = 1; - dim_s = input.sequence() * input.head(); - } else if (axis_start == HEAD & axis_end == DIMENSION) { - dim_h = 1; - dim_d = input.dimension() * input.head(); - }else { - std::cout << "ERROR: flatten " << axis_start << "&" << axis_end << std::endl; - } } else if (input.shape().size() == 5) { if (axis_start == CHANNLE & axis_end == HEIGHT) { dim_h = 1; @@ -464,45 +387,81 @@ class CPUflattenFunction { dim_d = input.time(); } } - assert(dim_d+dim_s+dim_h > 0); + assert(dim_d + dim_s + dim_h > 0); output.reshape(dim_b, dim_h, dim_s, dim_d); - } - static void setup(Tensor &input, Tensor &output, Chl axis_start, Chl axis_end) { - if ( (axis_start == TIME & axis_end == WIDTH && input.ctype()==BCTHW) - || (axis_start == CHANNLE & axis_end == HEIGHT && input.ctype()==BWCTH) - || (axis_start == HEIGHT & axis_end == CHANNLE && input.ctype()==BTHWC) - || (axis_start == BATCH & axis_end == SEQUENCE && input.ctype()!=BCTHW) - || (axis_start == HEAD & axis_end == SEQUENCE && input.ctype()==BSHD) - || (axis_start == HEAD & axis_end == SEQUENCE && input.ctype()==BHDS) - || (axis_start == HEAD & axis_end == SEQUENCE && input.ctype()==BDHS) - || (axis_start == HEAD & axis_end == DIMENSION && input.ctype()==BSHD) - || (axis_start == HEAD & axis_end == DIMENSION && input.ctype()==BHDS) - || (axis_start == HEAD & axis_end == SEQUENCE && input.ctype()==BDSH) - ){ - if(input.masterTensor() == nullptr) { + if ((axis_start == TIME & axis_end == WIDTH && input.ctype() == BCTHW) + || (axis_start == CHANNLE & axis_end == HEIGHT && input.ctype() == BWCTH) + || (axis_start == HEIGHT & axis_end == CHANNLE && input.ctype() == BTHWC) + || (axis_start == BATCH & axis_end == SEQUENCE && input.ctype() != BCTHW) + || (axis_start == HEAD & axis_end == SEQUENCE && input.ctype() == BSHD) + || (axis_start == HEAD & axis_end == SEQUENCE && input.ctype() == BHDS) + || (axis_start == HEAD & axis_end == SEQUENCE && input.ctype() == BDHS) + || (axis_start == HEAD & axis_end == DIMENSION && input.ctype() == BSHD) + || (axis_start == HEAD & axis_end == DIMENSION && input.ctype() == BHDS) + || (axis_start == HEAD & axis_end == SEQUENCE && input.ctype() == BDSH)) { + if (input.masterTensor() == nullptr) { input.free(); } output.setDtype(input.dtype()); output.alloc(); input.deepCopyFrom(output, false); - }else { - std::cout<<"[TODO]Tensor.Flatten not support!!!!"<> axiss) { + if (output.count() <= 0 || output.shape() != input.shape()) { + output.trans_copy_shape(input.shape()); + std::map origin_chls = {{BATCH, 0}, {SEQUENCE, 1}, {HEAD, 2}, {DIMENSION, 3}, {CHANNLE, 1}, {TIME, 2}, {HEIGHT, 3}, {WIDTH, 4}}; + if (std::equal(output.chls().begin(), output.chls().end(), origin_chls.begin())) { + output.chls() = input.chls(); + for (auto axis : axiss) { + auto axis0 = axis.first; + auto axis1 = axis.second; + auto ori_0_idx = output.chls()[axis0]; + auto ori_1_idx = output.chls()[axis1]; + output.chls()[axis0] = ori_1_idx; + output.chls()[axis1] = ori_0_idx; + } + output.changeCtype(input.shape().size()); + output.undiffusion() = true; + } + if (input.masterTensor() != nullptr) { + if (output.masterTensor() == nullptr) { + output.setDtype(input.dtype()); + output.deepCopyFrom(input, false); + } + } else { + if (input.masterTensor() == nullptr) { + input.free(); + } + output.setDtype(input.dtype()); + output.alloc(); + input.undiffusion() = true; + input.deepCopyFrom(output, false); + output.transFrom() = axiss; + } + } + } + static void execute(Tensor &input, Tensor &output, vector> axiss) { } }; class CPUclipFunction { public: - static void reshape(Tensor &input, Tensor &output, vector b, vector h, vector s, vector d) { + static void setup(Tensor &input, Tensor &output, vector b, vector h, vector s, vector d) { // reshape int dim_b = input.batch(); int dim_h = input.head(); int dim_s = input.sequence(); int dim_d = input.dimension(); - std::vector, int*>> data = {{b, &dim_b}, {h, &dim_h}, {s, &dim_s}, {d, &dim_d}}; - for (auto& pair : data) { + std::vector, int *>> data = {{b, &dim_b}, {h, &dim_h}, {s, &dim_s}, {d, &dim_d}}; + for (auto &pair : data) { if (pair.first.size() == 2) { *pair.second = pair.first[1] - pair.first[0]; } else if (pair.first.size() == 1) { @@ -510,8 +469,6 @@ class CPUclipFunction { } } output.reshape(dim_b, dim_h, dim_s, dim_d); - } - static void setup(Tensor &input, Tensor &output, vector b, vector h, vector s, vector d) { output.setDtype(input.dtype()); output.alloc(); } @@ -532,33 +489,24 @@ class CPUclipFunction { input.hostPtr() + input.offset(b, 0, seq_idx, 0), input.head() * 1 * input.dimension() * sizeof(float)); } - }else { - std::cout<<"[TODO]Tensor.CLip not support!!!!"< b, vector h, vector s, vector d) { + static void setup(Tensor &input, Tensor &output, Chl axis, vector b, vector h, vector s, vector d) { // reshape int dim_b = input.batch(); int dim_h = input.head(); int dim_s = input.sequence(); int dim_d = input.dimension(); - - /* - std::vector, int*>> data = {{b, &dim_b}, {h, &dim_h}, {s, &dim_s}, {d, &dim_d}}; - for (auto& pair : data) { - if (pair.first.size() > 0) { - *pair.second = 1; - } - } - */ switch (axis) { case BATCH: { - std::vector, int*>> data = {{h, &dim_h}, {s, &dim_s}, {d, &dim_d}}; - for (auto& pair : data) { + std::vector, int *>> data = {{h, &dim_h}, {s, &dim_s}, {d, &dim_d}}; + for (auto &pair : data) { if (pair.first.size() > 0) { *pair.second = 1; } @@ -566,8 +514,8 @@ class CPUclipaxisFunction { break; } case HEAD: { - std::vector, int*>> data = {{b, &dim_b}, {s, &dim_s}, {d, &dim_d}}; - for (auto& pair : data) { + std::vector, int *>> data = {{b, &dim_b}, {s, &dim_s}, {d, &dim_d}}; + for (auto &pair : data) { if (pair.first.size() > 0) { *pair.second = 1; } @@ -575,8 +523,8 @@ class CPUclipaxisFunction { break; } case SEQUENCE: { - std::vector, int*>> data = {{b, &dim_b}, {h, &dim_h}, {d, &dim_d}}; - for (auto& pair : data) { + std::vector, int *>> data = {{b, &dim_b}, {h, &dim_h}, {d, &dim_d}}; + for (auto &pair : data) { if (pair.first.size() > 0) { *pair.second = 1; } @@ -584,8 +532,8 @@ class CPUclipaxisFunction { break; } case DIMENSION: { - std::vector, int*>> data = {{b, &dim_b}, {h, &dim_h}, {s, &dim_s}}; - for (auto& pair : data) { + std::vector, int *>> data = {{b, &dim_b}, {h, &dim_h}, {s, &dim_s}}; + for (auto &pair : data) { if (pair.first.size() > 0) { *pair.second = 1; } @@ -596,14 +544,12 @@ class CPUclipaxisFunction { break; } output.reshape(dim_b, dim_h, dim_s, dim_d); - } - static void setup(Tensor &input, Tensor &output, Chl axis, vector b, vector h, vector s, vector d) { output.setDtype(input.dtype()); output.alloc(); } static void execute(Tensor &input, Tensor &output, Chl axis, vector b, vector h, vector s, vector d) { if (axis == BATCH) { - if(s.size()>0) { + if (s.size() > 0) { for (int i = 0; i < s.size(); ++i) { auto seq_idx = s[i]; memcpy(output.hostPtr() + output.offset(i, 0, 0, 0), @@ -612,33 +558,40 @@ class CPUclipaxisFunction { } } } else { - std::cout<<"[TODO]Tensor.CLip not support!!!!"<inputs, Tensor &output, Chl axis, int expd_batch_, int expd_batch_input_idx) { + static void setup(Tensor &output, vector inputs, Chl axis) { + int expd_batch_ = inputs[0]->batch(); + for (int ii = 0; ii < inputs.size(); ++ii) { + auto input = inputs[ii]; + if (input->batch() > expd_batch_) { + expd_batch_ = input->batch(); + } + } int dim_b = expd_batch_; int dim_h = inputs[0]->head(); int dim_s = inputs[0]->sequence(); int dim_d = inputs[0]->dimension(); int sizes[] = {0, 0, 0, 0}; Chl axes[] = {BATCH, HEAD, SEQUENCE, DIMENSION}; - int* dims[] = {&dim_b, &dim_h, &dim_s, &dim_d}; + int *dims[] = {&dim_b, &dim_h, &dim_s, &dim_d}; for (int i = 0; i < 4; i++) { if (axis == axes[i]) { for (auto input : inputs) { - sizes[i] += (i == 0) ? input->batch() : (i == 1) ? input->head() : (i == 2) ? input->sequence() : input->dimension(); + sizes[i] += (i == 0) ? input->batch() : (i == 1) ? input->head() : + (i == 2) ? input->sequence() : + input->dimension(); } *dims[i] = sizes[i]; break; } } output.reshape(dim_b, dim_h, dim_s, dim_d); - } - static void setup(vectorinputs, Tensor &output, Chl axis, int expd_batch_, int expd_batch_input_idx) { output.setDtype(inputs[0]->dtype()); output.alloc(); if (axis == SEQUENCE && inputs[0]->head() != 1) { @@ -651,13 +604,22 @@ class CPUcatFunction { inputs[idx]->free(); } if (idx > 0) { - cseq += inputs[idx-1]->sequence(); + cseq += inputs[idx - 1]->sequence(); } inputs[idx]->deepCopyFrom(output, false, {cbatch, chead, cseq, cdim}); // b,h,s,d } } } - static void execute(vectorinputs, Tensor &output, Chl axis, int expd_batch_, int expd_batch_input_idx) { + static void execute(Tensor &output, vector inputs, Chl axis) { + int expd_batch_ = inputs[0]->batch(); + int expd_batch_input_idx = 0; + for (int ii = 0; ii < inputs.size(); ++ii) { + auto input = inputs[ii]; + if (input->batch() > expd_batch_) { + expd_batch_ = input->batch(); + expd_batch_input_idx = ii; + } + } if (axis == BATCH) { for (int n = 0; n < inputs.size(); ++n) { auto copysize = inputs[0]->batch() * inputs[0]->head() * inputs[0]->sequence() * inputs[0]->dimension(); @@ -705,8 +667,6 @@ class CPUcatFunction { class CPUwhereFunction { public: - static void reshape(Tensor &input, Tensor &output, float value, Chl axis) { - } static void setup(Tensor &input, Tensor &output, float value, Chl axis) { } static void execute(Tensor &input, Tensor &output, float value, Chl axis) { @@ -780,16 +740,14 @@ class CPUwhereFunction { class CPURangeFunction { public: - static void reshape(Tensor &output, int start, int end) { - output.reshape(1, 1, end - start, 1); - } static void setup(Tensor &output, int start, int end) { + output.reshape(1, 1, end - start, 1); output.setDtype(MLLM_TYPE_F32); output.alloc(); } static void execute(Tensor &output, int start, int end) { - for (int i = 0; i < end-start; ++i) { - output.setDataAt(0, 0, i+start,0, (float)i); + for (int i = 0; i < end - start; ++i) { + output.setDataAt(0, 0, i + start, 0, (float)i); } } }; diff --git a/src/models/transformer/configuration_transformer.hpp b/src/models/transformer/configuration_transformer.hpp index 063b4e56..f38b4aff 100644 --- a/src/models/transformer/configuration_transformer.hpp +++ b/src/models/transformer/configuration_transformer.hpp @@ -5,6 +5,8 @@ #ifndef CONFIGURATION_TRANSFORMER_HPP #define CONFIGURATION_TRANSFORMER_HPP +#include "Layer.hpp" + using namespace mllm; using namespace std; From c5324bb331e17849763fbc61b733cac2d2e53984 Mon Sep 17 00:00:00 2001 From: Rongjie Yi <41737961+yirongjie@users.noreply.github.com> Date: Tue, 19 Mar 2024 08:18:32 +0800 Subject: [PATCH 6/6] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 63b77b7a..32f09887 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ Building mllm requires following tools: #### Run Fuyu-8B -Download the model from [here](https://huggingface.co/mllmTeam), or using the following instructions +Download the model from [here](https://huggingface.co/mllmTeam/fuyu-8b-mllm/tree/main/), or using the following instructions ```bash mkdir ../models && cd ../models @@ -125,7 +125,7 @@ Result are as followed: #### Run LLaMA-2-7B -Download model +Download model from [here](https://huggingface.co/mllmTeam/llama-2-7b-mllm/tree/main/), or using the following instructions ```bash mkdir ../models && cd ../models @@ -165,7 +165,7 @@ BUPT offers a wide range of undergraduate and graduate programs in fields such a #### Run ImageBind -Download model +Download model from [here](https://huggingface.co/mllmTeam/imagebind_huge-mllm/tree/main), or using the following instructions ```bash mkdir ../models && cd ../models