diff --git a/src/Layer.hpp b/src/Layer.hpp
index 021b00e1..487ede48 100644
--- a/src/Layer.hpp
+++ b/src/Layer.hpp
@@ -42,14 +42,14 @@ class Layer {
     bool inited_loaded = false;
     static map<string, string> layername_2_tensorname;
 
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
-    Tensor &operator()(Tensor &input0, Tensor &input1) {
+    Tensor operator()(Tensor input0, Tensor input1) {
         auto ts = run({input0, input1}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 
 private:
@@ -106,7 +106,7 @@ class Layer {
         }
         return Module::doLoad;
     }
-    vector<std::reference_wrapper<Tensor>> run(vector<Tensor> inputs, int N=1) {
+    vector<Tensor> run(vector<Tensor> inputs, int N=1) {
         Module::runlistIdx = saved_list_idx;
         if (Module::doLoad || !inited_loaded) {
             init_run();
@@ -142,7 +142,7 @@ class Layer {
             }
         }
         if(Module::doLoad){
-            vector<std::reference_wrapper<Tensor>> output_result = {};
+            vector<Tensor> output_result = {};
             for (const auto &layer_next_name : layer_next_names) {
                 auto next_name = layername_2_tensorname[layer_next_name];
                 output_result.push_back(*Tensor::graphs[next_name]);
@@ -196,7 +196,7 @@ class Layer {
         auto end_t = mllm_time_us();
         std::cout<<op_->name() << " | "<<(end_t - start_t)/1000.0F<<"ms"<<std::endl;
 #endif
-        vector<std::reference_wrapper<Tensor>> output_result = {};
+        vector<Tensor> output_result = {};
         for (const auto &layer_next_name : layer_next_names) {
             auto next_name = layername_2_tensorname[layer_next_name];
 #ifdef DEBUGSAVETENSOR
@@ -223,9 +223,9 @@ class Linear final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::LINEAR);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -236,9 +236,9 @@ class SparseIdLinear final : public Layer {
         param_["out_dim_"] = (float)out_dim;
         init(std::move(name), OpType::SPARSEIDLINEAR);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -249,9 +249,9 @@ class SparseLinear final : public Layer {
         param_["out_dim_"] = (float)out_dim;
         init(std::move(name), OpType::SPARSELINEAR);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -262,9 +262,9 @@ class Predictor final : public Layer {
         param_["out_dim"] = (float)out_dim;
         init(std::move(name), OpType::PREDICTOR);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -277,11 +277,11 @@ class ElasticLinear final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::ELASTICLINEAR);
     }
-    Tensor &operator()(Tensor &input0, int activate_input_dim, int activate_output_dim) {
+    Tensor operator()(Tensor input0, int activate_input_dim, int activate_output_dim) {
         auto activate_input_dim_tensor = Tensor(activate_input_dim, backend_);
         auto activate_output_dim_tensor = Tensor(activate_output_dim, backend_);
         auto ts = run({input0, activate_input_dim_tensor, activate_output_dim_tensor}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -292,9 +292,9 @@ class SiLU final : public Layer {
     SiLU(std::string name) {
         init(std::move(name), OpType::SILU);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -304,9 +304,9 @@ class ReLU final : public Layer {
     ReLU(std::string name) {
         init(std::move(name), OpType::RELU);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -316,9 +316,9 @@ class ReLUSquaredActivation final : public Layer {
     ReLUSquaredActivation(std::string name) {
         init(std::move(name), OpType::RELU2);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -328,9 +328,9 @@ class GELU final : public Layer {
     GELU(std::string name) {
         init(std::move(name), OpType::OP_GELU);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -340,9 +340,9 @@ class QuickGELU final : public Layer {
     explicit QuickGELU(std::string name) {
         init(std::move(name), OpType::QUICKGLUE);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -367,14 +367,14 @@ class Softmax final : public Layer {
         param_["do_causal_mask"] = do_causal_mask;
         init(std::move(name), OpType::SOFTMAX);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
-    Tensor &operator()(Tensor &input, int axis_classes) {
+    Tensor operator()(Tensor input, int axis_classes) {
         auto axis_classes_tensor = Tensor(axis_classes, backend_);
         auto ts = run({input, axis_classes_tensor}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -385,9 +385,9 @@ class Embedding final : public Layer {
         param_["vocab_size"] = vocab_size;
         init(std::move(name), OpType::EMBEDDING);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -397,14 +397,14 @@ class Causalmask final : public Layer {
     explicit Causalmask(std::string name) {
         init(std::move(name), OpType::CAUSALMASK);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
-    Tensor &operator()(Tensor &input0, int kvcache_seq) {
+    Tensor operator()(Tensor input0, int kvcache_seq) {
         auto kvcache_seq_tensor = Tensor(kvcache_seq, backend_);
         auto ts = run({input0, kvcache_seq_tensor}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -414,9 +414,9 @@ class SlidingWindowMask final : public Layer {
         param_["window_size"] = window_size;
         init(std::move(name), OpType::SLIDINGWINDOWMASK);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -439,9 +439,9 @@ class RoPE final : public Layer {
         param_["partial_rotary_factor"] = partial_rotary_factor;
         init(std::move(name), OpType::ROPE);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -458,9 +458,9 @@ class KVCache final : public Layer {
         param_["cache_max"] = cache_max;
         init(std::move(name), OpType::KVCACHE);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
     int getCacheSeqLen(){
         return op_->getCacheSeqLen();
     }
@@ -478,9 +478,9 @@ class LayerNorm final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::LAYERNORM);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -499,9 +499,9 @@ class RMSNorm final : public Layer {
         init(std::move(name), OpType::RMSNORM);
     }
 
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -512,9 +512,9 @@ class Matmul final : public Layer {
         param_["transpose1"] = transpose1;
         init(std::move(name), OpType::MATMUL);
     }
-    Tensor &operator()(Tensor &input0, Tensor &input1) {
+    Tensor operator()(Tensor input0, Tensor input1) {
         auto ts = run({input0, input1}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -539,7 +539,7 @@ class Split final : public Layer {
         init(std::move(name), OpType::SPLIT);
     }
 
-    vector<std::reference_wrapper<Tensor>> operator()(Tensor &input) {
+    vector<Tensor> operator()(Tensor input) {
         return run({input}, (int)param_["split_num"]);
     }
 };
@@ -557,9 +557,9 @@ class Convolution2D final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::CONVOLUTION2D);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -578,9 +578,9 @@ class Convolution3D final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::CONVOLUTION3D);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
    }
 };
 
@@ -590,9 +590,9 @@ class Concat final : public Layer {
         param_["axis"] = (float)axis;
         init(std::move(name), OpType::CAT);
     }
-    Tensor &operator()(Tensor &input0, Tensor &input1) {
+    Tensor operator()(Tensor input0, Tensor input1) {
         auto ts = run({input0, input1}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -606,9 +606,9 @@ class Parameter final : public Layer {
         param_["dim"] = dim;
         init(std::move(name), OpType::PARAMETER);
     }
-    Tensor &operator()() {
+    Tensor operator()() {
         auto ts = run({}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -617,9 +617,9 @@ class Position final : public Layer {
     explicit Position(std::string name) {
         init(std::move(name), OpType::POSITION);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index 1e96753a..caaafa1d 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -107,7 +107,7 @@ bool Tensor::reshape(const int batch, const int channel, const int time, const i
 
 map<string, shared_ptr<Tensor>> Tensor::graphs;
 TensorStatus Tensor::tensor_status;
-Tensor& Tensor::getFunc(const std::string& suffix, const TensorFuncType type, vector<float> float_args, vector<Tensor *> other_tensors){
+Tensor Tensor::getFunc(const std::string& suffix, const TensorFuncType type, vector<float> float_args, vector<Tensor *> other_tensors){
     const std::string next_name = name_ + "-" + suffix;
     if (Tensor::graphs.find(name_) == Tensor::graphs.end()) {
         Tensor::graphs[name_] = std::shared_ptr<Tensor>(this, [](Tensor *) {});
@@ -153,7 +153,7 @@ Tensor& Tensor::getFunc(const std::string& suffix, const TensorFuncType type, ve
  * static function
  */
 
-std::vector<std::reference_wrapper<Tensor>> Tensor::getStaticFunc(vector<std::string> out_names, const TensorFuncType type, vector<float> float_args, vector<Tensor *> input_tensors){
+std::vector<Tensor> Tensor::getStaticFunc(vector<std::string> out_names, const TensorFuncType type, vector<float> float_args, vector<Tensor *> input_tensors){
     auto backend_h = Backend::global_backends[MLLM_CPU];
     if (!input_tensors.empty() && input_tensors[0]->backend_ != nullptr) {
         backend_h = input_tensors[0]->backend();
@@ -165,7 +165,7 @@ std::vector<std::reference_wrapper<Tensor>> Tensor::getStaticFunc(vector<std::st
         }
     }
     if (Module::doLoad) {
-        std::vector<std::reference_wrapper<Tensor>> results;
+        std::vector<Tensor> results;
         for (auto out_name: out_names) {
             results.push_back(*Tensor::graphs[out_name]);
         }
@@ -200,62 +200,62 @@ std::vector<std::reference_wrapper<Tensor>> Tensor::getStaticFunc(vector<std::st
         Tensor::graphs[out_name]->saveNData();
     }
 #endif
-    std::vector<std::reference_wrapper<Tensor>> results;
+    std::vector<Tensor> results;
     for (auto out_name: out_names) {
         results.push_back(*Tensor::graphs[out_name]);
     }
     return results;
 }
 
-Tensor &Tensor::operator+(float data) {
+Tensor Tensor::operator+(float data) {
     return getFunc("add", FUNC_ADD, {data});
 }
 
-Tensor &Tensor::operator-(float data) {
+Tensor Tensor::operator-(float data) {
     return getFunc("sub", FUNC_SUB, {data});
 }
 
-Tensor &Tensor::operator*(float data) {
+Tensor Tensor::operator*(float data) {
     return getFunc("mul", FUNC_MUL, {data});
 }
 
-Tensor &Tensor::operator/(float data) {
+Tensor Tensor::operator/(float data) {
     return getFunc("div", FUNC_DIV, {data});
 }
 
-Tensor &Tensor::operator/(double data) {
+Tensor Tensor::operator/(double data) {
    return getFunc("div", FUNC_DIV, {static_cast<float>(data)});
 }
 
-Tensor &Tensor::operator+(Tensor &other) {
+Tensor Tensor::operator+(Tensor other) {
     return getFunc("TTadd", FUNC_TTADD, {}, {&other});
 }
 
-Tensor &Tensor::operator-(Tensor &other) {
+Tensor Tensor::operator-(Tensor other) {
     return getFunc("TTsub", FUNC_TTSUB, {}, {&other});
 }
 
-Tensor &Tensor::operator*(Tensor &other) {
+Tensor Tensor::operator*(Tensor other) {
     return getFunc("TTmul", FUNC_TTMUL, {}, {&other});
 }
 
-Tensor &Tensor::operator/(Tensor &other) {
+Tensor Tensor::operator/(Tensor other) {
     return getFunc("TTdiv", FUNC_TTDIV, {}, {&other});
 }
 
-Tensor &Tensor::mean(Chl axis) {
+Tensor Tensor::mean(Chl axis) {
     return getFunc("mean", FUNC_MEAN, {(float)axis});
 }
 
-Tensor &Tensor::view(int b, int h, int s, int d) {
+Tensor Tensor::view(int b, int h, int s, int d) {
     return getFunc("view", FUNC_VIEW, {(float)b, (float)h, (float)s, (float)d});
 }
 
-Tensor &Tensor::flatten(Chl axis_start, Chl axis_end) {
+Tensor Tensor::flatten(Chl axis_start, Chl axis_end) {
     return getFunc("flatten", FUNC_FLATTEN, {(float)axis_start, (float)axis_end});
 }
 
-Tensor &Tensor::transpose(vector<std::pair<Chl, Chl>> axiss) {
+Tensor Tensor::transpose(vector<std::pair<Chl, Chl>> axiss) {
     vector<float> axis_s;
     for (auto &axis : axiss) {
         axis_s.push_back((float)axis.first);
@@ -264,7 +264,7 @@ Tensor &Tensor::transpose(vector<std::pair<Chl, Chl>> axiss) {
     return getFunc("transpose", FUNC_TRANPOSE, axis_s);
 }
 
-Tensor &Tensor::clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d) {
+Tensor Tensor::clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d) {
     vector<float> axis_s;
     axis_s.push_back(b.size());
     axis_s.push_back(h.size());
@@ -285,7 +285,7 @@ Tensor &Tensor::clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d)
     return getFunc("clip", FUNC_CLIP, axis_s);
 }
 
-Tensor &Tensor::clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s, vector<int> d) {
+Tensor Tensor::clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s, vector<int> d) {
     vector<float> axis_s = {(float)keep_axis};
     axis_s.push_back(b.size());
     axis_s.push_back(h.size());
@@ -306,34 +306,34 @@ Tensor &Tensor::clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s,
     return getFunc("clipaxis", FUNC_CLIPAXIS, axis_s);
 }
 
-Tensor &Tensor::norm(int L_n) {
+Tensor Tensor::norm(int L_n) {
     return getFunc("norm", FUNC_NORM, {(float)L_n});
 }
 
-Tensor &Tensor::where(float value, Chl axis) {
+Tensor Tensor::where(float value, Chl axis) {
     return getFunc("where", FUNC_WHERE, {(float)value, (float)axis});
 }
 
-Tensor &Tensor::cat(vector<Tensor> input_tensors, Chl axis) {
+Tensor Tensor::cat(vector<Tensor> input_tensors, Chl axis) {
     vector<Tensor *> inputs = {};
     for (const auto &input_tensor : input_tensors) {
         inputs.push_back(Tensor::graphs[input_tensor.name()].get());
     }
     return getStaticFunc({input_tensors[0].name() + "-cat"}, FUNC_CAT,
-                         {(float)axis}, inputs)[0].get();
+                         {(float)axis}, inputs)[0];
 }
 
-Tensor &Tensor::mm(Tensor &input0, Tensor &input1) {
+Tensor Tensor::mm(Tensor input0, Tensor input1) {
     return getStaticFunc({input0.name() + "-mm-" + input1.name()}, FUNC_MM,
-                         {}, {Tensor::graphs[input0.name()].get(), Tensor::graphs[input1.name()].get()})[0].get();
+                         {}, {Tensor::graphs[input0.name()].get(), Tensor::graphs[input1.name()].get()})[0];
 }
 
-Tensor &Tensor::range(int start, int end) {
+Tensor Tensor::range(int start, int end) {
     return getStaticFunc({"range-" + std::to_string(start) + "-" + std::to_string(end)}, FUNC_RANGE,
-                         {(float)start, (float)end}, {})[0].get();
+                         {(float)start, (float)end}, {})[0];
 }
 
-vector<std::reference_wrapper<Tensor>> Tensor::split(Tensor &input, std::vector<int> each_dims, Chl split_dim, int head_size) {
+vector<Tensor> Tensor::split(Tensor input, std::vector<int> each_dims, Chl split_dim, int head_size) {
     vector<std::string> next_names;
     std::vector<float> args;
     for (int i = 0; i < each_dims.size(); ++i) {
diff --git a/src/Tensor.hpp b/src/Tensor.hpp
index 04103d42..32e3c65b 100644
--- a/src/Tensor.hpp
+++ b/src/Tensor.hpp
@@ -795,11 +795,11 @@ class Tensor {
      * \param data binary data
      * \return Tensor
      */
-    Tensor& operator+(float data);
-    Tensor& operator-(float data);
-    Tensor& operator*(float data);
-    Tensor& operator/(float data);
-    Tensor& operator/(double data);
+    Tensor operator+(float data);
+    Tensor operator-(float data);
+    Tensor operator*(float data);
+    Tensor operator/(float data);
+    Tensor operator/(double data);
 
@@ -808,28 +808,27 @@ class Tensor {
      * \param other The Other Tensor
      * \return Tensor
      */
-    Tensor& operator+(Tensor& other);
-    Tensor& operator-(Tensor& other);
-    Tensor& operator*(Tensor& other);
-    Tensor& operator/(Tensor& other);
+    Tensor operator+(Tensor other);
+    Tensor operator-(Tensor other);
+    Tensor operator*(Tensor other);
+    Tensor operator/(Tensor other);
 
-    Tensor& mean(Chl axis);
+    Tensor mean(Chl axis);
 
-
-    Tensor& view(int b, int h, int s, int d);
-    Tensor& flatten(Chl axis_start, Chl axis_end);
-    Tensor& transpose(Chl axis0, Chl axis1){
+    Tensor view(int b, int h, int s, int d);
+    Tensor flatten(Chl axis_start, Chl axis_end);
+    Tensor transpose(Chl axis0, Chl axis1){
         return transpose({{axis0, axis1}});
     }
-    Tensor& transpose(vector<std::pair<Chl, Chl>> axiss);
-    Tensor& clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d);
-    Tensor &clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s, vector<int> d);
-    static Tensor& cat(vector<Tensor> input_tensors, Chl dims);;
-    static Tensor& mm(Tensor& input0, Tensor& input1);
-    Tensor& norm(int L_n);
-    Tensor& where(float value, Chl axis);
-    static Tensor& range(int start, int end);
-    static vector<std::reference_wrapper<Tensor>> split(Tensor& input, std::vector<int> each_dims, Chl split_dim, int head_size = -1);
+    Tensor transpose(vector<std::pair<Chl, Chl>> axiss);
+    Tensor clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d);
+    Tensor clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s, vector<int> d);
+    static Tensor cat(vector<Tensor> input_tensors, Chl dims);;
+    static Tensor mm(Tensor input0, Tensor input1);
+    Tensor norm(int L_n);
+    Tensor where(float value, Chl axis);
+    static Tensor range(int start, int end);
+    static vector<Tensor> split(Tensor input, std::vector<int> each_dims, Chl split_dim, int head_size = -1);
 
     /*
     Functions used for ChildTensor:
@@ -1673,9 +1672,9 @@ class Tensor {
         }
         return tensor_id;
     }
-    Tensor& getFunc(const std::string& suffix, const TensorFuncType type, vector<float> float_args, vector<Tensor *> other_tensors={});
+    Tensor getFunc(const std::string& suffix, const TensorFuncType type, vector<float> float_args, vector<Tensor *> other_tensors={});
 
-    static std::vector<std::reference_wrapper<Tensor>> getStaticFunc(vector<std::string> out_names, const TensorFuncType type, vector<float> float_args, vector<Tensor *> input_tensors);
+    static std::vector<Tensor> getStaticFunc(vector<std::string> out_names, const TensorFuncType type, vector<float> float_args, vector<Tensor *> input_tensors);
 };
 } // namespace mllm
 #endif // MLLM_TENSOR_H
\ No newline at end of file
diff --git a/src/models/fuyu/modeling_fuyu.hpp b/src/models/fuyu/modeling_fuyu.hpp
index 73569a12..fbe48712 100644
--- a/src/models/fuyu/modeling_fuyu.hpp
+++ b/src/models/fuyu/modeling_fuyu.hpp
@@ -69,9 +69,9 @@ class FuyuGather final : public Layer {
     explicit FuyuGather(std::string name) {
         init(std::move(name), OpType::GATHER);
     }
-    Tensor &operator()(Tensor &input_ids, Tensor &image_patches, Tensor &image_patches_indices) {
+    Tensor operator()(Tensor input_ids, Tensor image_patches, Tensor image_patches_indices) {
         auto ts = run({input_ids, image_patches, image_patches_indices}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
diff --git a/src/models/llava/modeling_llava.hpp b/src/models/llava/modeling_llava.hpp
index 6912ea80..acb4fd71 100644
--- a/src/models/llava/modeling_llava.hpp
+++ b/src/models/llava/modeling_llava.hpp
@@ -103,9 +103,9 @@ class VisionEmbdReplace final : public Layer {
     explicit VisionEmbdReplace(std::string name) {
         init(std::move(name), OpType::REPLACE);
     }
-    Tensor &operator()(Tensor &text, Tensor &vision, Tensor &where_indices) {
+    Tensor operator()(Tensor text, Tensor vision, Tensor where_indices) {
         auto ts = run({text, vision, where_indices}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
diff --git a/src/models/phi3/modeling_phi3.hpp b/src/models/phi3/modeling_phi3.hpp
index 574a4ab2..a87b5490 100644
--- a/src/models/phi3/modeling_phi3.hpp
+++ b/src/models/phi3/modeling_phi3.hpp
@@ -29,8 +29,7 @@ class Phi3MLP final : public Module {
     vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         auto x = gate_up_proj(inputs[0]);
         auto split_tensors = Tensor::split(x, {ffn_hidden_, ffn_hidden_}, DIMENSION);
-        Tensor hidden = split_tensors[1];
-        x = hidden * silu(split_tensors[0]);
+        x = split_tensors[1] * silu(split_tensors[0]);
         x = down_proj(x);
         return {x};
     }
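
Note on the value-semantics migration (the sketch below is illustrative, not mllm code): the patch switches the Layer call operators and the Tensor free functions from returning `Tensor &` / `vector<std::reference_wrapper<Tensor>>` to returning `Tensor` / `vector<Tensor>` by value. The returned objects are copies of the tensors registered in `Tensor::graphs` (see `output_result.push_back(*Tensor::graphs[next_name]);`), so the graph map keeps owning the master objects. Assuming, as the diff suggests, that `Tensor` copies are shallow handles over shared storage, a minimal standalone C++ sketch of the pattern is:

    #include <iostream>
    #include <memory>
    #include <vector>

    // Toy stand-in for a tensor whose payload is shared between copies,
    // mirroring copies of graph-registered tensors aliasing one buffer.
    struct Toy {
        std::shared_ptr<int> data = std::make_shared<int>(0);
    };

    // Value-returning API: the returned copy shares `data`, so it stays
    // usable without handing out a reference into `graph`.
    std::vector<Toy> run(const std::vector<Toy> &graph) {
        return {graph.front()};
    }

    int main() {
        std::vector<Toy> graph(1);
        Toy out = run(graph)[0];              // cheap handle copy
        *out.data = 42;                       // writes through shared payload
        std::cout << *graph[0].data << "\n";  // prints 42
        return 0;
    }

The cost is one handle copy per call (a `shared_ptr` refcount bump in this sketch); the gain is that temporaries such as `run(graph)[0]`, or `split_tensors[1]` in the Phi3MLP hunk above, can be used directly in expressions instead of first being bound to a named lvalue.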