diff --git a/src/Layer.hpp b/src/Layer.hpp
index 021b00e1..487ede48 100644
--- a/src/Layer.hpp
+++ b/src/Layer.hpp
@@ -42,14 +42,14 @@ class Layer {
     bool inited_loaded = false;
     static map<string, string> layername_2_tensorname;
 
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
-    Tensor &operator()(Tensor &input0, Tensor &input1) {
+    Tensor operator()(Tensor input0, Tensor input1) {
         auto ts = run({input0, input1}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 
 private:
@@ -106,7 +106,7 @@ class Layer {
         }
         return Module::doLoad;
     }
-    vector<std::reference_wrapper<Tensor>> run(vector<Tensor> inputs, int N=1) {
+    vector<Tensor> run(vector<Tensor> inputs, int N=1) {
         Module::runlistIdx = saved_list_idx;
         if (Module::doLoad || !inited_loaded) {
             init_run();
@@ -142,7 +142,7 @@ class Layer {
             }
         }
         if(Module::doLoad){
-            vector<std::reference_wrapper<Tensor>> output_result = {};
+            vector<Tensor> output_result = {};
             for (const auto &layer_next_name : layer_next_names) {
                 auto next_name = layername_2_tensorname[layer_next_name];
                 output_result.push_back(*Tensor::graphs[next_name]);
@@ -196,7 +196,7 @@ class Layer {
         auto end_t = mllm_time_us();
         std::cout<<op_->name() << " | "<<(end_t - start_t)/1000.0F<<"ms"<<std::endl;
 #endif
-        vector<std::reference_wrapper<Tensor>> output_result = {};
+        vector<Tensor> output_result = {};
         for (const auto &layer_next_name : layer_next_names) {
             auto next_name = layername_2_tensorname[layer_next_name];
 #ifdef DEBUGSAVETENSOR
@@ -223,9 +223,9 @@ class Linear final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::LINEAR);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -236,9 +236,9 @@ class SparseIdLinear final : public Layer {
         param_["out_dim_"] = (float)out_dim;
         init(std::move(name), OpType::SPARSEIDLINEAR);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -249,9 +249,9 @@ class SparseLinear final : public Layer {
         param_["out_dim_"] = (float)out_dim;
         init(std::move(name), OpType::SPARSELINEAR);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -262,9 +262,9 @@ class Predictor final : public Layer {
         param_["out_dim"] = (float)out_dim;
         init(std::move(name), OpType::PREDICTOR);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -277,11 +277,11 @@ class ElasticLinear final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::ELASTICLINEAR);
     }
-    Tensor &operator()(Tensor &input0, int activate_input_dim, int activate_output_dim) {
+    Tensor operator()(Tensor input0, int activate_input_dim, int activate_output_dim) {
         auto activate_input_dim_tensor = Tensor(activate_input_dim, backend_);
         auto activate_output_dim_tensor = Tensor(activate_output_dim, backend_);
         auto ts = run({input0, activate_input_dim_tensor, activate_output_dim_tensor}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -292,9 +292,9 @@ class SiLU final : public Layer {
     SiLU(std::string name) {
         init(std::move(name), OpType::SILU);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -304,9 +304,9 @@ class ReLU final : public Layer {
     ReLU(std::string name) {
         init(std::move(name), OpType::RELU);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -316,9 +316,9 @@ class ReLUSquaredActivation final : public Layer {
     ReLUSquaredActivation(std::string name) {
         init(std::move(name), OpType::RELU2);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -328,9 +328,9 @@ class GELU final : public Layer {
     GELU(std::string name) {
         init(std::move(name), OpType::OP_GELU);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -340,9 +340,9 @@ class QuickGELU final : public Layer {
     explicit QuickGELU(std::string name) {
         init(std::move(name), OpType::QUICKGLUE);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -367,14 +367,14 @@ class Softmax final : public Layer {
         param_["do_causal_mask"] = do_causal_mask;
         init(std::move(name), OpType::SOFTMAX);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
-    Tensor &operator()(Tensor &input, int axis_classes) {
+    Tensor operator()(Tensor input, int axis_classes) {
         auto axis_classes_tensor = Tensor(axis_classes, backend_);
         auto ts = run({input, axis_classes_tensor}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -385,9 +385,9 @@ class Embedding final : public Layer {
         param_["vocab_size"] = vocab_size;
         init(std::move(name), OpType::EMBEDDING);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -397,14 +397,14 @@ class Causalmask final : public Layer {
     explicit Causalmask(std::string name) {
         init(std::move(name), OpType::CAUSALMASK);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
-    Tensor &operator()(Tensor &input0, int kvcache_seq) {
+    Tensor operator()(Tensor input0, int kvcache_seq) {
         auto kvcache_seq_tensor = Tensor(kvcache_seq, backend_);
         auto ts = run({input0, kvcache_seq_tensor}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -414,9 +414,9 @@ class SlidingWindowMask final : public Layer {
         param_["window_size"] = window_size;
         init(std::move(name), OpType::SLIDINGWINDOWMASK);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -439,9 +439,9 @@ class RoPE final : public Layer {
         param_["partial_rotary_factor"] = partial_rotary_factor;
         init(std::move(name), OpType::ROPE);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -458,9 +458,9 @@ class KVCache final : public Layer {
         param_["cache_max"] = cache_max;
         init(std::move(name), OpType::KVCACHE);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
     int getCacheSeqLen(){
         return op_->getCacheSeqLen();
     }
@@ -478,9 +478,9 @@ class LayerNorm final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::LAYERNORM);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -499,9 +499,9 @@ class RMSNorm final : public Layer {
         init(std::move(name), OpType::RMSNORM);
     }
 
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -512,9 +512,9 @@ class Matmul final : public Layer {
         param_["transpose1"] = transpose1;
         init(std::move(name), OpType::MATMUL);
     }
-    Tensor &operator()(Tensor &input0, Tensor &input1) {
+    Tensor operator()(Tensor input0, Tensor input1) {
         auto ts = run({input0, input1}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -539,7 +539,7 @@ class Split final : public Layer {
         init(std::move(name), OpType::SPLIT);
     }
 
-    vector<std::reference_wrapper<Tensor>> operator()(Tensor &input) {
+    vector<Tensor> operator()(Tensor input) {
         return run({input}, (int)param_["split_num"]);
     }
 };
@@ -557,9 +557,9 @@ class Convolution2D final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::CONVOLUTION2D);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -578,9 +578,9 @@ class Convolution3D final : public Layer {
         param_["bias"] = (float)bias;
         init(std::move(name), OpType::CONVOLUTION3D);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
    }
 };
 
@@ -590,9 +590,9 @@ class Concat final : public Layer {
         param_["axis"] = (float)axis;
         init(std::move(name), OpType::CAT);
     }
-    Tensor &operator()(Tensor &input0, Tensor &input1) {
+    Tensor operator()(Tensor input0, Tensor input1) {
         auto ts = run({input0, input1}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -606,9 +606,9 @@ class Parameter final : public Layer {
         param_["dim"] = dim;
         init(std::move(name), OpType::PARAMETER);
     }
-    Tensor &operator()() {
+    Tensor operator()() {
         auto ts = run({}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
 
@@ -617,9 +617,9 @@ class Position final : public Layer {
     explicit Position(std::string name) {
         init(std::move(name), OpType::POSITION);
     }
-    Tensor &operator()(Tensor &input) {
+    Tensor operator()(Tensor input) {
         auto ts = run({input}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
diff --git a/src/Tensor.cpp b/src/Tensor.cpp
index 1e96753a..caaafa1d 100644
--- a/src/Tensor.cpp
+++ b/src/Tensor.cpp
@@ -107,7 +107,7 @@ bool Tensor::reshape(const int batch, const int channel, const int time, const i
 
 map<string, shared_ptr<Tensor>> Tensor::graphs;
 TensorStatus Tensor::tensor_status;
-Tensor& Tensor::getFunc(const std::string& suffix, const TensorFuncType type, vector<float> float_args, vector<Tensor *> other_tensors){
+Tensor Tensor::getFunc(const std::string& suffix, const TensorFuncType type, vector<float> float_args, vector<Tensor *> other_tensors){
     const std::string next_name = name_ + "-" + suffix;
     if (Tensor::graphs.find(name_) == Tensor::graphs.end()) {
         Tensor::graphs[name_] = std::shared_ptr<Tensor>(this, [](Tensor *) {});
@@ -153,7 +153,7 @@ Tensor& Tensor::getFunc(const std::string& suffix, const TensorFuncType type, ve
  * static function
  */
 
-std::vector<std::reference_wrapper<Tensor>> Tensor::getStaticFunc(vector<std::string> out_names, const TensorFuncType type, vector<float> float_args, vector<Tensor *> input_tensors){
+std::vector<Tensor> Tensor::getStaticFunc(vector<std::string> out_names, const TensorFuncType type, vector<float> float_args, vector<Tensor *> input_tensors){
     auto backend_h = Backend::global_backends[MLLM_CPU];
     if (!input_tensors.empty() && input_tensors[0]->backend_ != nullptr) {
         backend_h = input_tensors[0]->backend();
@@ -165,7 +165,7 @@ std::vector<std::reference_wrapper<Tensor>> Tensor::getStaticFunc(vector<std::st
         }
     }
     if (Module::doLoad) {
-        std::vector<std::reference_wrapper<Tensor>> results;
+        std::vector<Tensor> results;
         for (auto out_name: out_names) {
             results.push_back(*Tensor::graphs[out_name]);
         }
@@ -200,62 +200,62 @@ std::vector<std::reference_wrapper<Tensor>> Tensor::getStaticFunc(vector<std::st
         Tensor::graphs[out_name]->saveNData();
     }
 #endif
-    std::vector<std::reference_wrapper<Tensor>> results;
+    std::vector<Tensor> results;
     for (auto out_name: out_names) {
         results.push_back(*Tensor::graphs[out_name]);
     }
     return results;
 }
 
-Tensor &Tensor::operator+(float data) {
+Tensor Tensor::operator+(float data) {
     return getFunc("add", FUNC_ADD, {data});
 }
 
-Tensor &Tensor::operator-(float data) {
+Tensor Tensor::operator-(float data) {
     return getFunc("sub", FUNC_SUB, {data});
 }
 
-Tensor &Tensor::operator*(float data) {
+Tensor Tensor::operator*(float data) {
     return getFunc("mul", FUNC_MUL, {data});
 }
 
-Tensor &Tensor::operator/(float data) {
+Tensor Tensor::operator/(float data) {
     return getFunc("div", FUNC_DIV, {data});
 }
 
-Tensor &Tensor::operator/(double data) {
+Tensor Tensor::operator/(double data) {
    return getFunc("div", FUNC_DIV, {static_cast<float>(data)});
 }
 
-Tensor &Tensor::operator+(Tensor &other) {
+Tensor Tensor::operator+(Tensor other) {
     return getFunc("TTadd", FUNC_TTADD, {}, {&other});
 }
 
-Tensor &Tensor::operator-(Tensor &other) {
+Tensor Tensor::operator-(Tensor other) {
     return getFunc("TTsub", FUNC_TTSUB, {}, {&other});
 }
 
-Tensor &Tensor::operator*(Tensor &other) {
+Tensor Tensor::operator*(Tensor other) {
     return getFunc("TTmul", FUNC_TTMUL, {}, {&other});
 }
 
-Tensor &Tensor::operator/(Tensor &other) {
+Tensor Tensor::operator/(Tensor other) {
     return getFunc("TTdiv", FUNC_TTDIV, {}, {&other});
 }
 
-Tensor &Tensor::mean(Chl axis) {
+Tensor Tensor::mean(Chl axis) {
     return getFunc("mean", FUNC_MEAN, {(float)axis});
 }
 
-Tensor &Tensor::view(int b, int h, int s, int d) {
+Tensor Tensor::view(int b, int h, int s, int d) {
     return getFunc("view", FUNC_VIEW, {(float)b, (float)h, (float)s, (float)d});
 }
 
-Tensor &Tensor::flatten(Chl axis_start, Chl axis_end) {
+Tensor Tensor::flatten(Chl axis_start, Chl axis_end) {
     return getFunc("flatten", FUNC_FLATTEN, {(float)axis_start, (float)axis_end});
 }
 
-Tensor &Tensor::transpose(vector<std::pair<Chl, Chl>> axiss) {
+Tensor Tensor::transpose(vector<std::pair<Chl, Chl>> axiss) {
     vector<float> axis_s;
     for (auto &axis : axiss) {
         axis_s.push_back((float)axis.first);
@@ -264,7 +264,7 @@ Tensor &Tensor::transpose(vector<std::pair<Chl, Chl>> axiss) {
     return getFunc("transpose", FUNC_TRANPOSE, axis_s);
 }
 
-Tensor &Tensor::clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d) {
+Tensor Tensor::clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d) {
     vector<float> axis_s;
     axis_s.push_back(b.size());
     axis_s.push_back(h.size());
@@ -285,7 +285,7 @@ Tensor &Tensor::clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d)
     return getFunc("clip", FUNC_CLIP, axis_s);
 }
 
-Tensor &Tensor::clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s, vector<int> d) {
+Tensor Tensor::clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s, vector<int> d) {
     vector<float> axis_s = {(float)keep_axis};
     axis_s.push_back(b.size());
     axis_s.push_back(h.size());
@@ -306,34 +306,34 @@ Tensor &Tensor::clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s,
     return getFunc("clipaxis", FUNC_CLIPAXIS, axis_s);
 }
 
-Tensor &Tensor::norm(int L_n) {
+Tensor Tensor::norm(int L_n) {
     return getFunc("norm", FUNC_NORM, {(float)L_n});
 }
 
-Tensor &Tensor::where(float value, Chl axis) {
+Tensor Tensor::where(float value, Chl axis) {
     return getFunc("where", FUNC_WHERE, {(float)value, (float)axis});
 }
 
-Tensor &Tensor::cat(vector<Tensor> input_tensors, Chl axis) {
+Tensor Tensor::cat(vector<Tensor> input_tensors, Chl axis) {
     vector<Tensor *> inputs = {};
     for (const auto &input_tensor : input_tensors) {
         inputs.push_back(Tensor::graphs[input_tensor.name()].get());
     }
     return getStaticFunc({input_tensors[0].name() + "-cat"}, FUNC_CAT,
-                         {(float)axis}, inputs)[0].get();
+                         {(float)axis}, inputs)[0];
 }
 
-Tensor &Tensor::mm(Tensor &input0, Tensor &input1) {
+Tensor Tensor::mm(Tensor input0, Tensor input1) {
     return getStaticFunc({input0.name() + "-mm-" + input1.name()}, FUNC_MM,
-                         {}, {Tensor::graphs[input0.name()].get(), Tensor::graphs[input1.name()].get()})[0].get();
+                         {}, {Tensor::graphs[input0.name()].get(), Tensor::graphs[input1.name()].get()})[0];
 }
 
-Tensor &Tensor::range(int start, int end) {
+Tensor Tensor::range(int start, int end) {
     return getStaticFunc({"range-" + std::to_string(start) + "-" + std::to_string(end)}, FUNC_RANGE,
-                         {(float)start, (float)end}, {})[0].get();
+                         {(float)start, (float)end}, {})[0];
 }
 
-vector<std::reference_wrapper<Tensor>> Tensor::split(Tensor &input, std::vector<int> each_dims, Chl split_dim, int head_size) {
+vector<Tensor> Tensor::split(Tensor input, std::vector<int> each_dims, Chl split_dim, int head_size) {
     vector<std::string> next_names;
     std::vector<float> args;
     for (int i = 0; i < each_dims.size(); ++i) {
diff --git a/src/Tensor.hpp b/src/Tensor.hpp
index 04103d42..32e3c65b 100644
--- a/src/Tensor.hpp
+++ b/src/Tensor.hpp
@@ -795,11 +795,11 @@ class Tensor {
      * \param data binary data
      * \return Tensor
      */
-    Tensor& operator+(float data);
-    Tensor& operator-(float data);
-    Tensor& operator*(float data);
-    Tensor& operator/(float data);
-    Tensor& operator/(double data);
+    Tensor operator+(float data);
+    Tensor operator-(float data);
+    Tensor operator*(float data);
+    Tensor operator/(float data);
+    Tensor operator/(double data);
 
@@ -808,28 +808,27 @@ class Tensor {
      * \param other The Other Tensor
      * \return Tensor
      */
-    Tensor& operator+(Tensor& other);
-    Tensor& operator-(Tensor& other);
-    Tensor& operator*(Tensor& other);
-    Tensor& operator/(Tensor& other);
+    Tensor operator+(Tensor other);
+    Tensor operator-(Tensor other);
+    Tensor operator*(Tensor other);
+    Tensor operator/(Tensor other);
 
-    Tensor& mean(Chl axis);
+    Tensor mean(Chl axis);
 
-
-    Tensor& view(int b, int h, int s, int d);
-    Tensor& flatten(Chl axis_start, Chl axis_end);
-    Tensor& transpose(Chl axis0, Chl axis1){
+    Tensor view(int b, int h, int s, int d);
+    Tensor flatten(Chl axis_start, Chl axis_end);
+    Tensor transpose(Chl axis0, Chl axis1){
         return transpose({{axis0, axis1}});
     }
-    Tensor& transpose(vector<std::pair<Chl, Chl>> axiss);
-    Tensor& clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d);
-    Tensor &clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s, vector<int> d);
-    static Tensor& cat(vector<Tensor> input_tensors, Chl dims);;
-    static Tensor& mm(Tensor& input0, Tensor& input1);
-    Tensor& norm(int L_n);
-    Tensor& where(float value, Chl axis);
-    static Tensor& range(int start, int end);
-    static vector<std::reference_wrapper<Tensor>> split(Tensor& input, std::vector<int> each_dims, Chl split_dim, int head_size = -1);
+    Tensor transpose(vector<std::pair<Chl, Chl>> axiss);
+    Tensor clip(vector<int> b, vector<int> h, vector<int> s, vector<int> d);
+    Tensor clip(Chl keep_axis, vector<int> b, vector<int> h, vector<int> s, vector<int> d);
+    static Tensor cat(vector<Tensor> input_tensors, Chl dims);;
+    static Tensor mm(Tensor input0, Tensor input1);
+    Tensor norm(int L_n);
+    Tensor where(float value, Chl axis);
+    static Tensor range(int start, int end);
+    static vector<Tensor> split(Tensor input, std::vector<int> each_dims, Chl split_dim, int head_size = -1);
 
     /*
     Functions used for ChildTensor:
@@ -1673,9 +1672,9 @@ class Tensor {
         }
         return tensor_id;
     }
-    Tensor& getFunc(const std::string& suffix, const TensorFuncType type, vector<float> float_args, vector<Tensor *> other_tensors={});
+    Tensor getFunc(const std::string& suffix, const TensorFuncType type, vector<float> float_args, vector<Tensor *> other_tensors={});
 
-    static std::vector<std::reference_wrapper<Tensor>> getStaticFunc(vector<std::string> out_names, const TensorFuncType type, vector<float> float_args, vector<Tensor *> input_tensors);
+    static std::vector<Tensor> getStaticFunc(vector<std::string> out_names, const TensorFuncType type, vector<float> float_args, vector<Tensor *> input_tensors);
 };
 } // namespace mllm
 #endif // MLLM_TENSOR_H
\ No newline at end of file
diff --git a/src/models/fuyu/modeling_fuyu.hpp b/src/models/fuyu/modeling_fuyu.hpp
index 73569a12..fbe48712 100644
--- a/src/models/fuyu/modeling_fuyu.hpp
+++ b/src/models/fuyu/modeling_fuyu.hpp
@@ -69,9 +69,9 @@ class FuyuGather final : public Layer {
     explicit FuyuGather(std::string name) {
         init(std::move(name), OpType::GATHER);
     }
-    Tensor &operator()(Tensor &input_ids, Tensor &image_patches, Tensor &image_patches_indices) {
+    Tensor operator()(Tensor input_ids, Tensor image_patches, Tensor image_patches_indices) {
         auto ts = run({input_ids, image_patches, image_patches_indices}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
diff --git a/src/models/llava/modeling_llava.hpp b/src/models/llava/modeling_llava.hpp
index 6912ea80..acb4fd71 100644
--- a/src/models/llava/modeling_llava.hpp
+++ b/src/models/llava/modeling_llava.hpp
@@ -103,9 +103,9 @@ class VisionEmbdReplace final : public Layer {
     explicit VisionEmbdReplace(std::string name) {
         init(std::move(name), OpType::REPLACE);
     }
-    Tensor &operator()(Tensor &text, Tensor &vision, Tensor &where_indices) {
+    Tensor operator()(Tensor text, Tensor vision, Tensor where_indices) {
         auto ts = run({text, vision, where_indices}, 1);
-        return ts[0].get();
+        return ts[0];
     }
 };
diff --git a/src/models/phi3/modeling_phi3.hpp b/src/models/phi3/modeling_phi3.hpp
index 574a4ab2..a87b5490 100644
--- a/src/models/phi3/modeling_phi3.hpp
+++ b/src/models/phi3/modeling_phi3.hpp
@@ -29,8 +29,7 @@ class Phi3MLP final : public Module {
     vector<Tensor> Forward(vector<Tensor> inputs, vector<std::any> args) override {
         auto x = gate_up_proj(inputs[0]);
         auto split_tensors = Tensor::split(x, {ffn_hidden_, ffn_hidden_}, DIMENSION);
-        Tensor hidden = split_tensors[1];
-        x = hidden * silu(split_tensors[0]);
+        x = split_tensors[1] * silu(split_tensors[0]);
         x = down_proj(x);
         return {x};
     }
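
Note on the value-semantics migration (the sketch below is illustrative, not mllm code): the patch switches the Layer call operators and the Tensor free functions from returning `Tensor &` / `vector<std::reference_wrapper<Tensor>>` to returning `Tensor` / `vector<Tensor>` by value. The returned objects are copies of the tensors registered in `Tensor::graphs` (see `output_result.push_back(*Tensor::graphs[next_name]);`), so the graph map keeps owning the master objects. Assuming, as the diff suggests, that `Tensor` copies are shallow handles over shared storage, a minimal standalone C++ sketch of the pattern is:

    #include <iostream>
    #include <memory>
    #include <vector>

    // Toy stand-in for a tensor whose payload is shared between copies,
    // mirroring copies of graph-registered tensors aliasing one buffer.
    struct Toy {
        std::shared_ptr<int> data = std::make_shared<int>(0);
    };

    // Value-returning API: the returned copy shares `data`, so it stays
    // usable without handing out a reference into `graph`.
    std::vector<Toy> run(const std::vector<Toy> &graph) {
        return {graph.front()};
    }

    int main() {
        std::vector<Toy> graph(1);
        Toy out = run(graph)[0];              // cheap handle copy
        *out.data = 42;                       // writes through shared payload
        std::cout << *graph[0].data << "\n";  // prints 42
        return 0;
    }

The cost is one handle copy per call (a `shared_ptr` refcount bump in this sketch); the gain is that temporaries such as `run(graph)[0]`, or `split_tensors[1]` in the Phi3MLP hunk above, can be used directly in expressions instead of first being bound to a named lvalue.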