From a488ac2fe224e4ddc1880a0050333352a3375f9f Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 15 Jul 2021 12:30:35 +0000 Subject: [PATCH 01/17] some util functions for onnx frontend --- .../frontend/onnx_import/core/attribute.hpp | 27 ++++++++ .../frontend/onnx_import/util/util.cpp | 61 +++++++++++++++++++ .../frontend/onnx_import/util/util.hpp | 53 +++++++++++++++- 3 files changed, 140 insertions(+), 1 deletion(-) diff --git a/src/nnfusion/frontend/onnx_import/core/attribute.hpp b/src/nnfusion/frontend/onnx_import/core/attribute.hpp index 3596bd61c..12d6b0ae0 100644 --- a/src/nnfusion/frontend/onnx_import/core/attribute.hpp +++ b/src/nnfusion/frontend/onnx_import/core/attribute.hpp @@ -208,6 +208,33 @@ namespace nnfusion } } + template <> + inline onnx::GraphProto get_value(const onnx::AttributeProto& attribute) + { + NNFUSION_CHECK(attribute.type() == onnx::AttributeProto_AttributeType_GRAPH) + << "invalid attribute type : " + << onnx::AttributeProto_AttributeType_Name(attribute.type()); + + return attribute.g(); + } + + template <> + inline std::vector + get_value(const onnx::AttributeProto& attribute) + { + switch (attribute.type()) + { + case onnx::AttributeProto_AttributeType_GRAPH: + return {onnx::GraphProto{attribute.g()}}; + case onnx::AttributeProto_AttributeType_GRAPHS: + return {std::begin(attribute.graphs()), std::end(attribute.graphs())}; + default: + NNFUSION_CHECK_FAIL() + << "invalid attribute type : " + << onnx::AttributeProto_AttributeType_Name(attribute.type()); + } + } + } // namespace detail class Attribute diff --git a/src/nnfusion/frontend/onnx_import/util/util.cpp b/src/nnfusion/frontend/onnx_import/util/util.cpp index d6f52653d..2fb514ff7 100644 --- a/src/nnfusion/frontend/onnx_import/util/util.cpp +++ b/src/nnfusion/frontend/onnx_import/util/util.cpp @@ -280,6 +280,67 @@ namespace nnfusion name, std::vector(kernel_shape.size(), 1UL)); } + std::unordered_set extract_input(const onnx::GraphProto& graph_proto) + { + std::unordered_set 
node_inputs; + std::unordered_set node_outputs; + + for (auto node_proto : graph_proto.node()) + { + for (size_t i = 0; i < node_proto.input_size(); i++) + { + node_inputs.insert(node_proto.input(i)); + } + for (size_t i = 0; i < node_proto.output_size(); i++) + { + node_outputs.insert(node_proto.output(i)); + } + } + + std::unordered_set graph_inputs; + for (auto item : node_inputs) + { + if (node_outputs.find(item) == node_outputs.end()) + { + graph_inputs.insert(item); + } + } + + return graph_inputs; + } + + onnx::GraphProto complete_graphproto(const onnx::GraphProto& graph_proto) + { + onnx::GraphProto completed_graphproto(graph_proto); + + auto all_inputs = extract_input(graph_proto); + std::unordered_set existing_inputs; + for (auto input_proto : graph_proto.input()) + { + existing_inputs.insert(input_proto.name()); + } + + std::unordered_set missing_inputs; + for (auto input : all_inputs) + { + if (existing_inputs.find(input) == existing_inputs.end()) + { + missing_inputs.insert(input); + // std::cout << input << std::endl; + } + } + + for (auto item : missing_inputs) + { + auto input = completed_graphproto.add_input(); + input->set_name(item); + } + + // std::cout << completed_graphproto.DebugString() << std::endl; + + return completed_graphproto; + } + } // namespace onnx_import } // namespace frontend } // namespace nnfusion diff --git a/src/nnfusion/frontend/onnx_import/util/util.hpp b/src/nnfusion/frontend/onnx_import/util/util.hpp index 871ab4801..40891233d 100644 --- a/src/nnfusion/frontend/onnx_import/util/util.hpp +++ b/src/nnfusion/frontend/onnx_import/util/util.hpp @@ -160,6 +160,54 @@ namespace nnfusion return __get_data(tensor.uint64_data()); } + template <> + inline std::vector get_data(const onnx::TensorProto& tensor) + { + // NNFUSION_CHECK(tensor.has_raw_data()) << "Data type char only supports raw_data"; + // return __get_raw_data(tensor.raw_data()); + + // the following is for control-flow test + if (tensor.has_raw_data()) + { + return 
__get_raw_data(tensor.raw_data()); + } + // NNFUSION_CHECK(tensor.data_type() == onnx::TensorProto_DataType_INT32) + // << "invalid data type: " + // << onnx::TensorProto_DataType_Name( + // static_cast(tensor.data_type())); + auto tmp = __get_data(tensor.int32_data()); + std::vector ret; + for (auto item : tmp) + { + ret.push_back((char)item); + } + return ret; + } + + template <> + inline std::vector get_data(const onnx::TensorProto& tensor) + { + // NNFUSION_CHECK(tensor.has_raw_data()) << "Data type boolean only supports raw_data"; + // return __get_raw_data(tensor.raw_data()); + + // the following is for control-flow test + if (tensor.has_raw_data()) + { + return __get_raw_data(tensor.raw_data()); + } + // NNFUSION_CHECK(tensor.data_type() == onnx::TensorProto_DataType_INT32) + // << "invalid data type: " + // << onnx::TensorProto_DataType_Name( + // static_cast(tensor.data_type())); + auto tmp = __get_data(tensor.int32_data()); + std::vector ret; + for (auto item : tmp) + { + ret.push_back((bool)item); + } + return ret; + } + /// \brief Fill specified range with monotonic sequence. /// /// \param[in] first The iterator to the beginning of the range. 
@@ -181,7 +229,7 @@ namespace nnfusion *first = init_value; } } - } + } // namespace detail class Tensor; class Node; @@ -322,6 +370,9 @@ namespace nnfusion const std::string& name, const Shape& kernel_shape); + std::unordered_set extract_input(const onnx::GraphProto& graph_proto); + onnx::GraphProto complete_graphproto(const onnx::GraphProto& graph_proto); + } // namespace onnx_import } // namespace frontend } // namespace nnfusion From ae261c7baabc0270bfd6b3e27ed473a58c4d7828 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 15 Jul 2021 12:31:37 +0000 Subject: [PATCH 02/17] more datatype support in constant op --- src/nnfusion/frontend/onnx_import/op/constant.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nnfusion/frontend/onnx_import/op/constant.hpp b/src/nnfusion/frontend/onnx_import/op/constant.hpp index f3dd0bfc9..2e3d4f86a 100644 --- a/src/nnfusion/frontend/onnx_import/op/constant.hpp +++ b/src/nnfusion/frontend/onnx_import/op/constant.hpp @@ -48,7 +48,8 @@ namespace nnfusion static const std::map( const element::Type&, const Tensor&)>> - the_map = {{element::f32, __make_constant_op}, + the_map = {{element::boolean, __make_constant_op}, + {element::f32, __make_constant_op}, {element::f64, __make_constant_op}, {element::i32, __make_constant_op}, {element::i64, __make_constant_op}, From 73d3e6f4c7f1202eea1fb642b4e1a4d0845793b5 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 15 Jul 2021 12:34:33 +0000 Subject: [PATCH 03/17] draft: onnx frontend support for if and loop op --- .../frontend/onnx_import/CMakeLists.txt | 4 + src/nnfusion/frontend/onnx_import/op/if.cpp | 376 ++++++++++++++++++ src/nnfusion/frontend/onnx_import/op/if.hpp | 34 ++ src/nnfusion/frontend/onnx_import/op/loop.cpp | 277 +++++++++++++ src/nnfusion/frontend/onnx_import/op/loop.hpp | 186 +++++++++ 5 files changed, 877 insertions(+) create mode 100644 src/nnfusion/frontend/onnx_import/op/if.cpp create mode 100644 src/nnfusion/frontend/onnx_import/op/if.hpp 
create mode 100644 src/nnfusion/frontend/onnx_import/op/loop.cpp create mode 100644 src/nnfusion/frontend/onnx_import/op/loop.hpp diff --git a/src/nnfusion/frontend/onnx_import/CMakeLists.txt b/src/nnfusion/frontend/onnx_import/CMakeLists.txt index caec38258..72eaa46c1 100644 --- a/src/nnfusion/frontend/onnx_import/CMakeLists.txt +++ b/src/nnfusion/frontend/onnx_import/CMakeLists.txt @@ -32,6 +32,10 @@ add_library(onnx_import STATIC core/attribute.cpp core/node.hpp core/node.cpp + op/if.hpp + op/if.cpp + op/loop.hpp + op/loop.cpp op/no.hpp op/no.cpp op/slice.hpp diff --git a/src/nnfusion/frontend/onnx_import/op/if.cpp b/src/nnfusion/frontend/onnx_import/op/if.cpp new file mode 100644 index 000000000..99b8fe3be --- /dev/null +++ b/src/nnfusion/frontend/onnx_import/op/if.cpp @@ -0,0 +1,376 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "if.hpp" +#include "../util/graph_convert.hpp" +#include "../util/util.hpp" +#include "nnfusion/core/operators/op_define/if.hpp" + +using namespace nnfusion::frontend::onnx_import; + +/* +class Model(torch.jit.ScriptModule): + def __init__(self): + super(Model, self).__init__() + + @torch.jit.script_method + def forward(self, x: torch.Tensor, y: torch.Tensor, z: torch.Tensor, cond: int): + t = x - y + if cond > 0: + if cond > 1: + return x * z + else: + return x * t + else: + return x + y + +x = torch.ones([2, 2], dtype=torch.float32) +y = torch.ones([2, 2], dtype=torch.float32) +z = torch.ones([2, 2], dtype=torch.float32) + + +ir_version: 6 +producer_name: "pytorch" +producer_version: "1.6" +graph { + node { + input: "x.1" + input: "y.1" + output: "4" + name: "Sub_0" + op_type: "Sub" + } + node { + output: "5" + name: "Constant_1" + op_type: "Constant" + attribute { + name: "value" + t { + data_type: 7 + raw_data: "\000\000\000\000\000\000\000\000" + } + type: TENSOR + } + } + node { + input: "cond.1" + input: "5" + output: "6" + name: "Greater_2" + op_type: "Greater" + } + node { + input: 
"6" + output: "7" + name: "If_3" + op_type: "If" + attribute { + name: "then_branch" + g { + node { + output: "8" + name: "Constant_4" + op_type: "Constant" + attribute { + name: "value" + t { + data_type: 7 + raw_data: "\001\000\000\000\000\000\000\000" + } + type: TENSOR + } + } + node { + input: "cond.1" + input: "8" + output: "9" + name: "Greater_5" + op_type: "Greater" + } + node { + input: "9" + output: "10" + name: "If_6" + op_type: "If" + attribute { + name: "then_branch" + g { + node { + input: "x.1" + input: "z.1" + output: "11" + name: "Mul_7" + op_type: "Mul" + } + name: "torch-jit-export2" + output { + name: "11" + } + } + type: GRAPH + } + attribute { + name: "else_branch" + g { + node { + input: "x.1" + input: "4" + output: "12" + name: "Mul_8" + op_type: "Mul" + } + name: "torch-jit-export3" + output { + name: "12" + } + } + type: GRAPH + } + } + name: "torch-jit-export1" + output { + name: "10" + } + } + type: GRAPH + } + attribute { + name: "else_branch" + g { + node { + input: "x.1" + input: "y.1" + output: "13" + name: "Add_9" + op_type: "Add" + } + name: "torch-jit-export4" + output { + name: "13" + } + } + type: GRAPH + } + } + name: "torch-jit-export" + input { + name: "x.1" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "y.1" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "z.1" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "cond.1" + type { + tensor_type { + elem_type: 7 + shape { + } + } + } + } + output { + name: "7" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +*/ + +namespace nnfusion +{ + namespace frontend + { + namespace onnx_import + { + namespace set_1 + 
{ + NamedNodeVector TranslateIfOp(const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph) + { + NNFUSION_CHECK_FAIL() + << "This is a placeholder convert_func, please use the real one."; + return {}; + } + + NamedNodeVector TranslateIfOp( + const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph, + const std::unordered_map& domain_convert_func_map, + const string& model_dir, + const std::unordered_map& dim_params) + { + Node node(node_proto); + onnx::GraphProto then_branch_graph_proto = + node.get_attribute_value("then_branch"); + onnx::GraphProto else_branch_graph_proto = + node.get_attribute_value("else_branch"); + + onnx::NodeProto completed_node_proto(node_proto); + auto then_branch_graph_inputs = extract_input(then_branch_graph_proto); + auto else_branch_graph_inputs = extract_input(else_branch_graph_proto); + std::unordered_set node_inputs; + for (size_t i = 0; i < node_proto.input_size(); i++) + { + node_inputs.insert(node_proto.input(i)); + } + for (auto item : then_branch_graph_inputs) + { + if (node_inputs.find(item) == node_inputs.end()) + { + completed_node_proto.add_input(item); + node_inputs.insert(item); + } + } + for (auto item : else_branch_graph_inputs) + { + if (node_inputs.find(item) == node_inputs.end()) + { + completed_node_proto.add_input(item); + node_inputs.insert(item); + } + } + auto input_indexes = GetAllInputIndex(all_ng_nodes, completed_node_proto); + + // process then_branch graph and else_branch_graph + std::shared_ptr then_branch_graph; + std::shared_ptr else_branch_graph; + { + then_branch_graph_proto = complete_graphproto(then_branch_graph_proto); + GraphProtoConvert then_branch_graph_convert(then_branch_graph_proto, + domain_convert_func_map, + model_dir, + dim_params, + all_ng_nodes, + true); + then_branch_graph = then_branch_graph_convert.get_graph(); + + else_branch_graph_proto = complete_graphproto(else_branch_graph_proto); + GraphProtoConvert 
else_branch_graph_convert(else_branch_graph_proto, + domain_convert_func_map, + model_dir, + dim_params, + all_ng_nodes, + true); + else_branch_graph = else_branch_graph_convert.get_graph(); + } + + auto if_op = std::make_shared(then_branch_graph, else_branch_graph); + if_op->set_name(node_proto.name()); + auto if_gnode = m_graph->add_node_and_edge(if_op, input_indexes); + + NamedNodeVector ret; + for (size_t i = 0; i < node_proto.output_size(); i++) + { + ret.push_back(NamedNode(node_proto.output(i), if_gnode, i)); + } + + return ret; + + // for (auto item : all_ng_nodes) + // { + // std::cout << "NodeMap[" << item.first << "]: " << item.second.size() << std::endl; + // } + + // // std::cout << then_branch_graph_proto.DebugString() << std::endl; + // // std::cout << else_branch_graph_proto.DebugString() << std::endl; + + // std::vector model_inputs; + // for (auto i = 0; i < model_proto.graph().input_size(); i++) + // { + // model_inputs.push_back(model_proto.graph().input(i)); + // } + + // for (auto i = 0; i < model_inputs.size(); i++) + // { + // auto input = then_branch_graph_proto.add_input(); + // input->CopyFrom(model_inputs[i]); + // } + // for (auto i = 0; i < model_inputs.size(); i++) + // { + // auto input = else_branch_graph_proto.add_input(); + // input->CopyFrom(model_inputs[i]); + // } + + // onnx::ModelProto then_branch_model_proto = onnx::ModelProto(model_proto); + // then_branch_model_proto.set_allocated_graph(&then_branch_graph_proto); + // // GraphConvert then_branch_converter = + // // GraphConvert(then_branch_model_proto, {}, "", all_ng_nodes); + // // auto then_branch_graph = then_branch_converter.get_graph(); + // // auto then_branch_graph = std::make_shared() + + // onnx::ModelProto else_branch_model_proto = onnx::ModelProto(model_proto); + // else_branch_model_proto.set_allocated_graph(&else_branch_graph_proto); + // // GraphConvert else_branch_converter = + // // GraphConvert(else_branch_model_proto, {}, "", all_ng_nodes); + // // auto 
else_branch_graph = else_branch_converter.get_graph(); + + // // exit(1); + } + + } // namespace set_1 + + } //namespace onnx_import + + } // namespace frontend + +} // namespace nnfusion diff --git a/src/nnfusion/frontend/onnx_import/op/if.hpp b/src/nnfusion/frontend/onnx_import/op/if.hpp new file mode 100644 index 000000000..b73acbec3 --- /dev/null +++ b/src/nnfusion/frontend/onnx_import/op/if.hpp @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#pragma once + +#include "core/node.hpp" + +namespace nnfusion +{ + namespace frontend + { + namespace onnx_import + { + namespace set_1 + { + NamedNodeVector TranslateIfOp(const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph); + + NamedNodeVector TranslateIfOp( + const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph, + const std::unordered_map& domain_convert_func_map, + const string& model_dir, + const std::unordered_map& dim_params = {}); + + } // namespace set_1 + + } //namespace onnx_import + + } // namespace frontend + +} // namespace nnfusion diff --git a/src/nnfusion/frontend/onnx_import/op/loop.cpp b/src/nnfusion/frontend/onnx_import/op/loop.cpp new file mode 100644 index 000000000..ff3643732 --- /dev/null +++ b/src/nnfusion/frontend/onnx_import/op/loop.cpp @@ -0,0 +1,277 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#include "loop.hpp" +#include "../util/graph_convert.hpp" +#include "../util/util.hpp" +#include "nnfusion/core/operators/op_define/loop.hpp" + +using namespace nnfusion::frontend::onnx_import; + +/* +class Model(torch.jit.ScriptModule): + def __init__(self): + super(Model, self).__init__() + + @torch.jit.script_method + def forward(self, x: torch.Tensor, num_loop: int): + ret = x + for i in range(num_loop): + ret = ret + x + return ret + +x = torch.ones([2, 2], dtype=torch.float32) +a = torch.tensor(5) + + +ir_version: 6 +producer_name: "pytorch" +producer_version: "1.9" +graph { + node { + output: "2" + name: "Constant_0" + op_type: "Constant" + attribute { + name: "value" + t { + data_type: 9 + raw_data: "\001" + } + type: TENSOR + } + } + node { + input: "num_loop.1" + input: "2" + input: "x.1" + output: "3" + name: "Loop_1" + op_type: "Loop" + attribute { + name: "body" + g { + node { + input: "ret.9" + input: "x.1" + output: "7" + name: "Add_2" + op_type: "Add" + } + node { + input: "2" + output: "8" + name: "Identity_3" + op_type: "Identity" + } + name: "torch-jit-export1" + input { + name: "i" + type { + tensor_type { + elem_type: 7 + shape { + } + } + } + } + input { + name: "cond" + type { + tensor_type { + elem_type: 9 + shape { + } + } + } + } + input { + name: "ret.9" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } + output { + name: "8" + type { + tensor_type { + elem_type: 9 + shape { + } + } + } + } + output { + name: "7" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } + } + type: GRAPH + } + } + name: "torch-jit-export" + input { + name: "x.1" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } + input { + name: "num_loop.1" + type { + tensor_type { + elem_type: 7 + shape { + } + } + } + } + output { + name: "3" + type { + tensor_type { 
+ elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +*/ + +namespace nnfusion +{ + namespace frontend + { + namespace onnx_import + { + namespace set_1 + { + NamedNodeVector TranslateLoopOp(const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph) + { + NNFUSION_CHECK_FAIL() + << "This is a placeholder convert_func, please use the real one."; + return {}; + } + + NamedNodeVector TranslateLoopOp( + const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph, + const std::unordered_map& domain_convert_func_map, + const string& model_dir, + const std::unordered_map& dim_params) + { + Node node(node_proto); + onnx::GraphProto loop_body_graph_proto = + node.get_attribute_value("body"); + + onnx::NodeProto completed_node_proto(node_proto); + auto loop_body_graph_inputs = extract_input(loop_body_graph_proto); + std::unordered_set node_inputs; + for (size_t i = 0; i < node_proto.input_size(); i++) + { + node_inputs.insert(node_proto.input(i)); + } + for (const auto& input_proto : loop_body_graph_proto.input()) + { + node_inputs.insert(input_proto.name()); + } + for (auto item : loop_body_graph_inputs) + { + if (node_inputs.find(item) == node_inputs.end()) + { + completed_node_proto.add_input(item); + node_inputs.insert(item); + } + } + + auto input_indexes = GetAllInputIndex(all_ng_nodes, completed_node_proto); + + // process loop_body_graph + std::shared_ptr loop_body_graph; + { + loop_body_graph_proto = complete_graphproto(loop_body_graph_proto); + std::cout << loop_body_graph_proto.DebugString() << std::endl; + GraphProtoConvert loop_body_graph_convert(loop_body_graph_proto, + domain_convert_func_map, + model_dir, + dim_params, + all_ng_nodes, + true); + loop_body_graph = loop_body_graph_convert.get_graph(); + } + + std::vector output_shapes; + std::vector output_types; + for (auto output : loop_body_graph_proto.output()) + { 
+ ValueInfo output_value_info(output, dim_params); + output_shapes.push_back(output_value_info.get_shape()); + output_types.push_back(output_value_info.get_element_type()); + } + + auto loop_op = + std::make_shared(loop_body_graph, output_shapes, output_types); + loop_op->set_name(node_proto.name()); + auto loop_gnode = m_graph->add_node_and_edge(loop_op, input_indexes); + + NamedNodeVector ret; + for (size_t i = 0; i < node_proto.output_size(); i++) + { + ret.push_back(NamedNode(node_proto.output(i), loop_gnode, i)); + } + + return ret; + } + + } // namespace set_1 + + } //namespace onnx_import + + } // namespace frontend + +} // namespace nnfusion diff --git a/src/nnfusion/frontend/onnx_import/op/loop.hpp b/src/nnfusion/frontend/onnx_import/op/loop.hpp new file mode 100644 index 000000000..3004bbc49 --- /dev/null +++ b/src/nnfusion/frontend/onnx_import/op/loop.hpp @@ -0,0 +1,186 @@ +/* +ir_version: 6 +producer_name: "pytorch" +producer_version: "1.6" +graph { + node { + output: "1" + name: "Constant_0" + op_type: "Constant" + attribute { + name: "value" + t { + data_type: 7 + raw_data: "\001\000\000\000\000\000\000\000" + } + type: TENSOR + } + } + node { + output: "2" + name: "Constant_1" + op_type: "Constant" + attribute { + name: "value" + t { + dims: 2 + dims: 2 + data_type: 1 + raw_data: "\000\000\200?\000\000\200?\000\000\200?\000\000\200?" + } + type: TENSOR + } + } + node { + input: "1" + output: "3" + name: "Cast_2" + op_type: "Cast" + attribute { + name: "to" + i: 9 + type: INT + } + } + node { + input: "num_loop.1" + input: "3" + input: "2" + output: "4" + name: "Loop_3" + op_type: "Loop" + attribute { + name: "body" + g { + node { + output: "8" + name: "Constant_4" + op_type: "Constant" + attribute { + name: "value" + t { + dims: 2 + dims: 2 + data_type: 1 + raw_data: "\000\000\200?\000\000\200?\000\000\200?\000\000\200?" 
+ } + type: TENSOR + } + } + node { + input: "ret.6" + input: "8" + output: "9" + name: "Add_5" + op_type: "Add" + } + node { + input: "1" + output: "10" + name: "Cast_6" + op_type: "Cast" + attribute { + name: "to" + i: 9 + type: INT + } + } + name: "torch-jit-export1" + input { + name: "i" + type { + tensor_type { + elem_type: 7 + shape { + } + } + } + } + input { + name: "cond" + type { + tensor_type { + elem_type: 9 + } + } + } + input { + name: "ret.6" + } + output { + name: "10" + } + output { + name: "9" + } + } + type: GRAPH + } + } + name: "torch-jit-export" + input { + name: "num_loop.1" + type { + tensor_type { + elem_type: 7 + shape { + } + } + } + } + output { + name: "4" + type { + tensor_type { + elem_type: 1 + shape { + dim { + dim_value: 2 + } + dim { + dim_value: 2 + } + } + } + } + } +} +opset_import { + version: 11 +} +*/ + +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#pragma once + +#include "core/node.hpp" + +namespace nnfusion +{ + namespace frontend + { + namespace onnx_import + { + namespace set_1 + { + NamedNodeVector TranslateLoopOp(const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph); + + NamedNodeVector TranslateLoopOp( + const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph, + const std::unordered_map& domain_convert_func_map, + const string& model_dir, + const std::unordered_map& dim_params = {}); + + } // namespace set_1 + + } //namespace onnx_import + + } // namespace frontend + +} // namespace nnfusion From 6cff7e72ad90dfc861c23d6d831b0c1a864032b0 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 15 Jul 2021 12:35:44 +0000 Subject: [PATCH 04/17] draft: if and loop op_define --- src/nnfusion/core/operators/CMakeLists.txt | 2 + src/nnfusion/core/operators/op_define/if.cpp | 58 +++++++++++++++++++ src/nnfusion/core/operators/op_define/if.hpp | 49 ++++++++++++++++ .../core/operators/op_define/loop.cpp | 55 
++++++++++++++++++ .../core/operators/op_define/loop.hpp | 49 ++++++++++++++++ 5 files changed, 213 insertions(+) create mode 100644 src/nnfusion/core/operators/op_define/if.cpp create mode 100644 src/nnfusion/core/operators/op_define/if.hpp create mode 100644 src/nnfusion/core/operators/op_define/loop.cpp create mode 100644 src/nnfusion/core/operators/op_define/loop.hpp diff --git a/src/nnfusion/core/operators/CMakeLists.txt b/src/nnfusion/core/operators/CMakeLists.txt index 78544dbe7..73c70a785 100644 --- a/src/nnfusion/core/operators/CMakeLists.txt +++ b/src/nnfusion/core/operators/CMakeLists.txt @@ -42,9 +42,11 @@ set(SRC op_define/gelu.cpp op_define/greater_eq.cpp op_define/greater.cpp + op_define/if.cpp op_define/less_eq.cpp op_define/less.cpp op_define/log.cpp + op_define/loop.cpp op_define/lrn.cpp op_define/max_pool.cpp op_define/max.cpp diff --git a/src/nnfusion/core/operators/op_define/if.cpp b/src/nnfusion/core/operators/op_define/if.cpp new file mode 100644 index 000000000..41f45d529 --- /dev/null +++ b/src/nnfusion/core/operators/op_define/if.cpp @@ -0,0 +1,58 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+
+#include "if.hpp"
+
+using namespace std;
+using namespace nnfusion::op;
+
+If::If(std::shared_ptr& then_branch_graph,
+       std::shared_ptr& else_branch_graph)
+    : Op("If")
+    , m_then_branch_graph(then_branch_graph)
+    , m_else_branch_graph(else_branch_graph)
+{
+}
+
+void If::validate_and_infer_types(std::shared_ptr gnode)
+{
+    nnfusion::Shape cond_shape = gnode->get_input_shape(0);
+    nnfusion::element::Type cond_et = gnode->get_input_element_type(0);
+    NNFUSION_CHECK(cond_shape.size() == 0)
+        << "The condition tensor of the If operation must be scalar.";
+    NNFUSION_CHECK(cond_et == nnfusion::element::boolean)
+        << "The condition tensor of the If operation must be boolean.";
+
+    auto then_branch_outputs = m_then_branch_graph->get_outputs();
+    auto else_branch_outputs = m_else_branch_graph->get_outputs();
+    NNFUSION_CHECK(then_branch_outputs.size() == else_branch_outputs.size())
+        << "The outputs in the then_branch and else_branch must have the same shape and "
+           "same data type.";
+    for (size_t i = 0; i < then_branch_outputs.size(); i++)
+    {
+        NNFUSION_CHECK(then_branch_outputs[i]->get_shape() == else_branch_outputs[i]->get_shape() &&
+                       then_branch_outputs[i]->get_element_type() ==
+                           else_branch_outputs[i]->get_element_type())
+            << "The outputs in the then_branch and else_branch must have the same shape and "
+               "same data type.";
+
+        gnode->set_output_type_and_shape(
+            i, then_branch_outputs[i]->get_element_type(), then_branch_outputs[i]->get_shape());
+    }
+}
\ No newline at end of file
diff --git a/src/nnfusion/core/operators/op_define/if.hpp b/src/nnfusion/core/operators/op_define/if.hpp
new file mode 100644
index 000000000..acd093aaf
--- /dev/null
+++ b/src/nnfusion/core/operators/op_define/if.hpp
@@ -0,0 +1,49 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with
the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#pragma once + +#include "../op.hpp" +#include "nnfusion/core/graph/graph.hpp" + +namespace nnfusion +{ + namespace op + { + /// \brief If control-flow operation, with same definition as https://github.com/onnx/onnx/blob/master/docs/Changelog.md#If-1. + class If : public Op + { + public: + /// \brief Constructs an if operation + /// + /// \param then_branch_graph The then_branch graph.
+ /// `[f]` + /// \param else_branch_graph The else_branch graph.
+ /// `[f]` + If(std::shared_ptr& then_branch_graph, + std::shared_ptr& else_branch_graph); + + void validate_and_infer_types(std::shared_ptr gnode) override; + + protected: + std::shared_ptr m_then_branch_graph; + std::shared_ptr m_else_branch_graph; + }; + } +} diff --git a/src/nnfusion/core/operators/op_define/loop.cpp b/src/nnfusion/core/operators/op_define/loop.cpp new file mode 100644 index 000000000..9da8b2dea --- /dev/null +++ b/src/nnfusion/core/operators/op_define/loop.cpp @@ -0,0 +1,55 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+
+#include "loop.hpp"
+
+using namespace std;
+using namespace nnfusion::op;
+
+Loop::Loop(std::shared_ptr& loop_body_graph,
+           const std::vector& output_shapes,
+           const std::vector& output_types)
+    : Op("Loop")
+    , m_loop_body_graph(loop_body_graph)
+    , m_output_shapes(output_shapes)
+    , m_output_types(output_types)
+{
+}
+
+void Loop::validate_and_infer_types(std::shared_ptr gnode)
+{
+    nnfusion::Shape trip_shape = gnode->get_input_shape(0);
+    nnfusion::element::Type trip_et = gnode->get_input_element_type(0);
+    NNFUSION_CHECK(trip_shape.size() == 0)
+        << "The trip-count tensor of the Loop operation must be scalar.";
+    NNFUSION_CHECK(trip_et == nnfusion::element::i64)
+        << "The trip-count tensor of the Loop operation must be int64.";
+
+    nnfusion::Shape cond_shape = gnode->get_input_shape(1);
+    nnfusion::element::Type cond_et = gnode->get_input_element_type(1);
+    NNFUSION_CHECK(cond_shape.size() == 0)
+        << "The condition tensor of the Loop operation must be scalar.";
+    NNFUSION_CHECK(cond_et == nnfusion::element::boolean)
+        << "The condition tensor of the Loop operation must be boolean.";
+
+    for (size_t i = 0; i < m_output_types.size(); i++)
+    {
+        gnode->set_output_type_and_shape(i, m_output_types[i], m_output_shapes[i]);
+    }
+}
\ No newline at end of file
diff --git a/src/nnfusion/core/operators/op_define/loop.hpp b/src/nnfusion/core/operators/op_define/loop.hpp
new file mode 100644
index 000000000..d531e6a74
--- /dev/null
+++ b/src/nnfusion/core/operators/op_define/loop.hpp
@@ -0,0 +1,49 @@
+//*****************************************************************************
+// Copyright 2017-2020 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
 +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#pragma once + +#include "../op.hpp" +#include "nnfusion/core/graph/graph.hpp" + +namespace nnfusion +{ + namespace op + { + /// \brief Loop control-flow operation, with same definition as https://github.com/onnx/onnx/blob/master/docs/Changelog.md#Loop-11. + class Loop : public Op + { + public: + /// \brief Constructs a loop operation + /// + /// \param loop_body_graph The loop body graph.
+ /// `[f]` + Loop(std::shared_ptr& loop_body_graph, + const std::vector& output_shapes, + const std::vector& output_types); + + void validate_and_infer_types(std::shared_ptr gnode) override; + + protected: + std::shared_ptr m_loop_body_graph; + std::vector m_output_shapes; + std::vector m_output_types; + }; + } // namespace op +} // namespace nnfusion From ed0221ae6d81cbbb2a1afb123eaa9fb26255bd0a Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 15 Jul 2021 12:37:25 +0000 Subject: [PATCH 05/17] refactor GraphConvert of the ONNX frontend, support if and loop convert --- .../frontend/onnx_import/ops_bridge.cpp | 7 + .../onnx_import/util/graph_convert.cpp | 465 +++++++++++------- .../onnx_import/util/graph_convert.hpp | 74 ++- 3 files changed, 364 insertions(+), 182 deletions(-) diff --git a/src/nnfusion/frontend/onnx_import/ops_bridge.cpp b/src/nnfusion/frontend/onnx_import/ops_bridge.cpp index e143d3239..05d0abc19 100644 --- a/src/nnfusion/frontend/onnx_import/ops_bridge.cpp +++ b/src/nnfusion/frontend/onnx_import/ops_bridge.cpp @@ -45,10 +45,12 @@ #include "op/gather.hpp" #include "op/gemm.hpp" #include "op/identity.hpp" +#include "op/if.hpp" #include "op/index_reduce.hpp" #include "op/layer_norm.hpp" #include "op/leaky_relu.hpp" #include "op/log_softmax.hpp" +#include "op/loop.hpp" #include "op/lstm.hpp" #include "op/matmul.hpp" #include "op/memory_copy.hpp" @@ -189,12 +191,17 @@ namespace nnfusion REGISTER_OPERATOR("Greater", 1, TranslateBinaryOp); //REGISTER_OPERATOR("HardSigmoid", 1, hard_sigmoid); REGISTER_OPERATOR("Identity", 1, TranslateIdentityOp); + // REGISTER_OPERATOR("If", 1, TranslateIfOp); + REGISTER_OPERATOR( + "If", 1, TranslateIdentityOp); // TODO(lingm): fix convert_func map REGISTER_OPERATOR("LayerNormalization", 1, TranslateLayerNormalizationOp); REGISTER_OPERATOR("LayerNormalizationGrad", 1, TranslateLayerNormalizationGradOp); REGISTER_OPERATOR("LeakyRelu", 1, TranslateLeakyReluOp); REGISTER_OPERATOR("Less", 1, TranslateBinaryOp); 
REGISTER_OPERATOR("Log", 1, TranslateUnaryOp); REGISTER_OPERATOR("LogSoftmax", 1, TranslateLogSoftmaxOp); + REGISTER_OPERATOR( + "Loop", 1, TranslateIdentityOp); // TODO(lingm): fix convert_func map //REGISTER_OPERATOR("LRN", 1, lrn); REGISTER_OPERATOR("LSTM", 1, TranslateLstmOp); REGISTER_OPERATOR("MatMul", 1, TranslateMatmulOp); diff --git a/src/nnfusion/frontend/onnx_import/util/graph_convert.cpp b/src/nnfusion/frontend/onnx_import/util/graph_convert.cpp index d514a6ee3..03e9ff9ac 100644 --- a/src/nnfusion/frontend/onnx_import/util/graph_convert.cpp +++ b/src/nnfusion/frontend/onnx_import/util/graph_convert.cpp @@ -22,9 +22,12 @@ #include "graph_convert.hpp" #include #include +#include "../op/if.hpp" +#include "../op/loop.hpp" #include "nnfusion/core/operators/generic_op/generic_op.hpp" #include "op/custom_op.hpp" #include "ops_bridge.hpp" +#include "util.hpp" DECLARE_bool(ftraining_mode); @@ -210,116 +213,22 @@ namespace nnfusion tensor.set_raw_data(raw_data); } } - } - - GraphConvert::GraphConvert(const onnx::ModelProto& model_proto, - const std::unordered_map& dim_params, - const string& model_dir) - : onnx_model_proto{&model_proto} - , onnx_graph_proto(&(model_proto.graph())) - , m_graph(new nnfusion::graph::Graph()) + } // namespace + + GraphProtoConvert::GraphProtoConvert( + const onnx::GraphProto& graph_proto, + const std::unordered_map& domain_convert_func_map, + const string& model_dir, + const std::unordered_map& dim_params, + const NodeMap& node_map, + bool flag_subgraph) + : onnx_graph_proto(&graph_proto) + , m_domain_convert_func_map(domain_convert_func_map) + , m_model_dir(model_dir) , m_dim_params(dim_params) - , model_dir(model_dir) + , m_node_map(node_map) + , m_flag_subgraph(flag_subgraph) { - print_model_proto(model_proto); - - // Note: onnx connect nodes by tensor's name instead of op name - /* - ir_version: 3 - producer_name: "ngraph ONNXImporter" - graph { - node { - input: "A" - input: "B" - output: "X" - name: "add_node1" - op_type: "Add" 
- } - node { - input: "X" - input: "C" - output: "Y" - name: "add_node2" - op_type: "Add" - } - name: "test_graph" - input { - name: "A" - type { - tensor_type { - elem_type: FLOAT - shape { - dim { - dim_value: 1 - } - } - } - } - } - input { - name: "B" - type { - tensor_type { - elem_type: FLOAT - shape { - dim { - dim_value: 1 - } - } - } - } - } - input { - name: "C" - type { - tensor_type { - elem_type: FLOAT - shape { - dim { - dim_value: 1 - } - } - } - } - } - output { - name: "Y" - type { - tensor_type { - elem_type: FLOAT - shape { - dim { - dim_value: 1 - } - } - } - } - } - } - opset_import { - version: 4 - } - */ - NNFUSION_LOG(INFO) << "Converting Onnx Graph"; - // Walk through the elements of opset_import field and register operator sets - // for each domain. An exception UnknownDomain() will raise if the domain is - // unknown or invalid. - for (const auto& id : onnx_model_proto->opset_import()) - { - m_domain_convert_func_map.emplace( - id.domain(), - OperatorsBridge::get_convert_func_map( - id.version(), (id.domain() == "ai.onnx" ? "" : id.domain()))); - } - // onnx.proto(.3): the empty string ("") for domain or absence of opset_import field - // implies the operator set that is defined as part of the ONNX specification. 
- const auto dm = m_domain_convert_func_map.find(""); - if (dm == std::end(m_domain_convert_func_map)) - { - m_domain_convert_func_map.emplace( - "", OperatorsBridge::get_convert_func_map(ONNX_OPSET_VERSION, "")); - } - m_graph = std::make_shared(); NNFUSION_CHECK(onnx_graph_proto->sparse_initializer_size() == 0) @@ -334,7 +243,7 @@ namespace nnfusion { if (tensor.has_name()) { - move_external_to_rawdata(tensor, model_dir); + move_external_to_rawdata(tensor, m_model_dir); if (FLAGS_ftraining_mode) { element::Type type; @@ -365,37 +274,24 @@ namespace nnfusion } } // Process all ONNX graph inputs, convert them to NNFusion nodes - for (const auto& input_proto : onnx_graph_proto->input()) + if (m_flag_subgraph) { - ValueInfo input_value_info(input_proto, m_dim_params); - std::shared_ptr input_gnode; - // TODO: parameter might have default value in initializer - auto it = m_node_map.find(input_proto.name()); - if (it != std::end(m_node_map)) + for (const auto& input_proto : onnx_graph_proto->input()) { - NNFUSION_LOG(NNFUSION_WARNING) << "Ignore input: " << input_proto.name() - << ", because it has a default initializers"; - NNFUSION_CHECK(it->second.size() == 1) - << "Multi outputs found for initializer " << input_proto.name(); - if (it->second[0].get_element_type() != input_value_info.get_element_type()) + std::shared_ptr input_gnode; + auto it = m_node_map.find(input_proto.name()); + std::shared_ptr input_op; + if (it != std::end(m_node_map)) { - auto cast_op = - std::make_shared(input_value_info.get_element_type()); - cast_op->set_name(input_proto.name()); - auto input_gnode = m_graph->add_node_and_edge(cast_op, it->second); - m_node_map[input_proto.name()] = {GNodeIndex{input_gnode}}; - if (m_output_names.find(input_gnode->get_name()) != - m_output_names.end()) - { - // TODO: should specify which output of current gnode - m_graph_outputs.emplace_back(input_gnode); - } + input_op = std::make_shared( + it->second[0].get_element_type(), it->second[0].get_shape()); + 
} + else + { + ValueInfo input_value_info(input_proto, m_dim_params); + input_op = std::make_shared( + input_value_info.get_element_type(), input_value_info.get_shape()); } - } - else - { - auto input_op = std::make_shared( - input_value_info.get_element_type(), input_value_info.get_shape()); input_op->set_name(input_proto.name()); input_gnode = m_graph->add_node_and_edge(input_op, graph::GNodeVector({})); m_node_map[input_proto.name()] = {GNodeIndex{input_gnode}}; @@ -406,38 +302,52 @@ namespace nnfusion } } } - - // Verify that ONNX graph contains only nodes of available operator types + else { - std::unordered_map domain2version; - for (const auto& id : onnx_model_proto->opset_import()) - { - if (id.domain() == "com.microsoft.nnfusion.custom") - { - continue; - } - domain2version[id.domain() == "ai.onnx" ? "" : id.domain()] = id.version(); - } - std::unordered_set unknown_ops; - for (const auto& node_proto : onnx_graph_proto->node()) + for (const auto& input_proto : onnx_graph_proto->input()) { - if (!is_operator_available(node_proto)) + ValueInfo input_value_info(input_proto, m_dim_params); + std::shared_ptr input_gnode; + // TODO: parameter might have default value in initializer + auto it = m_node_map.find(input_proto.name()); + if (it != std::end(m_node_map)) { - std::string op = - ((node_proto.domain() == "ai.onnx") ? 
"" - : node_proto.domain() + ".") + - node_proto.op_type() + ":" + - std::to_string(domain2version.at(node_proto.domain())); - unknown_ops.insert(op); + NNFUSION_LOG(NNFUSION_WARNING) + << "Ignore input: " << input_proto.name() + << ", because it has a default initializers"; + NNFUSION_CHECK(it->second.size() == 1) + << "Multi outputs found for initializer " << input_proto.name(); + if (it->second[0].get_element_type() != + input_value_info.get_element_type()) + { + auto cast_op = std::make_shared( + input_value_info.get_element_type()); + cast_op->set_name(input_proto.name()); + auto input_gnode = m_graph->add_node_and_edge(cast_op, it->second); + m_node_map[input_proto.name()] = {GNodeIndex{input_gnode}}; + if (m_output_names.find(input_gnode->get_name()) != + m_output_names.end()) + { + // TODO: should specify which output of current gnode + m_graph_outputs.emplace_back(input_gnode); + } + } } - } - if (unknown_ops.size() > 0) - { - for (auto op : unknown_ops) + else { - NNFUSION_LOG(ERROR) << "Unsupported op: " << op; + auto input_op = std::make_shared( + input_value_info.get_element_type(), input_value_info.get_shape()); + input_op->set_name(input_proto.name()); + input_gnode = + m_graph->add_node_and_edge(input_op, graph::GNodeVector({})); + m_node_map[input_proto.name()] = {GNodeIndex{input_gnode}}; + if (m_output_names.find(input_gnode->get_name()) != + m_output_names.end()) + { + // TODO: should specify which output of current gnode + m_graph_outputs.emplace_back(input_gnode); + } } - NNFUSION_CHECK_FAIL() << "Unsupported op count: " << unknown_ops.size(); } } @@ -496,15 +406,35 @@ namespace nnfusion m_graph->set_default_parameters(); m_graph->set_outputs(m_graph_outputs); - - NNFUSION_LOG(INFO) << "convert graph done"; } - NamedNodeVector GraphConvert::convert_node(const onnx::NodeProto& node_proto) + NamedNodeVector GraphProtoConvert::convert_node(const onnx::NodeProto& node_proto) { NNFUSION_LOG(INFO) << "convert node: " << node_proto.name(); - 
NamedNodeVector ret = get_convert_func(node_proto.op_type(), node_proto.domain())( - node_proto, m_node_map, m_graph); + NamedNodeVector ret; + if (node_proto.op_type() == "If") + { + ret = set_1::TranslateIfOp(node_proto, + m_node_map, + m_graph, + m_domain_convert_func_map, + m_model_dir, + m_dim_params); + } + else if (node_proto.op_type() == "Loop") + { + ret = set_1::TranslateLoopOp(node_proto, + m_node_map, + m_graph, + m_domain_convert_func_map, + m_model_dir, + m_dim_params); + } + else + { + ret = get_convert_func(node_proto.op_type(), + node_proto.domain())(node_proto, m_node_map, m_graph); + } for (int i = 0; i < ret.size(); i++) { NNFUSION_LOG(INFO) << "node " << node_proto.name() << ", output " << ret[i].name @@ -513,8 +443,8 @@ namespace nnfusion return std::move(ret); } - const ConvertFunc& GraphConvert::get_convert_func(const std::string& name, - const std::string& domain) const + const ConvertFunc& GraphProtoConvert::get_convert_func(const std::string& name, + const std::string& domain) const { if (domain == "com.microsoft.nnfusion.custom") { @@ -531,6 +461,203 @@ namespace nnfusion return op->second; } + GraphConvert::GraphConvert(const onnx::ModelProto& model_proto, + const std::unordered_map& dim_params, + const string& model_dir) + : onnx_model_proto{&model_proto} + , onnx_graph_proto(&(model_proto.graph())) + , m_graph(new nnfusion::graph::Graph()) + , m_dim_params(dim_params) + , m_model_dir(model_dir) + { + print_model_proto(model_proto); + + // Note: onnx connect nodes by tensor's name instead of op name + /* + ir_version: 3 + producer_name: "ngraph ONNXImporter" + graph { + node { + input: "A" + input: "B" + output: "X" + name: "add_node1" + op_type: "Add" + } + node { + input: "X" + input: "C" + output: "Y" + name: "add_node2" + op_type: "Add" + } + name: "test_graph" + input { + name: "A" + type { + tensor_type { + elem_type: FLOAT + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "B" + type { + tensor_type { + 
elem_type: FLOAT + shape { + dim { + dim_value: 1 + } + } + } + } + } + input { + name: "C" + type { + tensor_type { + elem_type: FLOAT + shape { + dim { + dim_value: 1 + } + } + } + } + } + output { + name: "Y" + type { + tensor_type { + elem_type: FLOAT + shape { + dim { + dim_value: 1 + } + } + } + } + } + } + opset_import { + version: 4 + } + */ + NNFUSION_LOG(INFO) << "Converting Onnx Graph"; + // Walk through the elements of opset_import field and register operator sets + // for each domain. An exception UnknownDomain() will raise if the domain is + // unknown or invalid. + for (const auto& id : onnx_model_proto->opset_import()) + { + m_domain_convert_func_map.emplace( + id.domain(), + OperatorsBridge::get_convert_func_map( + id.version(), (id.domain() == "ai.onnx" ? "" : id.domain()))); + } + // onnx.proto(.3): the empty string ("") for domain or absence of opset_import field + // implies the operator set that is defined as part of the ONNX specification. + const auto dm = m_domain_convert_func_map.find(""); + if (dm == std::end(m_domain_convert_func_map)) + { + m_domain_convert_func_map.emplace( + "", OperatorsBridge::get_convert_func_map(ONNX_OPSET_VERSION, "")); + } + + // Verify that ONNX graph contains only nodes of available operator types + { + std::unordered_map domain2version; + for (const auto& id : onnx_model_proto->opset_import()) + { + if (id.domain() == "com.microsoft.nnfusion.custom") + { + continue; + } + domain2version[id.domain() == "ai.onnx" ? "" : id.domain()] = id.version(); + } + std::unordered_set unknown_ops; + for (const auto& node_proto : onnx_graph_proto->node()) + { + if (!is_operator_available(node_proto)) + { + std::string op = + ((node_proto.domain() == "ai.onnx") ? 
"" + : node_proto.domain() + ".") + + node_proto.op_type() + ":" + + std::to_string(domain2version.at(node_proto.domain())); + unknown_ops.insert(op); + } + } + if (unknown_ops.size() > 0) + { + for (auto op : unknown_ops) + { + NNFUSION_LOG(ERROR) << "Unsupported op: " << op; + } + NNFUSION_CHECK_FAIL() << "Unsupported op count: " << unknown_ops.size(); + } + } + + // m_controlflow_graphproto_map = construct_controlflow_graphproto(*onnx_graph_proto); + + m_graph = convert_graph(*onnx_graph_proto); + + NNFUSION_LOG(INFO) << "convert graph done"; + } + + std::shared_ptr + GraphConvert::convert_graph(const onnx::GraphProto& graph_proto, + const NodeMap& node_map) + { + GraphProtoConvert converter = GraphProtoConvert(graph_proto, + m_domain_convert_func_map, + m_model_dir, + m_dim_params, + node_map, + false); + return converter.get_graph(); + } + + // std::unordered_map + // GraphConvert::construct_controlflow_graphproto(const onnx::GraphProto& graph_proto) + // { + // // currently, this function does not support nested controlflow + // std::unordered_map controlflow_graphproto_map; + // // std::vector unsorted_nodes(std::begin(onnx_graph_proto->node()), + // // std::end(onnx_graph_proto->node())); + // // auto tensorproto_map = extract_tensorproto(graph_proto); + // for (auto node_proto : graph_proto.node()) + // { + // if (node_proto.op_type() == "If") + // { + // Node node(node_proto); + // controlflow_graphproto_map[node_proto.name() + "_If_then_branch"] = + // complete_graphproto( + // node.get_attribute_value("then_branch")); + // controlflow_graphproto_map[node_proto.name() + "_If_else_branch"] = + // complete_graphproto( + // node.get_attribute_value("else_branch")); + // } + // else if (node_proto.op_type() == "Loop") + // { + // Node node(node_proto); + // controlflow_graphproto_map[node_proto.name() + "_Loop_body"] = + // complete_graphproto(node.get_attribute_value("body")); + // } + // // else if (node_proto.op_type() == "Scan") + // // { + // // // + // 
// } + // } + + // return controlflow_graphproto_map; + // } + bool GraphConvert::is_operator_available(const onnx::NodeProto& node_proto) const { if (node_proto.domain() == "com.microsoft.nnfusion.custom") diff --git a/src/nnfusion/frontend/onnx_import/util/graph_convert.hpp b/src/nnfusion/frontend/onnx_import/util/graph_convert.hpp index cb7041abd..7ca17d2a2 100644 --- a/src/nnfusion/frontend/onnx_import/util/graph_convert.hpp +++ b/src/nnfusion/frontend/onnx_import/util/graph_convert.hpp @@ -38,6 +38,49 @@ namespace nnfusion { namespace onnx_import { + class GraphProtoConvert + { + public: + GraphProtoConvert( + const onnx::GraphProto& graph_proto, + const std::unordered_map& domain_convert_func_map, + const string& model_dir, + const std::unordered_map& dim_params = {}, + const NodeMap& _node_map = NodeMap(), + bool flag_subgraph = false); + + std::shared_ptr get_graph() { return m_graph; } + const onnx::GraphProto& get_onnx_proto_graph() const { return *onnx_graph_proto; } + NamedNodeVector convert_node(const onnx::NodeProto& node_proto); + + /// \brief Access an operator object by its type name and domain name + /// The function will return the operator object if it exists, or report an error + /// in case of domain or operator absence. + /// \param name type name of the operator object, + /// \param domain domain name of the operator object. + /// \return Reference to the operator object. 
+ const ConvertFunc& get_convert_func(const std::string& name, + const std::string& domain) const; + + private: + const onnx::GraphProto* onnx_graph_proto; + + std::shared_ptr m_graph; + + std::unordered_map m_domain_convert_func_map; + + NodeMap m_node_map; + + // TODO: to be removed + std::set m_output_names; + + graph::GNodeVector m_graph_outputs; + + std::unordered_map m_dim_params; + std::string m_model_dir; + + bool m_flag_subgraph; + }; class GraphConvert { public: @@ -66,16 +109,19 @@ namespace nnfusion return onnx_model_proto->producer_version(); } - NamedNodeVector convert_node(const onnx::NodeProto& node_proto); + /// \brief Convert ONNX::GraphProto to nnfusion graph + /// \param graph_proto ONNX GraphProto + /// \param _node_map pre-provided node_map, empty by default + /// \return std::shared_ptr + std::shared_ptr + convert_graph(const onnx::GraphProto& graph_proto, + const NodeMap& _node_map = NodeMap()); - /// \brief Access an operator object by its type name and domain name - /// The function will return the operator object if it exists, or report an error - /// in case of domain or operator absence. - /// \param name type name of the operator object, - /// \param domain domain name of the operator object. - /// \return Reference to the operator object. - const ConvertFunc& get_convert_func(const std::string& name, - const std::string& domain) const; + // /// \brief Construct complete GraphProtos for sub-graphs in control-flow nodes (e.g., If, Loop) by adding the missing information (i.e., inputs) of the GraphProto, which could be processed by GraphProtoConvert to get nnfusion graph + // /// \param graph_proto the graph_proto of the ONNX model + // /// \returns unordered_map + // std::unordered_map + // construct_controlflow_graphproto(const onnx::GraphProto& graph_proto); /// \brief Check availability of operator base on NodeProto. /// \return `true` if the operator is available, otherwise it returns `false`. 
@@ -89,15 +135,17 @@ namespace nnfusion std::unordered_map m_domain_convert_func_map; - NodeMap m_node_map; + // std::unordered_map m_controlflow_graphproto_map; + + // NodeMap m_node_map; // TODO: to be removed - std::set m_output_names; + // std::set m_output_names; - graph::GNodeVector m_graph_outputs; + // graph::GNodeVector m_graph_outputs; std::unordered_map m_dim_params; - std::string model_dir; + std::string m_model_dir; }; } // namespace onnx_import } // namespace frontend From 1a30651a7a88e7cf01abca0f2c72afb07914a6e5 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 15 Jul 2021 12:38:27 +0000 Subject: [PATCH 06/17] draft: cuda kernel emitter for if and loop op (placeholder) --- .../core/kernels/cuda_gpu/kernels/if.cpp | 39 +++++++++++++++++++ .../core/kernels/cuda_gpu/kernels/if.hpp | 25 ++++++++++++ .../core/kernels/cuda_gpu/kernels/loop.cpp | 39 +++++++++++++++++++ .../core/kernels/cuda_gpu/kernels/loop.hpp | 25 ++++++++++++ 4 files changed, 128 insertions(+) create mode 100644 src/nnfusion/core/kernels/cuda_gpu/kernels/if.cpp create mode 100644 src/nnfusion/core/kernels/cuda_gpu/kernels/if.hpp create mode 100644 src/nnfusion/core/kernels/cuda_gpu/kernels/loop.cpp create mode 100644 src/nnfusion/core/kernels/cuda_gpu/kernels/loop.hpp diff --git a/src/nnfusion/core/kernels/cuda_gpu/kernels/if.cpp b/src/nnfusion/core/kernels/cuda_gpu/kernels/if.cpp new file mode 100644 index 000000000..9109ca43a --- /dev/null +++ b/src/nnfusion/core/kernels/cuda_gpu/kernels/if.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#include "if.hpp" +#include "../cuda_cudnn.hpp" +#include "convolution.hpp" + +using namespace nnfusion; +using namespace nnfusion::kernels; + +cuda::If::If(shared_ptr ctx) + : KernelEmitter(ctx) +{ + std::stringstream tag; + tag << "_IfOP"; + custom_tag = tag.str(); +} + +LanguageUnit_p cuda::If::emit_function_body() +{ + LanguageUnit_p _lu(new LanguageUnit(get_function_name())); + auto& lu = *_lu; + + // function signature: + // extern "C" __global__ void kernel(m_context->dtypes[0]* input0, m_context->dtypes[0]* input1, m_context->dtypes[2]* output0) + lu << "// TODO\n"; + return _lu; +} + +LanguageUnit_p cuda::If::emit_dependency() +{ + LanguageUnit_p _lu(new LanguageUnit(get_function_name() + "_dep")); + _lu->require(header::cuda); + return _lu; +} + +REGISTER_KERNEL_EMITTER("If", // op_name + Device(CUDA_GPU).TypeConstraint(element::f32).Priority(2), // attrs + cuda::If) \ No newline at end of file diff --git a/src/nnfusion/core/kernels/cuda_gpu/kernels/if.hpp b/src/nnfusion/core/kernels/cuda_gpu/kernels/if.hpp new file mode 100644 index 000000000..f7f957705 --- /dev/null +++ b/src/nnfusion/core/kernels/cuda_gpu/kernels/if.hpp @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +#pragma once +#include "../cuda_emitter.hpp" +#include "../cuda_langunit.hpp" + +namespace nnfusion +{ + namespace kernels + { + namespace cuda + { + class If : public KernelEmitter + { + public: + If(shared_ptr ctx); + + LanguageUnit_p emit_function_body() override; + LanguageUnit_p emit_dependency() override; + // LanguageUnit_p emit_function_signature() override; + }; + } // namespace cuda + } // namespace kernels +} // namespace nnfusion \ No newline at end of file diff --git a/src/nnfusion/core/kernels/cuda_gpu/kernels/loop.cpp b/src/nnfusion/core/kernels/cuda_gpu/kernels/loop.cpp new file mode 100644 index 000000000..3e2c6c8d6 --- /dev/null +++ b/src/nnfusion/core/kernels/cuda_gpu/kernels/loop.cpp @@ -0,0 +1,39 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "loop.hpp" +#include "../cuda_cudnn.hpp" +#include "convolution.hpp" + +using namespace nnfusion; +using namespace nnfusion::kernels; + +cuda::Loop::Loop(shared_ptr ctx) + : KernelEmitter(ctx) +{ + std::stringstream tag; + tag << "_LoopOP"; + custom_tag = tag.str(); +} + +LanguageUnit_p cuda::Loop::emit_function_body() +{ + LanguageUnit_p _lu(new LanguageUnit(get_function_name())); + auto& lu = *_lu; + + // function signature: + // extern "C" __global__ void kernel(m_context->dtypes[0]* input0, m_context->dtypes[0]* input1, m_context->dtypes[2]* output0) + lu << "// TODO\n"; + return _lu; +} + +LanguageUnit_p cuda::Loop::emit_dependency() +{ + LanguageUnit_p _lu(new LanguageUnit(get_function_name() + "_dep")); + _lu->require(header::cuda); + return _lu; +} + +REGISTER_KERNEL_EMITTER("Loop", // op_name + Device(CUDA_GPU).TypeConstraint(element::f32).Priority(2), // attrs + cuda::Loop) \ No newline at end of file diff --git a/src/nnfusion/core/kernels/cuda_gpu/kernels/loop.hpp b/src/nnfusion/core/kernels/cuda_gpu/kernels/loop.hpp new file mode 100644 index 000000000..58ae29241 --- /dev/null +++ b/src/nnfusion/core/kernels/cuda_gpu/kernels/loop.hpp @@ -0,0 +1,25 
@@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#pragma once +#include "../cuda_emitter.hpp" +#include "../cuda_langunit.hpp" + +namespace nnfusion +{ + namespace kernels + { + namespace cuda + { + class Loop : public KernelEmitter + { + public: + Loop(shared_ptr ctx); + + LanguageUnit_p emit_function_body() override; + LanguageUnit_p emit_dependency() override; + // LanguageUnit_p emit_function_signature() override; + }; + } // namespace cuda + } // namespace kernels +} // namespace nnfusion \ No newline at end of file From 436d7d31a701e1a60ac0f1a9594f734bdf73d83b Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 15 Jul 2021 13:00:17 +0000 Subject: [PATCH 07/17] update onnx frontend convert and shape inference for loop op --- src/nnfusion/core/operators/op_define/loop.cpp | 2 +- src/nnfusion/frontend/onnx_import/op/loop.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/nnfusion/core/operators/op_define/loop.cpp b/src/nnfusion/core/operators/op_define/loop.cpp index 9da8b2dea..45a1c8a5a 100644 --- a/src/nnfusion/core/operators/op_define/loop.cpp +++ b/src/nnfusion/core/operators/op_define/loop.cpp @@ -48,7 +48,7 @@ void Loop::validate_and_infer_types(std::shared_ptr gnode) NNFUSION_CHECK(cond_et == nnfusion::element::boolean) << "The condition tensor of the Loop operation mush be boolean."; - for (size_t i = 0; i < m_output_types.size(); i++) + for (size_t i = 0; i < gnode->get_output_size(); i++) { gnode->set_output_type_and_shape(i, m_output_types[i], m_output_shapes[i]); } diff --git a/src/nnfusion/frontend/onnx_import/op/loop.cpp b/src/nnfusion/frontend/onnx_import/op/loop.cpp index ff3643732..033077630 100644 --- a/src/nnfusion/frontend/onnx_import/op/loop.cpp +++ b/src/nnfusion/frontend/onnx_import/op/loop.cpp @@ -247,9 +247,9 @@ namespace nnfusion std::vector output_shapes; std::vector output_types; - for (auto output : loop_body_graph_proto.output()) + for (size_t i = 1; i < 
loop_body_graph_proto.output().size(); i++) { - ValueInfo output_value_info(output, dim_params); + ValueInfo output_value_info(loop_body_graph_proto.output()[i], dim_params); output_shapes.push_back(output_value_info.get_shape()); output_types.push_back(output_value_info.get_element_type()); } From a2863d7bddbd2b67fbf6254e695daf2c61645a67 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Tue, 20 Jul 2021 08:36:41 +0000 Subject: [PATCH 08/17] fix output size bug in ONNX Loop op convert --- src/nnfusion/frontend/onnx_import/op/loop.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nnfusion/frontend/onnx_import/op/loop.cpp b/src/nnfusion/frontend/onnx_import/op/loop.cpp index 033077630..5600b74b5 100644 --- a/src/nnfusion/frontend/onnx_import/op/loop.cpp +++ b/src/nnfusion/frontend/onnx_import/op/loop.cpp @@ -257,7 +257,8 @@ namespace nnfusion auto loop_op = std::make_shared(loop_body_graph, output_shapes, output_types); loop_op->set_name(node_proto.name()); - auto loop_gnode = m_graph->add_node_and_edge(loop_op, input_indexes); + auto loop_gnode = m_graph->add_node_and_edge( + loop_op, input_indexes, /* output_size */ node_proto.output_size()); NamedNodeVector ret; for (size_t i = 0; i < node_proto.output_size(); i++) From b7b66b2e448f3835fde38e8c3713fae47156f9c9 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Tue, 20 Jul 2021 08:39:47 +0000 Subject: [PATCH 09/17] Generic_op_define and ONNX opset_11 frontend converter for ScatterND op --- .../generic_op_define/ScatterND.cpp | 11 ++++ .../frontend/onnx_import/CMakeLists.txt | 2 + .../frontend/onnx_import/op/scatter.cpp | 60 +++++++++++++++++++ .../frontend/onnx_import/op/scatter.hpp | 44 ++++++++++++++ .../frontend/onnx_import/ops_bridge.cpp | 2 + 5 files changed, 119 insertions(+) create mode 100644 src/nnfusion/core/operators/generic_op/generic_op_define/ScatterND.cpp create mode 100644 src/nnfusion/frontend/onnx_import/op/scatter.cpp create mode 100644 
src/nnfusion/frontend/onnx_import/op/scatter.hpp diff --git a/src/nnfusion/core/operators/generic_op/generic_op_define/ScatterND.cpp b/src/nnfusion/core/operators/generic_op/generic_op_define/ScatterND.cpp new file mode 100644 index 000000000..cc07c09d2 --- /dev/null +++ b/src/nnfusion/core/operators/generic_op/generic_op_define/ScatterND.cpp @@ -0,0 +1,11 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +#include "nnfusion/core/operators/generic_op/generic_op.hpp" + +REGISTER_OP(ScatterND).infershape([](std::shared_ptr gnode) -> void { + NNFUSION_CHECK(gnode->get_input_size() == 3); + + gnode->set_output_type_and_shape( + 0, gnode->get_input_element_type(0), gnode->get_input_shape(0)); +}); \ No newline at end of file diff --git a/src/nnfusion/frontend/onnx_import/CMakeLists.txt b/src/nnfusion/frontend/onnx_import/CMakeLists.txt index 72eaa46c1..7367b39a7 100644 --- a/src/nnfusion/frontend/onnx_import/CMakeLists.txt +++ b/src/nnfusion/frontend/onnx_import/CMakeLists.txt @@ -64,6 +64,8 @@ add_library(onnx_import STATIC op/resize.cpp op/conv.hpp op/conv.cpp + op/scatter.hpp + op/scatter.cpp op/shape.hpp op/shape.cpp op/conv_trans.hpp diff --git a/src/nnfusion/frontend/onnx_import/op/scatter.cpp b/src/nnfusion/frontend/onnx_import/op/scatter.cpp new file mode 100644 index 000000000..af82b9f33 --- /dev/null +++ b/src/nnfusion/frontend/onnx_import/op/scatter.cpp @@ -0,0 +1,60 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
 +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +//---------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See License.txt in the project root for license information. +//---------------------------------------------------------------------------------------------- + +#include + +#include "nnfusion/core/operators/generic_op/generic_op.hpp" +#include "nnfusion/frontend/util/evaluator.hpp" +#include "scatter.hpp" + +namespace nnfusion +{ + namespace frontend + { + namespace onnx_import + { + namespace set_11 + { + NamedNodeVector + TranslateScatterNDOp(const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph) + { + auto input_indexes = GetAllInputIndex(all_ng_nodes, node_proto); + + Node node(node_proto); + + nnfusion::op::OpConfig::any myConfig; + + auto generic_op = std::make_shared( + node_proto.output(0), "ScatterND", myConfig); + auto generic_gnode = m_graph->add_node_and_edge(generic_op, input_indexes); + + return {{node_proto.output(0), generic_gnode}}; + } + + } // namespace set_11 + + } //namespace onnx_import + + } // namespace frontend + +} // namespace nnfusion diff --git a/src/nnfusion/frontend/onnx_import/op/scatter.hpp b/src/nnfusion/frontend/onnx_import/op/scatter.hpp new file mode 100644 index 000000000..9b3803560 --- /dev/null +++ b/src/nnfusion/frontend/onnx_import/op/scatter.hpp @@ -0,0 +1,44 @@ 
+//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +//---------------------------------------------------------------------------------------------- +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. See License.txt in the project root for license information. 
+//---------------------------------------------------------------------------------------------- + +#pragma once + +#include "core/node.hpp" + +namespace nnfusion +{ + namespace frontend + { + namespace onnx_import + { + namespace set_11 + { + NamedNodeVector + TranslateScatterNDOp(const onnx::NodeProto& node_proto, + const NodeMap& all_ng_nodes, + std::shared_ptr m_graph); + } // namespace set_1 + + } //namespace onnx_import + + } // namespace frontend + +} // namespace nnfusion diff --git a/src/nnfusion/frontend/onnx_import/ops_bridge.cpp b/src/nnfusion/frontend/onnx_import/ops_bridge.cpp index 05d0abc19..1f600b919 100644 --- a/src/nnfusion/frontend/onnx_import/ops_bridge.cpp +++ b/src/nnfusion/frontend/onnx_import/ops_bridge.cpp @@ -61,6 +61,7 @@ #include "op/reduce.hpp" #include "op/reshape.hpp" #include "op/resize.hpp" +#include "op/scatter.hpp" #include "op/shape.hpp" #include "op/skip_layer_norm.hpp" #include "op/slice.hpp" @@ -235,6 +236,7 @@ namespace nnfusion REGISTER_OPERATOR("Relu", 1, TranslateUnaryOp); REGISTER_OPERATOR("Reshape", 1, TranslateReshapeOp); REGISTER_OPERATOR("ReshapeGrad", 1, TranslateReshapeGradOp); + REGISTER_OPERATOR("ScatterND", 11, TranslateScatterNDOp); //REGISTER_OPERATOR("Selu", 1, selu); REGISTER_OPERATOR("Shape", 1, TranslateShapeOp); REGISTER_OPERATOR("Sigmoid", 1, TranslateUnaryOp); From ffae3f7ac3b28ec3a2871c8bb2daa01703da983a Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Tue, 20 Jul 2021 08:41:26 +0000 Subject: [PATCH 10/17] Comment m_expression construction of generic_op to bypass translate function problem --- .../core/operators/generic_op/generic_op.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/nnfusion/core/operators/generic_op/generic_op.hpp b/src/nnfusion/core/operators/generic_op/generic_op.hpp index 0ec681258..39dd2ee16 100644 --- a/src/nnfusion/core/operators/generic_op/generic_op.hpp +++ b/src/nnfusion/core/operators/generic_op/generic_op.hpp @@ -372,15 +372,15 @@ 
namespace nnfusion } } - if (localOpConfig.f_translate_v2 != nullptr && !m_expression.size()) - { - m_expression = localOpConfig.f_translate_v2(gnode); - } - - if (localOpConfig.f_translate != nullptr && !m_expression.size()) - { - m_expression = localOpConfig.f_translate(gnode); - } + // if (localOpConfig.f_translate_v2 != nullptr && !m_expression.size()) + // { + // m_expression = localOpConfig.f_translate_v2(gnode); + // } + + // if (localOpConfig.f_translate != nullptr && !m_expression.size()) + // { + // m_expression = localOpConfig.f_translate(gnode); + // } } virtual void infer_shared_memory(std::shared_ptr gnode) override From a77ee7bbd2f4cc292467efa89fd0668e493fc6ed Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 18 Nov 2021 14:34:41 +0900 Subject: [PATCH 11/17] disable ORT optimizations --- src/nnfusion/frontend/onnx_import/onnx.cpp | 72 +++++++++++----------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/src/nnfusion/frontend/onnx_import/onnx.cpp b/src/nnfusion/frontend/onnx_import/onnx.cpp index cbe4fb6e5..838294035 100644 --- a/src/nnfusion/frontend/onnx_import/onnx.cpp +++ b/src/nnfusion/frontend/onnx_import/onnx.cpp @@ -51,42 +51,42 @@ namespace nnfusion load_onnx_model(const std::string& path, const std::unordered_map& dim_params) { - NNFUSION_LOG(INFO) << "Optimizing ONNX Graph with External Tool " - "(models/pytorch2onnx/ort_run_frozen.py)"; - string optimized_filename = string(tmpnam(nullptr)); - string m_path = path; - string script_path = - nnfusion::codegen::get_file_from_templates("onnx/ort_run_frozen.py"); - string cmd = "python3 " + script_path + - " --graph_optimization_level ORT_ENABLE_BASIC " - "--warmup 1 --iters 0 --provider CPUExecutionProvider --file " + - path + " --optimized_model_filepath " + optimized_filename; - if (dim_params.size() > 0) - { - string dim_params_str = " --symbolic_dims \'{"; - for (auto& it : dim_params) - { - if (dim_params_str != " --symbolic_dims \'{") - { - dim_params_str += ", "; - } 
- dim_params_str += "\"" + it.first + "\": " + to_string(it.second); - } - dim_params_str += "}\'"; - cmd += dim_params_str; - } - int sys_ret = system(cmd.c_str()); - std::ifstream opt_fin(optimized_filename.c_str()); - if (sys_ret == 0 && opt_fin.is_open()) - { - m_path = optimized_filename; - } - else - { - NNFUSION_LOG(NNFUSION_WARNING) - << "Failed to optimize ONNX Graph with external tool, please " - "check error messages reported by the tool, fallback"; - } + // NNFUSION_LOG(INFO) << "Optimizing ONNX Graph with External Tool " + // "(models/pytorch2onnx/ort_run_frozen.py)"; + // string optimized_filename = string(tmpnam(nullptr)); + // string m_path = path; + // string script_path = + // nnfusion::codegen::get_file_from_templates("onnx/ort_run_frozen.py"); + // string cmd = "python3 " + script_path + + // " --graph_optimization_level ORT_ENABLE_BASIC " + // "--warmup 1 --iters 0 --provider CPUExecutionProvider --file " + + // path + " --optimized_model_filepath " + optimized_filename; + // if (dim_params.size() > 0) + // { + // string dim_params_str = " --symbolic_dims \'{"; + // for (auto& it : dim_params) + // { + // if (dim_params_str != " --symbolic_dims \'{") + // { + // dim_params_str += ", "; + // } + // dim_params_str += "\"" + it.first + "\": " + to_string(it.second); + // } + // dim_params_str += "}\'"; + // cmd += dim_params_str; + // } + // int sys_ret = system(cmd.c_str()); + // std::ifstream opt_fin(optimized_filename.c_str()); + // if (sys_ret == 0 && opt_fin.is_open()) + // { + // m_path = optimized_filename; + // } + // else + // { + // NNFUSION_LOG(NNFUSION_WARNING) + // << "Failed to optimize ONNX Graph with external tool, please " + // "check error messages reported by the tool, fallback"; + // } std::ifstream ifs{m_path, std::ios::in | std::ios::binary}; NNFUSION_CHECK(ifs.is_open()) << "failure opening file:" + path; From d42be30fdf0833ad4cc0644f720a3468ae1071d9 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 18 Nov 2021 14:36:40 
+0900 Subject: [PATCH 12/17] fix bug for disabling ORT optimizations --- src/nnfusion/frontend/onnx_import/onnx.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nnfusion/frontend/onnx_import/onnx.cpp b/src/nnfusion/frontend/onnx_import/onnx.cpp index 838294035..a9894f926 100644 --- a/src/nnfusion/frontend/onnx_import/onnx.cpp +++ b/src/nnfusion/frontend/onnx_import/onnx.cpp @@ -54,7 +54,7 @@ namespace nnfusion // NNFUSION_LOG(INFO) << "Optimizing ONNX Graph with External Tool " // "(models/pytorch2onnx/ort_run_frozen.py)"; // string optimized_filename = string(tmpnam(nullptr)); - // string m_path = path; + string m_path = path; // string script_path = // nnfusion::codegen::get_file_from_templates("onnx/ort_run_frozen.py"); // string cmd = "python3 " + script_path + From 0a29cc1a37fb36c80fd8f83b1611a78f7e541054 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 18 Nov 2021 14:40:40 +0900 Subject: [PATCH 13/17] fix bug for disabling ORT optimizations --- src/nnfusion/frontend/onnx_import/onnx.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/nnfusion/frontend/onnx_import/onnx.cpp b/src/nnfusion/frontend/onnx_import/onnx.cpp index a9894f926..a154cffd3 100644 --- a/src/nnfusion/frontend/onnx_import/onnx.cpp +++ b/src/nnfusion/frontend/onnx_import/onnx.cpp @@ -99,10 +99,10 @@ namespace nnfusion auto graph = load_onnx_model(ifs, model_dir, dim_params); - if (opt_fin.is_open()) - { - remove(optimized_filename.c_str()); - } + // if (opt_fin.is_open()) + // { + // remove(optimized_filename.c_str()); + // } return graph; } From b0d4c562c33b241218110ea534249018f9a066d0 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Sun, 28 Nov 2021 16:48:31 +0900 Subject: [PATCH 14/17] Support -ftuning_list in kernel tuning pass --- .../engine/pass/graph/kernel_tuning.cpp | 30 ++++++++++++++++++- .../engine/pass/graph/kernel_tuning.hpp | 2 ++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git 
a/src/nnfusion/engine/pass/graph/kernel_tuning.cpp b/src/nnfusion/engine/pass/graph/kernel_tuning.cpp index b9a699c5a..6b689e005 100644 --- a/src/nnfusion/engine/pass/graph/kernel_tuning.cpp +++ b/src/nnfusion/engine/pass/graph/kernel_tuning.cpp @@ -20,6 +20,7 @@ DEFINE_int64(fkernel_tuning_steps, 0, "Enable automatic kernel tuning for maximu DEFINE_string(ftuning_blocklist, "", "List of op types that skip kernel tuning pass, e.g., \"Softmax,Add\""); +DEFINE_string(ftuning_list, "", "List of op types for kernel tuning pass, e.g., \"Softmax,Add\""); DEFINE_string(fantares_perf_file, "./antares_perf.csv", "File to save Antares kernel performance."); DECLARE_bool(fantares_mode); DECLARE_string(fantares_codegen_server); @@ -130,6 +131,7 @@ void dump_perf(std::string filename, std::pair>, std::vector>> get_tuning_candidates(std::shared_ptr& graph, + const std::unordered_set tuning_list, const std::unordered_set block_list, std::unordered_map& ir2cnt) { @@ -147,6 +149,12 @@ std::pair>, std::vector(); NNFUSION_CHECK(n_device_type != UNKNOWN); + // filter ops not in TuningList + if (tuning_list.find(gnode->get_op_type()) == tuning_list.end()) + { + continue; + } + // filter ops in BlockList if (block_list.find(gnode->get_op_type()) != block_list.end()) { @@ -255,11 +263,31 @@ bool KernelTuning::parse_block_list() NNFUSION_LOG(INFO) << "Kernel Tuning BlockList: " << join(BlockList, ", "); } +bool KernelTuning::parse_tuning_list() +{ + auto tuninglist_str = FLAGS_ftuning_list; + stringstream ss(tuninglist_str); + while (ss.good()) + { + string substr; + getline(ss, substr, ','); + TuningList.insert(substr); + } + NNFUSION_LOG(INFO) << "Kernel Tuning List: " << join(TuningList, ", "); +} + bool KernelTuning::run_on_graph(std::shared_ptr& graph) { if (FLAGS_fantares_mode) { + parse_tuning_list(); parse_block_list(); + for (auto item : TuningList) + { + NNFUSION_CHECK(BlockList.find(item) == BlockList.end()) + << "Kernel Tuning Pass: There are same operators in TuningList and " 
+ "TuningBlockList."; + } // register antares kernels anyway here in case kernel selection pass will use them register_antares_kernel(); } @@ -274,7 +302,7 @@ bool KernelTuning::run_on_graph(std::shared_ptr& graph) std::vector> tuning_kernels; std::unordered_map ir2cnt; std::vector> nodes; - std::tie(nodes, tuned_kernels) = get_tuning_candidates(graph, BlockList, ir2cnt); + std::tie(nodes, tuned_kernels) = get_tuning_candidates(graph, TuningList, BlockList, ir2cnt); for (auto gnode : nodes) { if (!(*gnode)["DeviceType"].is_valid()) diff --git a/src/nnfusion/engine/pass/graph/kernel_tuning.hpp b/src/nnfusion/engine/pass/graph/kernel_tuning.hpp index 1d49761e2..67cf3a592 100644 --- a/src/nnfusion/engine/pass/graph/kernel_tuning.hpp +++ b/src/nnfusion/engine/pass/graph/kernel_tuning.hpp @@ -22,11 +22,13 @@ namespace nnfusion private: bool parse_block_list(); + bool parse_tuning_list(); bool insert_to_kernel_cache( const std::vector>& nodes); private: std::unordered_set BlockList; + std::unordered_set TuningList; }; } } From 324f2cf31cea13af08c1a4e52a731ddf7b41a2bf Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Thu, 2 Dec 2021 16:51:15 +0900 Subject: [PATCH 15/17] add mod operator --- src/nnfusion/common/common.hpp | 1 + .../core/kernels/cuda_gpu/cuda_kernelops.hpp | 8 ++++ .../kernels/cuda_gpu/kernels/elementwise.cpp | 1 + src/nnfusion/core/operators/CMakeLists.txt | 1 + .../generic_op_define/Elementwise.cpp | 2 + src/nnfusion/core/operators/op_define/mod.cpp | 26 +++++++++++++ src/nnfusion/core/operators/op_define/mod.hpp | 38 +++++++++++++++++++ .../frontend/onnx_import/ops_bridge.cpp | 1 + 8 files changed, 78 insertions(+) create mode 100644 src/nnfusion/core/operators/op_define/mod.cpp create mode 100644 src/nnfusion/core/operators/op_define/mod.hpp diff --git a/src/nnfusion/common/common.hpp b/src/nnfusion/common/common.hpp index 8c8ec2b3d..2d9d3dfe3 100644 --- a/src/nnfusion/common/common.hpp +++ b/src/nnfusion/common/common.hpp @@ -67,6 +67,7 @@ #include 
// CUDA code-generation mapping for the elementwise Mod op.
template <>
struct CudaOpMap<nnfusion::op::Mod>
{
    // Emits CUDA's fmod for Mod; no custom math_kernel body is needed.
    // NOTE(review): fmod uses truncated division (result sign follows the
    // dividend), while ONNX Mod with fmod=0 — its default — requires
    // Python-style modulo for integer types (result sign follows the
    // divisor). Confirm which semantics callers rely on for negative
    // operands before depending on this kernel.
    static constexpr const char* op = "fmod";
    static constexpr const char* math_kernel = nullptr;
};
op_define/negative.cpp op_define/not_equal.cpp diff --git a/src/nnfusion/core/operators/generic_op/generic_op_define/Elementwise.cpp b/src/nnfusion/core/operators/generic_op/generic_op_define/Elementwise.cpp index bc53d9e27..f118fcfdd 100644 --- a/src/nnfusion/core/operators/generic_op/generic_op_define/Elementwise.cpp +++ b/src/nnfusion/core/operators/generic_op/generic_op_define/Elementwise.cpp @@ -44,6 +44,7 @@ static const std::unordered_map ElementOpMap = { element_op( "divnonan", "(x0 / x1).when([x1 != const(0).cast(x1.dtype())], const(0).cast(input1[].dtype()))")}, + {"Mod", element_op("fmod", "")}, {"Square", element_op("square", "x0 * x0")}, {"Negative", element_op("negative", "-x0")}, {"Select", element_op("select", "x2.when([x0 == 0], x1)")}, @@ -174,6 +175,7 @@ REGISTER_ELEM_OP(Subtract) REGISTER_ELEM_OP(Multiply) REGISTER_ELEM_OP(Divide) REGISTER_ELEM_OP(DivNoNan) +REGISTER_ELEM_OP(Mod) REGISTER_ELEM_OP(Square) REGISTER_ELEM_OP(Negative) REGISTER_ELEM_OP(Select) diff --git a/src/nnfusion/core/operators/op_define/mod.cpp b/src/nnfusion/core/operators/op_define/mod.cpp new file mode 100644 index 000000000..9d7b96d69 --- /dev/null +++ b/src/nnfusion/core/operators/op_define/mod.cpp @@ -0,0 +1,26 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//***************************************************************************** + +// Microsoft (c) 2019, NNFusion Team + +#include "mod.hpp" + +using namespace nnfusion::op; + +Mod::Mod() + : ElementwiseArithmetic("Mod") +{ +} \ No newline at end of file diff --git a/src/nnfusion/core/operators/op_define/mod.hpp b/src/nnfusion/core/operators/op_define/mod.hpp new file mode 100644 index 000000000..d2d7723f3 --- /dev/null +++ b/src/nnfusion/core/operators/op_define/mod.hpp @@ -0,0 +1,38 @@ +//***************************************************************************** +// Copyright 2017-2020 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//***************************************************************************** + +// Microsoft (c) 2019, NNFusion Team + +#pragma once + +#include "nnfusion/core/operators/util/elementwise_arithmetic.hpp" + +namespace nnfusion +{ + namespace op + { + /// \brief Elementwise mod operation. + class Mod : public ElementwiseArithmetic + { + public: + /// \brief Constructs a mod operation. 
+ Mod(); + + protected: + virtual bool is_commutative() override { return false; } + }; + }; // namespace op +} // namespace nnfusion diff --git a/src/nnfusion/frontend/onnx_import/ops_bridge.cpp b/src/nnfusion/frontend/onnx_import/ops_bridge.cpp index 4f4cc56cc..50b4f2282 100644 --- a/src/nnfusion/frontend/onnx_import/ops_bridge.cpp +++ b/src/nnfusion/frontend/onnx_import/ops_bridge.cpp @@ -213,6 +213,7 @@ namespace nnfusion REGISTER_OPERATOR("MemcpyFromHost", 1, TranslateMemcpyFromHostOp); REGISTER_OPERATOR("MemcpyToHost", 1, TranslateMemcpyToHostOp); REGISTER_OPERATOR("Min", 1, TranslateLegacyBinaryOp); + REGISTER_OPERATOR("Mod", 1, TranslateBinaryOp); REGISTER_OPERATOR("Mul", 1, TranslateLegacyBinaryOp); REGISTER_OPERATOR("Mul", 7, TranslateBinaryOp); REGISTER_OPERATOR("Neg", 1, TranslateUnaryOp); From bcf5b7da2ffa6acdd60e85ea935bcf79bd32d8b5 Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Wed, 8 Dec 2021 14:58:53 +0900 Subject: [PATCH 16/17] ad-hoc support tuning_list in AntaresCudaKernelEmitter --- .../core/kernels/cuda_gpu/cuda_emitter.hpp | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp b/src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp index fada9792d..d38e234f2 100644 --- a/src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp +++ b/src/nnfusion/core/kernels/cuda_gpu/cuda_emitter.hpp @@ -11,6 +11,7 @@ #include "nnfusion/engine/async_manager.hpp" DECLARE_string(fantares_codegen_server); +DECLARE_string(ftuning_list); namespace nnfusion { @@ -206,9 +207,14 @@ namespace nnfusion , m_antares_ke_imp(new AntaresKEImp) { GENERIC_OP_LOGGING(); + parse_tuning_list(); if (!FLAGS_fantares_codegen_server.empty()) { // NNFUSION_LOG(INFO) << "Translate for " << ctx->gnode->get_op_type(); + if (TuningList.find(ctx->gnode->get_op_type()) == TuningList.end()) + { + return; + } ir = nnfusion::op::get_translation(ctx->gnode); #if 0 @@ -287,6 +293,7 @@ namespace nnfusion << ctx->gnode->get_op_type(); 
log_cache.insert(ctx->gnode->get_op_type()); } + return; } kernel_info = @@ -316,6 +323,19 @@ namespace nnfusion std::string ir; bool is_memcpy = false; + bool parse_tuning_list() + { + auto tuninglist_str = FLAGS_ftuning_list; + stringstream ss(tuninglist_str); + while (ss.good()) + { + string substr; + getline(ss, substr, ','); + TuningList.insert(substr); + } + NNFUSION_LOG(INFO) << "Kernel Tuning List: " << join(TuningList, ", "); + } + protected: // map tensor names and allocate tmp tensor void process_antares_kernel_info(); @@ -323,6 +343,7 @@ namespace nnfusion std::vector kernel_info; std::unordered_map tensor_name_map; // antares tensor name : kernel tensor name + std::unordered_set TuningList; }; class CacheCudaEmitter : public CudaEmitter From 32ebc44fb1ab731936508845bbdbe5f23866419b Mon Sep 17 00:00:00 2001 From: Lingxiao Ma Date: Wed, 8 Dec 2021 18:52:27 +0800 Subject: [PATCH 17/17] fix bug in tuning_list --- src/nnfusion/engine/pass/graph/kernel_tuning.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nnfusion/engine/pass/graph/kernel_tuning.cpp b/src/nnfusion/engine/pass/graph/kernel_tuning.cpp index 6b689e005..0ca99ef76 100644 --- a/src/nnfusion/engine/pass/graph/kernel_tuning.cpp +++ b/src/nnfusion/engine/pass/graph/kernel_tuning.cpp @@ -150,7 +150,7 @@ std::pair>, std::vectorget_op_type()) == tuning_list.end()) + if (tuning_list.size() > 0 && tuning_list.find(gnode->get_op_type()) == tuning_list.end()) { continue; }