From 9f78a572bedb499ea8729021d95e2b9109bf60b2 Mon Sep 17 00:00:00 2001 From: eedalong Date: Mon, 22 Jan 2024 13:28:38 +0800 Subject: [PATCH 1/3] add one-hop buffer reuse propogation --- .../transforms/disc_input_output_alias.cc | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) mode change 100755 => 100644 tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc diff --git a/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc b/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc old mode 100755 new mode 100644 index 39d8dcadea2..36e3b66e31a --- a/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc +++ b/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc @@ -141,17 +141,57 @@ struct DiscInputOutputAliasPass if (outputs[outputs_index[i]] == params[params_index[i]]) { continue; } - // DISC now only support one-hop buffer sharing. + // Inplace buffer reuse. + bool inplace_reuse = false; auto defineOp = outputs[outputs_index[i]].getDefiningOp(); for (const auto& value : defineOp->getOperands()) { if (params[params_index[i]] == value) { builder.setInsertionPointAfterValue(outputs[outputs_index[i]]); - builder.create(main_func.getLoc(), - outputs[outputs_index[i]], - params[params_index[i]]); + builder.create( + outputs[outputs_index[i]].getLoc(), outputs[outputs_index[i]], + params[params_index[i]]); + inplace_reuse = true; break; } } + + // Try one-hop buffer sharing propogation + if (!inplace_reuse) { + OneHopBufferReusePropogation(params[params_index[i]], + outputs[outputs_index[i]], builder); + } + } + } + + private: + /* + A = op(src) + A = op(src) => args_mutation(A, src) + B = op(A) => B = op(A) + args_mutation(B, A) + */ + void OneHopBufferReusePropogation(Value src, Value dst, OpBuilder& builder) { + auto dst_op = dst.getDefiningOp(); + auto user_begin = src.user_begin(); + auto user_end = src.user_end(); + auto users_cnt = std::distance(user_begin, user_end); + + if (users_cnt > 1 || user_begin->getNumResults() > 1) { + return; + } + + auto user_result = user_begin->getResult(0); + for (const auto& operand : dst_op->getOperands()) { + if (operand == user_result) { + builder.setInsertionPointAfterValue(user_result); + builder.create(user_result.getLoc(), + user_result, src); + + builder.setInsertionPointAfterValue(dst); + builder.create(dst.getLoc(), dst, + user_result); + break; + } } } }; From 28960f0a1cdbf3253226471977c5e82fc665c725 Mon Sep 17 00:00:00 2001 From: eedalong Date: Mon, 22 Jan 2024 14:13:42 +0800 Subject: [PATCH 2/3] add ut --- .../tests/disc-input-output-alias-inplace.mlir | 11 +++++++++++ .../tests/disc-input-output-alias-propagation.mlir | 14 ++++++++++++++ 2 files changed, 25 insertions(+) create mode 100755 tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir create mode 100644 tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir diff --git a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir b/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir new file mode 100755 index 00000000000..e0248e27252 --- /dev/null +++ b/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir @@ -0,0 +1,11 @@ +// RUN: disc-opt -disc-input-output-alias \ +// RUN: %s -o - | FileCheck %s + +// CHECK-LABEL: main +func.func @main(%arg0: tensor<200x200xf32>, %arg1: tensor<200x200xf32>) -> (tensor<200x200xf32>, tensor<200x200xf32>) attributes {tf.entry_function = {input_output_alias_outputs = "0,1", input_output_alias_params = "0,1", input_placements = "gpu,gpu", output_placements = "gpu,gpu"}} { + // CHECK: %0 = mhlo.add %arg1, %arg0 : tensor<200x200xf32> + %0 = mhlo.add %arg1, %arg0 : tensor<200x200xf32> + // CHECK: "mhlo_disc.args_mutation"(%0, %arg1) : (tensor<200x200xf32>, tensor<200x200xf32>) -> () + // CHECK: return %arg0, %0 : tensor<200x200xf32>, tensor<200x200xf32> + return %arg0, %0 : tensor<200x200xf32>, tensor<200x200xf32> +} diff --git a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir b/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir new file mode 100644 index 00000000000..914a663285a --- /dev/null +++ b/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir @@ -0,0 +1,14 @@ +// RUN: disc-opt -disc-input-output-alias \ +// RUN: %s -o - | FileCheck %s + +// CHECK-LABEL: main +func.func @main(%arg0: tensor<4000x4000xf32>, %arg1: tensor<4000x4000xf32>) -> (tensor<4000x4000xf32>, tensor<4000x4000xf32>) attributes {tf.entry_function = {input_output_alias_outputs = "0,1", input_output_alias_params = "0,1", input_placements = "gpu,gpu", output_placements = "gpu,gpu"}} { + //CHECK: %0 = mhlo.add %arg1, %arg0 : tensor<4000x4000xf32> + %0 = mhlo.add %arg1, %arg0 : tensor<4000x4000xf32> + //CHECK: "mhlo_disc.args_mutation"(%0, %arg1) : (tensor<4000x4000xf32>, tensor<4000x4000xf32>) -> () + //CHECK: %1 = mhlo.add %0, %arg0 : tensor<4000x4000xf32> + %1 = mhlo.add %0, %arg0 : tensor<4000x4000xf32> + //CHECK: "mhlo_disc.args_mutation"(%1, %0) : (tensor<4000x4000xf32>, tensor<4000x4000xf32>) -> () + //CHECK: return %arg0, %1 : tensor<4000x4000xf32>, tensor<4000x4000xf32> + return %arg0, %1 : tensor<4000x4000xf32>, tensor<4000x4000xf32> + } \ No newline at end of file From 99cc4f4a61df53aaac16f2a8b1aca7ec5e687483 Mon Sep 17 00:00:00 2001 From: eedalong Date: Thu, 22 Feb 2024 10:21:09 +0800 Subject: [PATCH 3/3] always try to lower standalone transposeOp to custom call --- .../transforms/disc_input_output_alias.cc | 48 ++----------------- .../transforms/disc_lower_to_library_call.cc | 5 +- .../disc-input-output-alias-inplace.mlir | 11 ----- .../disc-input-output-alias-propagation.mlir | 14 ------ 4 files changed, 6 insertions(+), 72 deletions(-) mode change 100644 => 100755 tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc mode change 100644 => 100755 tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc delete mode 100755 tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir delete mode 100644 tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir diff --git a/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc b/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc old mode 100644 new mode 100755 index 36e3b66e31a..39d8dcadea2 --- a/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc +++ b/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc @@ -141,57 +141,17 @@ struct DiscInputOutputAliasPass if (outputs[outputs_index[i]] == params[params_index[i]]) { continue; } - // Inplace buffer reuse. - bool inplace_reuse = false; + // DISC now only support one-hop buffer sharing. auto defineOp = outputs[outputs_index[i]].getDefiningOp(); for (const auto& value : defineOp->getOperands()) { if (params[params_index[i]] == value) { builder.setInsertionPointAfterValue(outputs[outputs_index[i]]); - builder.create( - outputs[outputs_index[i]].getLoc(), outputs[outputs_index[i]], - params[params_index[i]]); - inplace_reuse = true; + builder.create(main_func.getLoc(), + outputs[outputs_index[i]], + params[params_index[i]]); break; } } - - // Try one-hop buffer sharing propogation - if (!inplace_reuse) { - OneHopBufferReusePropogation(params[params_index[i]], - outputs[outputs_index[i]], builder); - } - } - } - - private: - /* - A = op(src) - A = op(src) => args_mutation(A, src) - B = op(A) => B = op(A) - args_mutation(B, A) - */ - void OneHopBufferReusePropogation(Value src, Value dst, OpBuilder& builder) { - auto dst_op = dst.getDefiningOp(); - auto user_begin = src.user_begin(); - auto user_end = src.user_end(); - auto users_cnt = std::distance(user_begin, user_end); - - if (users_cnt > 1 || user_begin->getNumResults() > 1) { - return; - } - - auto user_result = user_begin->getResult(0); - for (const auto& operand : dst_op->getOperands()) { - if (operand == user_result) { - builder.setInsertionPointAfterValue(user_result); - builder.create(user_result.getLoc(), - user_result, src); - - builder.setInsertionPointAfterValue(dst); - builder.create(dst.getLoc(), dst, - user_result); - break; - } } } }; diff --git a/tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc b/tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc old mode 100644 new mode 100755 index bd6b7296dd2..a3de94b8677 --- a/tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc +++ b/tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc @@ -494,7 +494,7 @@ struct TransposeConverter : public OpRewritePattern { if (rank != 2 && rank != 3) return failure(); // only rewriter custom library when switch 1 and 2 dimensions of // a 3d tensor, that means permute = [0, 2, 1] - if (rank == 3 && permutation[1] != 2 && permutation[2] != 1) + if (rank == 3 && (permutation[1] != 2 || permutation[2] != 1)) return failure(); bool on_gpu = placement_utils::isGpuMemRef(op->getOperand(0)); // TODO: support other device @@ -914,8 +914,7 @@ struct DiscLowerToLibraryCallPass SendOutputOpConvertor >(context); // clang-format on - if (enableTransposeLibraryCall()) - patterns.insert(context); + patterns.insert(context); // GPU copy related ops patterns.insert>(context, "h2d"); diff --git a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir b/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir deleted file mode 100755 index e0248e27252..00000000000 --- a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir +++ /dev/null @@ -1,11 +0,0 @@ -// RUN: disc-opt -disc-input-output-alias \ -// RUN: %s -o - | FileCheck %s - -// CHECK-LABEL: main -func.func @main(%arg0: tensor<200x200xf32>, %arg1: tensor<200x200xf32>) -> (tensor<200x200xf32>, tensor<200x200xf32>) attributes {tf.entry_function = {input_output_alias_outputs = "0,1", input_output_alias_params = "0,1", input_placements = "gpu,gpu", output_placements = "gpu,gpu"}} { - // CHECK: %0 = mhlo.add %arg1, %arg0 : tensor<200x200xf32> - %0 = mhlo.add %arg1, %arg0 : tensor<200x200xf32> - // CHECK: "mhlo_disc.args_mutation"(%0, %arg1) : (tensor<200x200xf32>, tensor<200x200xf32>) -> () - // CHECK: return %arg0, %0 : tensor<200x200xf32>, tensor<200x200xf32> - return %arg0, %0 : tensor<200x200xf32>, tensor<200x200xf32> -} diff --git a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir b/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir deleted file mode 100644 index 914a663285a..00000000000 --- a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: disc-opt -disc-input-output-alias \ -// RUN: %s -o - | FileCheck %s - -// CHECK-LABEL: main -func.func @main(%arg0: tensor<4000x4000xf32>, %arg1: tensor<4000x4000xf32>) -> (tensor<4000x4000xf32>, tensor<4000x4000xf32>) attributes {tf.entry_function = {input_output_alias_outputs = "0,1", input_output_alias_params = "0,1", input_placements = "gpu,gpu", output_placements = "gpu,gpu"}} { - //CHECK: %0 = mhlo.add %arg1, %arg0 : tensor<4000x4000xf32> - %0 = mhlo.add %arg1, %arg0 : tensor<4000x4000xf32> - //CHECK: "mhlo_disc.args_mutation"(%0, %arg1) : (tensor<4000x4000xf32>, tensor<4000x4000xf32>) -> () - //CHECK: %1 = mhlo.add %0, %arg0 : tensor<4000x4000xf32> - %1 = mhlo.add %0, %arg0 : tensor<4000x4000xf32> - //CHECK: "mhlo_disc.args_mutation"(%1, %0) : (tensor<4000x4000xf32>, tensor<4000x4000xf32>) -> () - //CHECK: return %arg0, %1 : tensor<4000x4000xf32>, tensor<4000x4000xf32> - return %arg0, %1 : tensor<4000x4000xf32>, tensor<4000x4000xf32> - } \ No newline at end of file