From 99cc4f4a61df53aaac16f2a8b1aca7ec5e687483 Mon Sep 17 00:00:00 2001 From: eedalong Date: Thu, 22 Feb 2024 10:21:09 +0800 Subject: [PATCH] always try to lower standalone transposeOp to custom call --- .../transforms/disc_input_output_alias.cc | 48 ++----------------- .../transforms/disc_lower_to_library_call.cc | 5 +- .../disc-input-output-alias-inplace.mlir | 11 ----- .../disc-input-output-alias-propagation.mlir | 14 ------ 4 files changed, 6 insertions(+), 72 deletions(-) mode change 100644 => 100755 tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc mode change 100644 => 100755 tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc delete mode 100755 tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir delete mode 100644 tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir diff --git a/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc b/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc old mode 100644 new mode 100755 index 36e3b66e31a..39d8dcadea2 --- a/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc +++ b/tao_compiler/mlir/disc/transforms/disc_input_output_alias.cc @@ -141,57 +141,17 @@ struct DiscInputOutputAliasPass if (outputs[outputs_index[i]] == params[params_index[i]]) { continue; } - // Inplace buffer reuse. - bool inplace_reuse = false; + // DISC now only support one-hop buffer sharing. auto defineOp = outputs[outputs_index[i]].getDefiningOp(); for (const auto& value : defineOp->getOperands()) { if (params[params_index[i]] == value) { builder.setInsertionPointAfterValue(outputs[outputs_index[i]]); - builder.create( - outputs[outputs_index[i]].getLoc(), outputs[outputs_index[i]], - params[params_index[i]]); - inplace_reuse = true; + builder.create(main_func.getLoc(), + outputs[outputs_index[i]], + params[params_index[i]]); break; } } - - // Try one-hop buffer sharing propogation - if (!inplace_reuse) { - OneHopBufferReusePropogation(params[params_index[i]], - outputs[outputs_index[i]], builder); - } - } - } - - private: - /* - A = op(src) - A = op(src) => args_mutation(A, src) - B = op(A) => B = op(A) - args_mutation(B, A) - */ - void OneHopBufferReusePropogation(Value src, Value dst, OpBuilder& builder) { - auto dst_op = dst.getDefiningOp(); - auto user_begin = src.user_begin(); - auto user_end = src.user_end(); - auto users_cnt = std::distance(user_begin, user_end); - - if (users_cnt > 1 || user_begin->getNumResults() > 1) { - return; - } - - auto user_result = user_begin->getResult(0); - for (const auto& operand : dst_op->getOperands()) { - if (operand == user_result) { - builder.setInsertionPointAfterValue(user_result); - builder.create(user_result.getLoc(), - user_result, src); - - builder.setInsertionPointAfterValue(dst); - builder.create(dst.getLoc(), dst, - user_result); - break; - } } } }; diff --git a/tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc b/tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc old mode 100644 new mode 100755 index bd6b7296dd2..a3de94b8677 --- a/tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc +++ b/tao_compiler/mlir/disc/transforms/disc_lower_to_library_call.cc @@ -494,7 +494,7 @@ struct TransposeConverter : public OpRewritePattern { if (rank != 2 && rank != 3) return failure(); // only rewriter custom library when switch 1 and 2 dimensions of // a 3d tensor, that means permute = [0, 2, 1] - if (rank == 3 && permutation[1] != 2 && permutation[2] != 1) + if (rank == 3 && (permutation[1] != 2 || permutation[2] != 1)) return failure(); bool on_gpu = placement_utils::isGpuMemRef(op->getOperand(0)); // TODO: support other device @@ -914,8 +914,7 @@ struct DiscLowerToLibraryCallPass SendOutputOpConvertor >(context); // clang-format on - if (enableTransposeLibraryCall()) - patterns.insert(context); + patterns.insert(context); // GPU copy related ops patterns.insert>(context, "h2d"); diff --git a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir b/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir deleted file mode 100755 index e0248e27252..00000000000 --- a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-inplace.mlir +++ /dev/null @@ -1,11 +0,0 @@ -// RUN: disc-opt -disc-input-output-alias \ -// RUN: %s -o - | FileCheck %s - -// CHECK-LABEL: main -func.func @main(%arg0: tensor<200x200xf32>, %arg1: tensor<200x200xf32>) -> (tensor<200x200xf32>, tensor<200x200xf32>) attributes {tf.entry_function = {input_output_alias_outputs = "0,1", input_output_alias_params = "0,1", input_placements = "gpu,gpu", output_placements = "gpu,gpu"}} { - // CHECK: %0 = mhlo.add %arg1, %arg0 : tensor<200x200xf32> - %0 = mhlo.add %arg1, %arg0 : tensor<200x200xf32> - // CHECK: "mhlo_disc.args_mutation"(%0, %arg1) : (tensor<200x200xf32>, tensor<200x200xf32>) -> () - // CHECK: return %arg0, %0 : tensor<200x200xf32>, tensor<200x200xf32> - return %arg0, %0 : tensor<200x200xf32>, tensor<200x200xf32> -} diff --git a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir b/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir deleted file mode 100644 index 914a663285a..00000000000 --- a/tao_compiler/mlir/disc/transforms/tests/disc-input-output-alias-propagation.mlir +++ /dev/null @@ -1,14 +0,0 @@ -// RUN: disc-opt -disc-input-output-alias \ -// RUN: %s -o - | FileCheck %s - -// CHECK-LABEL: main -func.func @main(%arg0: tensor<4000x4000xf32>, %arg1: tensor<4000x4000xf32>) -> (tensor<4000x4000xf32>, tensor<4000x4000xf32>) attributes {tf.entry_function = {input_output_alias_outputs = "0,1", input_output_alias_params = "0,1", input_placements = "gpu,gpu", output_placements = "gpu,gpu"}} { - //CHECK: %0 = mhlo.add %arg1, %arg0 : tensor<4000x4000xf32> - %0 = mhlo.add %arg1, %arg0 : tensor<4000x4000xf32> - //CHECK: "mhlo_disc.args_mutation"(%0, %arg1) : (tensor<4000x4000xf32>, tensor<4000x4000xf32>) -> () - //CHECK: %1 = mhlo.add %0, %arg0 : tensor<4000x4000xf32> - %1 = mhlo.add %0, %arg0 : tensor<4000x4000xf32> - //CHECK: "mhlo_disc.args_mutation"(%1, %0) : (tensor<4000x4000xf32>, tensor<4000x4000xf32>) -> () - //CHECK: return %arg0, %1 : tensor<4000x4000xf32>, tensor<4000x4000xf32> - return %arg0, %1 : tensor<4000x4000xf32>, tensor<4000x4000xf32> - } \ No newline at end of file