From bba961e658c2666cf653379f0bbf53451facffde Mon Sep 17 00:00:00 2001
From: yzhang93
Date: Fri, 7 Feb 2025 17:51:28 -0800
Subject: [PATCH] [LoweringStrategy] Use a more general method to fetch input dims and sizes

---
 .../Transforms/KernelDispatch.cpp           | 204 +++++++++++-------
 .../test/lowering_strategy_air.mlir         |   6 +-
 .../test/lowering_strategy_generic.mlir     |  36 +++-
 .../lowering_strategy_objectfifo_npu1.mlir  |  48 ++---
 .../lowering_strategy_objectfifo_npu4.mlir  |   4 +-
 5 files changed, 191 insertions(+), 107 deletions(-)

diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
index ff795fca0..fa4bccc6f 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/KernelDispatch.cpp
@@ -84,6 +84,52 @@ FailureOr<SmallVector<int64_t>> getPackedSize(linalg::LinalgOp linalgOp,
   return instructionSize;
 }
 
+struct InputDimsAndSizes {
+  SmallVector<unsigned, 2> mDims;
+  SmallVector<unsigned, 2> nDims;
+  SmallVector<unsigned, 2> kDims;
+  SmallVector<int64_t> mSizes;
+  SmallVector<int64_t> nSizes;
+  SmallVector<int64_t> kSizes;
+};
+
+FailureOr<InputDimsAndSizes> getInputDimsAndSizes(linalg::LinalgOp linalgOp) {
+  FailureOr<linalg::ContractionDimensions> maybeContractionDims =
+      linalg::inferContractionDims(linalgOp);
+  if (failed(maybeContractionDims)) {
+    return linalgOp.emitOpError("failed to infer the contraction dimensions.");
+  }
+
+  linalg::ContractionDimensions contractionDims = *maybeContractionDims;
+  SmallVector<unsigned, 2> mDims = contractionDims.m;
+  SmallVector<unsigned, 2> nDims = contractionDims.n;
+  SmallVector<unsigned, 2> kDims = contractionDims.k;
+  if (mDims.empty() || nDims.empty() || kDims.empty()) {
+    return linalgOp.emitOpError("failed to fetch m/n/k dims.");
+  }
+
+  SmallVector<int64_t> shapes = linalgOp.getStaticLoopRanges();
+  if (mDims.size() + nDims.size() + kDims.size() > shapes.size()) {
+    return linalgOp.emitOpError(
+        "the total of m/n/k dims is larger than the number of loops.");
+  }
+
+  auto getSizesAt = [&shapes](const SmallVector<unsigned, 2> &idx) {
+    SmallVector<int64_t> sizes;
+    for (auto i : idx) sizes.push_back(shapes[i]);
+    return sizes;
+  };
+
+  InputDimsAndSizes inputDimsAndSizes;
+  inputDimsAndSizes.mDims = mDims;
+  inputDimsAndSizes.nDims = nDims;
+  inputDimsAndSizes.kDims = kDims;
+  inputDimsAndSizes.mSizes = getSizesAt(mDims);
+  inputDimsAndSizes.nSizes = getSizesAt(nDims);
+  inputDimsAndSizes.kSizes = getSizesAt(kDims);
+  return inputDimsAndSizes;
+}
+
 // Container class for the tiling at level 0 (the AIE shared memory) and level 1
 // (the AIE core) in the M-, N-, and K-dimensions of a matmul operation, using
 // the pad-pack approach to tiling a matmul. Also contains the packing sizes for
@@ -156,25 +202,24 @@ FailureOr<ParameterSetting> ParameterSetting::create(
   auto initType =
       llvm::cast<ShapedType>(linalgOp.getDpsInitOperand(0)->get().getType());
   unsigned nBitsInit = initType.getElementTypeBitWidth();
-  ArrayRef<int64_t> initShape = initType.getShape();
-
   auto lhsType =
       llvm::cast<ShapedType>(linalgOp.getDpsInputOperand(0)->get().getType());
   unsigned nBitsLhs = lhsType.getElementTypeBitWidth();
-  ArrayRef<int64_t> lhsShape = lhsType.getShape();
-
   auto rhsType =
       llvm::cast<ShapedType>(linalgOp.getDpsInputOperand(1)->get().getType());
   unsigned nBitsRhs = rhsType.getElementTypeBitWidth();
 
-  // Shape of the full matmul operation.
-  if (isa<linalg::BatchMatmulOp>(linalgOp)) {
-    initShape = initShape.drop_front();
-    lhsShape = lhsShape.drop_front();
-  }
-  const uint64_t M = initShape[0];
-  const uint64_t N = initShape[1];
-  const uint64_t K = lhsShape[1];
+  auto getTotalSize = [](const SmallVector<int64_t> &sizes) {
+    return std::accumulate(sizes.begin(), sizes.end(), 1,
+                           std::multiplies<int64_t>());
+  };
+
+  // Get the shape (M, N, K) of the full matmul operation.
+  auto maybeInputDimsAndSizes = getInputDimsAndSizes(linalgOp);
+  if (failed(maybeInputDimsAndSizes)) return failure();
+  int64_t M = getTotalSize(maybeInputDimsAndSizes.value().mSizes);
+  int64_t N = getTotalSize(maybeInputDimsAndSizes.value().nSizes);
+  int64_t K = getTotalSize(maybeInputDimsAndSizes.value().kSizes);
 
   // If we are conservative with ensuring that tiles A, B, and C fit at the
   // different memory levels, we should choose the scale factor based
@@ -389,15 +434,23 @@ static SmallVector<int64_t> setOuterPermB(bool isMatmulTransposeB,
 
 static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
     mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
-    AMDAIEDevice targetDevice, uint32_t numRows, uint32_t numCols) {
-  // Scale the L1 K with a factor of 2 compared with the outer dimenions M and N
-  // to increase the L1 memory usage.
+    AMDAIEDevice targetDevice, uint32_t numRows, uint32_t numCols,
+    uint32_t numLoops) {
+  // Scale the L1 K with a factor of 2 compared with the outer dimensions M and
+  // N to increase the L1 memory usage.
   auto maybePackPeelTiling = ParameterSetting::create(
       linalgOp, /*isPackPeel=*/true, /*isObjectFifo=*/true, targetDevice,
       numRows, numCols, /*kPackScaleL1=*/2);
   if (failed(maybePackPeelTiling)) return failure();
   auto packPeelTiling = maybePackPeelTiling.value();
 
+  // Get the M, N, K dimension indices from the input indexing maps.
+  auto maybeInputDimsAndSizes = getInputDimsAndSizes(linalgOp);
+  if (failed(maybeInputDimsAndSizes)) return failure();
+  SmallVector<unsigned, 2> mDims = maybeInputDimsAndSizes.value().mDims;
+  SmallVector<unsigned, 2> nDims = maybeInputDimsAndSizes.value().nDims;
+  SmallVector<unsigned, 2> kDims = maybeInputDimsAndSizes.value().kDims;
+
   AMDAIEDeviceModel deviceModel = getDeviceModel(targetDevice);
 
   // ------------------------------------------------------
@@ -405,10 +458,11 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
   // ------------------------------------------------------
   MLIRContext *context = entryPointFn.getContext();
 
-  SmallVector<int64_t> packedSizesL0 = packPeelTiling.getPackSizeL0();
-  if (isa<linalg::BatchMatmulOp>(linalgOp)) {
-    packedSizesL0.insert(packedSizesL0.begin(), 0);
-  }
+  // Pack level => 1.
+  SmallVector<int64_t> packedSizesL0(numLoops, 0);
+  packedSizesL0[mDims.back()] = packPeelTiling.m0Pack;
+  packedSizesL0[nDims.back()] = packPeelTiling.n0Pack;
+  packedSizesL0[kDims.back()] = packPeelTiling.k0Pack;
 
   // For matmul, transpose B matrix from [K N n k] to [N K k n]
   // For matmul_transpose_b, we don't have to transpose the B matrix,
@@ -440,17 +494,11 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
                                          outerPerm);
 
   // Pack level => 2.
-  // packed size for [M, N, K, m, n, k]
-  SmallVector<int64_t> packedSizesL1 = {0,
-                                        0,
-                                        0,
-                                        packPeelTiling.m1Pack,
-                                        packPeelTiling.n1Pack,
-                                        packPeelTiling.k1Pack};
-
-  if (isa<linalg::BatchMatmulOp>(linalgOp)) {
-    packedSizesL1.insert(packedSizesL1.begin(), 0);
-  }
+  // The number of loops has increased by 3 due to the first-level pack.
+  SmallVector<int64_t> packedSizesL1(numLoops + 3, 0);
+  packedSizesL1[mDims.back() + 3] = packPeelTiling.m1Pack;
+  packedSizesL1[nDims.back() + 3] = packPeelTiling.n1Pack;
+  packedSizesL1[kDims.back() + 3] = packPeelTiling.k1Pack;
 
   // Transpose A matrix from [M K m k m0 k0] to [M K k m m0 k0]
   // Transpose C matrix from [M N m n m0 n0] to [M N n m m0 n0]
@@ -492,18 +540,24 @@ static LogicalResult setRootConfigForPackPeel4LevelTilingPipeline(
   bool fitsInL2 = (l2SizeA + l2SizeB + l2SizeInit) <
                   (deviceModel.getMemTileSizeInBytes() * numCols);
   int64_t scaleL0 = !isBatchMatmul && fitsInL2 ? 2 : 1;
-  SmallVector<int64_t> tileSizeLevel0 = {packPeelTiling.M0 * scaleL0,
-                                         packPeelTiling.N0 * scaleL0};
-  SmallVector<int64_t> tileSizeLevel1 = {numRows, numCols, 0};
-  SmallVector<int64_t> tileSizeLevel2 = {0, 0, 1};
-  SmallVector<int64_t> tileSizeLevel3 = {1, 1, 0, 0, 0, 0};
+  SmallVector<int64_t> tileSizeLevel0(numLoops, 0);
   if (isa<linalg::BatchMatmulOp>(linalgOp)) {
-    tileSizeLevel0.insert(tileSizeLevel0.begin(), 1);
-    tileSizeLevel1.insert(tileSizeLevel1.begin(), 0);
-    tileSizeLevel2.insert(tileSizeLevel2.begin(), 0);
-    tileSizeLevel3.insert(tileSizeLevel3.begin(), 0);
+    tileSizeLevel0[0] = 1;
   }
+  tileSizeLevel0[mDims[0]] = packPeelTiling.M0 * scaleL0;
+  tileSizeLevel0[nDims[0]] = packPeelTiling.N0 * scaleL0;
+
+  SmallVector<int64_t> tileSizeLevel1(numLoops, 0);
+  tileSizeLevel1[mDims[0]] = numRows;
+  tileSizeLevel1[nDims[0]] = numCols;
+
+  SmallVector<int64_t> tileSizeLevel2(numLoops, 0);
+  tileSizeLevel2[kDims[0]] = 1;
+
+  SmallVector<int64_t> tileSizeLevel3(numLoops, 0);
+  tileSizeLevel3[mDims[0]] = 1;
+  tileSizeLevel3[nDims[0]] = 1;
 
   TileSizesListType tileSizes = {tileSizeLevel0, tileSizeLevel1,
                                  tileSizeLevel2, tileSizeLevel3};
@@ -518,7 +572,7 @@ static LogicalResult setRootConfigForPackPeelPipeline(
     mlir::FunctionOpInterface entryPointFn, linalg::LinalgOp linalgOp,
     LowerToAIEPassPipeline useLowerToAIEPipeline, AMDAIEDevice targetDevice,
-    uint32_t numRows, uint32_t numCols) {
+    uint32_t numRows, uint32_t numCols, uint32_t numLoops) {
   bool isObjectFifo =
       useLowerToAIEPipeline == LowerToAIEPassPipeline::ObjectFifo;
   auto maybePackPeelTiling =
@@ -527,15 +581,23 @@ static LogicalResult setRootConfigForPackPeelPipeline(
   if (failed(maybePackPeelTiling)) return failure();
   auto packPeelTiling = maybePackPeelTiling.value();
 
+  // Get the M, N, K dimension indices from the input indexing maps.
+  auto maybeInputDimsAndSizes = getInputDimsAndSizes(linalgOp);
+  if (failed(maybeInputDimsAndSizes)) return failure();
+  SmallVector<unsigned, 2> mDims = maybeInputDimsAndSizes.value().mDims;
+  SmallVector<unsigned, 2> nDims = maybeInputDimsAndSizes.value().nDims;
+  SmallVector<unsigned, 2> kDims = maybeInputDimsAndSizes.value().kDims;
+
   // ------------------------------------------------------
   // --------------- Set packing config -------------------
   // ------------------------------------------------------
   MLIRContext *context = entryPointFn.getContext();
 
-  SmallVector<int64_t> packedSizesL0 = packPeelTiling.getPackSizeL0();
-  if (isa<linalg::BatchMatmulOp>(linalgOp)) {
-    packedSizesL0.insert(packedSizesL0.begin(), 0);
-  }
+  // Pack level => 1.
+  SmallVector<int64_t> packedSizesL0(numLoops, 0);
+  packedSizesL0[mDims.back()] = packPeelTiling.m0Pack;
+  packedSizesL0[nDims.back()] = packPeelTiling.n0Pack;
+  packedSizesL0[kDims.back()] = packPeelTiling.k0Pack;
 
   // For matmul, transpose B matrix from [K N n k] to [N K k n]
   // For matmul_transpose_b, we don't have to transpose the B matrix,
@@ -571,17 +633,11 @@ static LogicalResult setRootConfigForPackPeelPipeline(
                                          outerPerm);
 
   // Pack level => 2.
-  // packed size for [M, N, K, m, n, k]
-  SmallVector<int64_t> packedSizesL1 = {0,
-                                        0,
-                                        0,
-                                        packPeelTiling.m1Pack,
-                                        packPeelTiling.n1Pack,
-                                        packPeelTiling.k1Pack};
-
-  if (isa<linalg::BatchMatmulOp>(linalgOp)) {
-    packedSizesL1.insert(packedSizesL1.begin(), 0);
-  }
+  // The number of loops has increased by 3 due to the first-level pack.
+  SmallVector<int64_t> packedSizesL1(numLoops + 3, 0);
+  packedSizesL1[mDims.back() + 3] = packPeelTiling.m1Pack;
+  packedSizesL1[nDims.back() + 3] = packPeelTiling.n1Pack;
+  packedSizesL1[kDims.back() + 3] = packPeelTiling.k1Pack;
 
   // Transpose A matrix from [M K m k m0 k0] to [M K k m m0 k0]
   // Transpose C matrix from [M N m n m0 n0] to [M N n m m0 n0]
@@ -611,15 +667,19 @@ static LogicalResult setRootConfigForPackPeelPipeline(
   // ------------------------------------------------------
   // -------------- Set lowering config -------------------
   // ------------------------------------------------------
-  SmallVector<int64_t> tileSizeLevel0 = {packPeelTiling.M0, packPeelTiling.N0};
-  SmallVector<int64_t> tileSizeLevel1 = {0, 0, packPeelTiling.K0};
-  SmallVector<int64_t> tileSizeLevel2 = {1, 1, 0, 0, 0, 0};
-
+  SmallVector<int64_t> tileSizeLevel0(numLoops, 0);
   if (isa<linalg::BatchMatmulOp>(linalgOp)) {
-    tileSizeLevel0.insert(tileSizeLevel0.begin(), 1);
-    tileSizeLevel1.insert(tileSizeLevel1.begin(), 0);
-    tileSizeLevel2.insert(tileSizeLevel2.begin(), 0);
+    tileSizeLevel0[0] = 1;
   }
+  tileSizeLevel0[mDims[0]] = packPeelTiling.M0;
+  tileSizeLevel0[nDims[0]] = packPeelTiling.N0;
+
+  SmallVector<int64_t> tileSizeLevel1(numLoops, 0);
+  tileSizeLevel1[kDims[0]] = 1;
+
+  SmallVector<int64_t> tileSizeLevel2(numLoops, 0);
+  tileSizeLevel2[mDims[0]] = 1;
+  tileSizeLevel2[nDims[0]] = 1;
 
   TileSizesListType tileSizes = {tileSizeLevel0, tileSizeLevel1,
                                  tileSizeLevel2};
@@ -842,6 +902,8 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
                                    uint32_t numCols) {
   assert(!getLoweringConfig(genericOp) &&
          "expected lowering_config is not set");
+  unsigned numLoops = genericOp.getNumLoops();
+  assert(numLoops <= 7 && "expected the number of loops to be no more than 7");
   if (!isMatmul(genericOp) && !isMatmulTransposeA(genericOp) &&
       !isMatmulTransposeB(genericOp))
     return genericOp.emitOpError(
@@ -850,11 +912,11 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
   if (passPipeline == TilePassPipeline::PackPeelPipeline) {
     return setRootConfigForPackPeelPipeline(entryPointFn, genericOp,
                                             useLowerToAIEPipeline, targetDevice,
-                                            numRows, numCols);
+                                            numRows, numCols, numLoops);
   }
   if (passPipeline == TilePassPipeline::PackPeel4LevelTilingPipeline) {
     return setRootConfigForPackPeel4LevelTilingPipeline(
-        entryPointFn, genericOp, targetDevice, numRows, numCols);
+        entryPointFn, genericOp, targetDevice, numRows, numCols, numLoops);
   }
   if (passPipeline == TilePassPipeline::PadPackPipeline) {
     return setRootConfigForPadPackPipeline(entryPointFn, genericOp,
@@ -875,15 +937,7 @@ static LogicalResult setRootConfig(mlir::FunctionOpInterface entryPointFn,
          "expected lowering_config is not set");
   auto linalgOp = cast<linalg::LinalgOp>(contractionOp.getOperation());
   unsigned numLoops = linalgOp.getNumLoops();
-  {
-    SmallVector<unsigned> dims;
-    linalgOp.getReductionDims(dims);
-    if (dims.size() != 1 || dims[0] != numLoops - 1) {
-      return linalgOp.emitOpError(
-                 "is expected to have exactly one reduction dim, ")
-             << "and that it is the innermost dim (" << numLoops - 1 << ").";
-    }
-  }
+  assert(numLoops <= 7 && "expected the number of loops to be no more than 7");
 
   // TODO (nmeshram) : This needs to be moved in a separate more generalized
   // logic. Also, need a flag to experiment between pad based and pack based
   // approach.
   if (passPipeline == TilePassPipeline::PackPeelPipeline) {
     return setRootConfigForPackPeelPipeline(entryPointFn, linalgOp,
                                             useLowerToAIEPipeline, targetDevice,
-                                            numRows, numCols);
+                                            numRows, numCols, numLoops);
   }
   if (passPipeline == TilePassPipeline::PackPeel4LevelTilingPipeline) {
     return setRootConfigForPackPeel4LevelTilingPipeline(
-        entryPointFn, linalgOp, targetDevice, numRows, numCols);
+        entryPointFn, linalgOp, targetDevice, numRows, numCols, numLoops);
   }
   if (passPipeline == TilePassPipeline::PadPackPipeline) {
     return setRootConfigForPadPackPipeline(entryPointFn, linalgOp, targetDevice,
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_air.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_air.mlir
index 6b6718808..aef9e1e26 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_air.mlir
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_air.mlir
@@ -189,7 +189,7 @@ builtin.module {
 
 // -----
 
-// CHECK-PACK-PEEL{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-PACK-PEEL{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-PACK-PEEL{LITERAL}: #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
@@ -216,7 +216,7 @@ builtin.module {
 
 // -----
 
-// CHECK-PACK-PEEL{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-PACK-PEEL{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-PACK-PEEL{LITERAL}: #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
@@ -244,7 +244,7 @@ module {
 // CHECK-PAD-PACK{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-PAD-PACK{LITERAL}: #packingConfig = #amdaie.packing_config
-// CHECK-PACK-PEEL{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-PACK-PEEL{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-PACK-PEEL{LITERAL}: #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_generic.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_generic.mlir
index 0e8e80a01..f8bbb02f5 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_generic.mlir
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_generic.mlir
@@ -2,7 +2,7 @@
 
 // Test generic version of matmul.
 
-// CHECK{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK{LITERAL}: #amdaie.packing_config
 module {
   func.func @matmul_generic_128x128x256_i32() {
@@ -32,7 +32,7 @@ module {
 
 // Test generic version of matmul_transpose_b.
 
-// CHECK{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK{LITERAL}: #amdaie.packing_config
 module {
   func.func @matmul_transpose_b_generic_128x128x256_i32() {
@@ -62,7 +62,7 @@ module {
 
 // Test generic version of matmul_transpose_a.
 
-// CHECK{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK{LITERAL}: #amdaie.packing_config
 module {
   func.func @matmul_transpose_a_generic_128x128x256_i32() {
@@ -85,3 +85,33 @@ module {
     return
   }
 }
+
+// -----
+
+// Test generic version of matmul with the reduction loop first, i.e., (d0, d1, d2) = (k, m, n).
+
+// CHECK{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK{LITERAL}: #amdaie.packing_config
+module {
+  func.func @matmul_generic_128x128x256_i32() {
+    %c0_i32 = arith.constant 0 : i32
+    %c0 = arith.constant 0 : index
+    %0 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(0) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<128x256xi32>>
+    %1 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(1) alignment(64) offset(%c0) flags("ReadOnly|Indirect") : !flow.dispatch.tensor<readonly:tensor<256x128xi32>>
+    %2 = hal.interface.binding.subspan layout(, #hal.pipeline.binding, #hal.pipeline.binding], flags = Indirect>) binding(2) alignment(64) offset(%c0) flags(Indirect) : !flow.dispatch.tensor<writeonly:tensor<128x128xi32>>
+    %3 = flow.dispatch.tensor.load %0, offsets = [0, 0], sizes = [128, 256], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<128x256xi32>> -> tensor<128x256xi32>
+    %4 = flow.dispatch.tensor.load %1, offsets = [0, 0], sizes = [256, 128], strides = [1, 1] : !flow.dispatch.tensor<readonly:tensor<256x128xi32>> -> tensor<256x128xi32>
+    %5 = tensor.empty() : tensor<128x128xi32>
+    %6 = linalg.fill ins(%c0_i32 : i32) outs(%5 : tensor<128x128xi32>) -> tensor<128x128xi32>
+    // CHECK: linalg.generic
+    // CHECK-SAME: attrs = {lowering_config = #config, packing_config = #packingConfig}
+    %7 = linalg.generic {indexing_maps = [affine_map<(d0, d1, d2) -> (d1, d0)>, affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>], iterator_types = ["reduction", "parallel", "parallel"]} ins(%3, %4 : tensor<128x256xi32>, tensor<256x128xi32>) outs(%6 : tensor<128x128xi32>) {
+    ^bb0(%in: i32, %in_0: i32, %out: i32):
+      %8 = arith.muli %in, %in_0 : i32
+      %9 = arith.addi %out, %8 : i32
+      linalg.yield %9 : i32
+    } -> tensor<128x128xi32>
+    flow.dispatch.tensor.store %7, %2, offsets = [0, 0], sizes = [128, 128], strides = [1, 1] : tensor<128x128xi32> -> !flow.dispatch.tensor<writeonly:tensor<128x128xi32>>
+    return
+  }
+}
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu1.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu1.mlir
index c40e27717..f7c5c8831 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu1.mlir
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu1.mlir
@@ -3,16 +3,16 @@
 // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{target-device=npu1_4col})' %s | FileCheck %s --check-prefix=CHECK-4x4
 // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{target-device=npu1_4col use-tile-pipeline=pack-peel-4-level-tiling})' %s | FileCheck %s --check-prefix=PACK-PEEL-4-LEVEL
 
-// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-2x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x4{LITERAL}: #amdaie.packing_config
-// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
+// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
 // PACK-PEEL-4-LEVEL{LITERAL}: #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
@@ -39,16 +39,16 @@ module {
 
 // -----
 
-// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-2x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x4{LITERAL}: #amdaie.packing_config
-// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
+// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
 // PACK-PEEL-4-LEVEL{LITERAL}: #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
@@ -75,16 +75,16 @@ module {
 
 // -----
 
-// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-2x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x4{LITERAL}: #amdaie.packing_config
-// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
+// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
 // PACK-PEEL-4-LEVEL{LITERAL}: #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
@@ -111,16 +111,16 @@ module {
 
 // -----
 
-// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-2x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x4{LITERAL}: #amdaie.packing_config
-// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
+// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
 // PACK-PEEL-4-LEVEL{LITERAL}: #packingConfig = #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
@@ -147,16 +147,16 @@ module {
 
 // -----
 
-// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-2x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x4{LITERAL}: #amdaie.packing_config
-// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
+// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
 // PACK-PEEL-4-LEVEL{LITERAL}: #packingConfig = #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
@@ -183,16 +183,16 @@ module {
 
 // -----
 
-// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-2x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-2x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x2{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x2{LITERAL}: #amdaie.packing_config
-// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
+// CHECK-4x4{LITERAL}: #config = #iree_codegen.lowering_config
 // CHECK-4x4{LITERAL}: #amdaie.packing_config
-// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
+// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
 // PACK-PEEL-4-LEVEL{LITERAL}: #packingConfig = #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu4.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu4.mlir
index d36bd925d..63497578b 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu4.mlir
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/lowering_strategy_objectfifo_npu4.mlir
@@ -2,7 +2,7 @@
 // RUN: iree-opt --split-input-file --pass-pipeline='builtin.module(iree-amdaie-lowering-strategy{target-device=npu4 use-tile-pipeline=pack-peel-4-level-tiling})' %s | FileCheck %s --check-prefix=PACK-PEEL-4-LEVEL
 
 // CHECK: #config = #iree_codegen.lowering_config
 // CHECK: #packingConfig = #amdaie.packing_config
-// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
+// PACK-PEEL-4-LEVEL{LITERAL}: #config = #iree_codegen.lowering_config
 // PACK-PEEL-4-LEVEL{LITERAL}: #packingConfig = #amdaie.packing_config
 #pipeline_layout = #hal.pipeline.layout,
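
---

Background on the approach: linalg::inferContractionDims classifies each loop
of a contraction-like linalg op as batch/m/n/k purely from the indexing maps,
so the lowering strategy no longer needs per-named-op shape special cases.
Below is a minimal, self-contained C++ sketch of the same dim-and-size
fetching idea, under stated assumptions: it handles only static loop ranges,
and the helper names (groupSize, printContractionSizes) are illustrative,
not part of the patch.

#include <cstdint>

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/raw_ostream.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/IR/BuiltinTypes.h"
#include "mlir/Support/LogicalResult.h"

using namespace mlir;

// Multiplies the static loop ranges selected by one dim group (m, n, or k).
// E.g. with mDims = {0, 1} and loop ranges {2, 64, 512, 256}, M = 2 * 64.
static FailureOr<int64_t> groupSize(linalg::LinalgOp op,
                                    llvm::ArrayRef<unsigned> dimGroup) {
  auto ranges = op.getStaticLoopRanges();
  int64_t total = 1;
  for (unsigned d : dimGroup) {
    if (d >= ranges.size() || ShapedType::isDynamic(ranges[d]))
      return failure();  // Out-of-range or dynamic loop bound: give up.
    total *= ranges[d];
  }
  return total;
}

// Prints the overall M, N, K of any contraction-like linalg op, whatever the
// loop order (e.g. (m, n, k) or (k, m, n)) or transposition of the operands.
static LogicalResult printContractionSizes(linalg::LinalgOp op) {
  FailureOr<linalg::ContractionDimensions> dims =
      linalg::inferContractionDims(op);
  if (failed(dims)) return failure();  // Not a contraction.
  FailureOr<int64_t> m = groupSize(op, dims->m);
  FailureOr<int64_t> n = groupSize(op, dims->n);
  FailureOr<int64_t> k = groupSize(op, dims->k);
  if (failed(m) || failed(n) || failed(k)) return failure();
  llvm::outs() << "M=" << *m << " N=" << *n << " K=" << *k << "\n";
  return success();
}

Because the m/n/k groups are loop indices rather than fixed positions, the
same code covers the new test above, where the reduction loop comes first:
inferContractionDims returns m = {1}, n = {2}, k = {0}, and the tile-size
vectors are then indexed by mDims[0], nDims[0], kDims[0] instead of assuming
a fixed {M, N, K} loop order.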