From 75060e486f87f4345a4de215bb9e2b89141006cf Mon Sep 17 00:00:00 2001
From: Jorn Tuyls
Date: Wed, 15 Jan 2025 07:01:31 -0800
Subject: [PATCH] [CombineStridedOps] Generalize dimension checking

---
 build_tools/ci/cpu_comparison/run.py          |  41 +++++--
 .../Transforms/AMDAIECombineStridedOps.cpp    |  68 ++++++++----
 .../Transforms/Utils/AMDAIEDmaUtils.cpp       |  13 ++-
 .../Transforms/Utils/AMDAIEDmaUtils.h         |   4 +-
 .../Transforms/test/AMDAIEDmaUtilsTest.cpp    | 102 ++++++++++++++----
 .../Transforms/test/combine_strided_ops.mlir  |  16 +--
 6 files changed, 183 insertions(+), 61 deletions(-)

diff --git a/build_tools/ci/cpu_comparison/run.py b/build_tools/ci/cpu_comparison/run.py
index ef5546745..f321b6e4a 100755
--- a/build_tools/ci/cpu_comparison/run.py
+++ b/build_tools/ci/cpu_comparison/run.py
@@ -708,7 +708,17 @@ class BatchMatmul(BaseMatmul):
     A test of the form batch_matmul(A,B) where A:BxMxK, B:BxKxN
     """
 
-    def __init__(self, B, M, N, K, input_type, acc_type, run_on_target=["npu1_4col"]):
+    def __init__(
+        self,
+        B,
+        M,
+        N,
+        K,
+        input_type,
+        acc_type,
+        run_on_target=["npu1_4col"],
+        tile_pipeline="pack-peel",
+    ):
         super().__init__(
             run_on_target=run_on_target,
             aie_compilation_flags=None,
@@ -717,12 +727,14 @@ def __init__(self, B, M, N, K, input_type, acc_type, run_on_target=["npu1_4col"]
             K=K,
             input_type=input_type,
             acc_type=acc_type,
-            tile_pipeline="pack-peel",
+            tile_pipeline=tile_pipeline,
             n_repeats=1,
         )
         self.labels.append("BatchMatmul")
 
         self.name = f"batch_matmul_{B}_{M}_{N}_{K}_{input_type}_{acc_type}"
+        if tile_pipeline == "pack-peel-4-level-tiling":
+            self.name += "_4_level_tiling"
         self.B = B
 
     def _execute(self, config):
@@ -1624,11 +1636,26 @@ def __init__(self):
         )
 
         # BatchMatmul test(s):
-        for input_type, acc_type in zip(["i32", "bf16"], ["i32", "f32"]):
-            # Batch size = 1:
-            self.register(BatchMatmul(1, 128, 128, 256, input_type, acc_type))
-            # Batch size = 2:
-            self.register(BatchMatmul(2, 64, 64, 64, input_type, acc_type))
+        for tile_pipeline in ["pack-peel", "pack-peel-4-level-tiling"]:
+            for input_type, acc_type in zip(["i32", "bf16"], ["i32", "f32"]):
+                # Batch size = 1:
+                self.register(
+                    BatchMatmul(
+                        1,
+                        128,
+                        128,
+                        256,
+                        input_type,
+                        acc_type,
+                        tile_pipeline=tile_pipeline,
+                    )
+                )
+                # Batch size = 2:
+                self.register(
+                    BatchMatmul(
+                        2, 64, 64, 64, input_type, acc_type, tile_pipeline=tile_pipeline
+                    )
+                )
 
         # MatmulThinBias test(s):
         self.register(MatmulThinBias(1024, 1024, 512, "bf16", "f32", use_ukernel=True))
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECombineStridedOps.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECombineStridedOps.cpp
index 9281d77ec..04a1d1663 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECombineStridedOps.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/AMDAIECombineStridedOps.cpp
@@ -21,6 +21,8 @@
 
 #define DEBUG_TYPE "iree-amdaie-combine-strided-ops"
 
+using namespace std::placeholders;
+
 namespace mlir::iree_compiler::AMDAIE {
 
 namespace {
@@ -43,6 +45,8 @@ struct CombineStridedOps
     Block *block = op->getBlock();
     if (!block) return failure();
 
+    std::unique_ptr<DmaDimConfig> sourceDmaDimConfig;
+    std::unique_ptr<DmaDimConfig> targetDmaDimConfig;
     SmallVector<Operation *> userOpsToBeErased;
     AMDAIE::DoublyStridedOpInterface nextStridedOp;
     if (auto npuDmaOp = dyn_cast<AMDAIE::NpuDmaCpyNdOp>(op.getOperation())) {
@@ -60,6 +64,20 @@ struct CombineStridedOps
       if (failed(maybeNextNpuDmaOp)) return failure();
       nextStridedOp = cast<AMDAIE::DoublyStridedOpInterface>(
          maybeNextNpuDmaOp->getOperation());
+      if (!nextStridedOp) return failure();
+
+      std::optional<uint8_t> sourceMemspaceInt =
+          nextStridedOp.getSourceMemorySpaceAsUInt();
+      std::optional<uint8_t> targetMemspaceInt =
+          nextStridedOp.getTargetMemorySpaceAsUInt();
+      if (!sourceMemspaceInt || !targetMemspaceInt) {
+        return rewriter.notifyMatchFailure(
+            nextStridedOp, "expected a source and target memory space");
+      }
+      sourceDmaDimConfig = std::make_unique<DmaDimConfig>(
+          deviceModel, sourceMemspaceInt.value());
+      targetDmaDimConfig = std::make_unique<DmaDimConfig>(
+          deviceModel, targetMemspaceInt.value());
     } else if (auto npuCircularDmaOp =
                    dyn_cast<AMDAIE::NpuCircularDmaCpyNdOp>(op.getOperation())) {
       LLVM_DEBUG(llvm::dbgs()
@@ -69,25 +87,24 @@ struct CombineStridedOps
       if (failed(maybeNextNpuCircDmaOp)) return failure();
       nextStridedOp = cast<AMDAIE::DoublyStridedOpInterface>(
           maybeNextNpuCircDmaOp->getOperation());
+      if (!nextStridedOp) return failure();
+
+      std::optional<uint8_t> sourceMemspaceInt =
+          nextStridedOp.getSourceMemorySpaceAsUInt();
+      std::optional<uint8_t> targetMemspaceInt =
+          nextStridedOp.getTargetMemorySpaceAsUInt();
+      if (!sourceMemspaceInt || !targetMemspaceInt) {
+        return rewriter.notifyMatchFailure(
+            nextStridedOp, "expected a source and target memory space");
+      }
+      sourceDmaDimConfig = std::make_unique<DmaDimConfig>(
+          deviceModel, sourceMemspaceInt.value());
+      targetDmaDimConfig = std::make_unique<DmaDimConfig>(
+          deviceModel, targetMemspaceInt.value());
     } else {
       return failure();
     }
 
-    if (!nextStridedOp) return failure();
-
-    std::optional<uint8_t> sourceMemspaceInt =
-        nextStridedOp.getSourceMemorySpaceAsUInt();
-    std::optional<uint8_t> targetMemspaceInt =
-        nextStridedOp.getTargetMemorySpaceAsUInt();
-    if (!sourceMemspaceInt || !targetMemspaceInt) {
-      return rewriter.notifyMatchFailure(
-          nextStridedOp, "expected a source and target memory space");
-    }
-    DmaDimConfig sourceDmaDimConfig(deviceModel, sourceMemspaceInt.value());
-    size_t sourceMaxNbDims = sourceDmaDimConfig.maxNbDims;
-    DmaDimConfig targetDmaDimConfig(deviceModel, targetMemspaceInt.value());
-    size_t targetMaxNbDims = targetDmaDimConfig.maxNbDims;
-
     SmallVector<OpFoldResult> sourceOffsetsA = op.getSourceMixedOffsets();
     SmallVector<OpFoldResult> sourceSizesA = op.getSourceMixedSizes();
     SmallVector<OpFoldResult> sourceStridesA = op.getSourceMixedStrides();
@@ -99,7 +116,9 @@ struct CombineStridedOps
         nextStridedOp.getSourceMixedStrides();
     bool areSourcesCombinable = areAccessPatternsCombinable(
         sourceOffsetsA, sourceSizesA, sourceStridesA, sourceOffsetsB,
-        sourceSizesB, sourceStridesB, sourceMaxNbDims);
+        sourceSizesB, sourceStridesB,
+        std::bind(&DmaDimConfig::exceedsNbDims, std::ref(sourceDmaDimConfig),
+                  _1));
 
     SmallVector<OpFoldResult> targetOffsetsA = op.getTargetMixedOffsets();
     SmallVector<OpFoldResult> targetSizesA = op.getTargetMixedSizes();
@@ -112,7 +131,14 @@ struct CombineStridedOps
         nextStridedOp.getTargetMixedStrides();
     bool areTargetsCombinable = areAccessPatternsCombinable(
         targetOffsetsA, targetSizesA, targetStridesA, targetOffsetsB,
-        targetSizesB, targetStridesB, targetMaxNbDims);
+        targetSizesB, targetStridesB,
+        std::bind(&DmaDimConfig::exceedsNbDims, std::ref(targetDmaDimConfig),
+                  _1));
+
+    LLVM_DEBUG(llvm::dbgs()
+               << "areSourcesCombinable: " << areSourcesCombinable << "\n");
+    LLVM_DEBUG(llvm::dbgs()
+               << "areTargetsCombinable: " << areTargetsCombinable << "\n");
 
     if (areSourcesCombinable && areTargetsCombinable) {
       SmallVector<OpFoldResult> newSourceOffsets;
@@ -121,7 +147,9 @@ struct CombineStridedOps
       SmallVector<OpFoldResult> newSourceSizes;
       SmallVector<OpFoldResult> newSourceStrides;
       if (failed(combineAccessPatterns(
              rewriter, sourceOffsetsA, sourceSizesA, sourceStridesA,
              sourceOffsetsB, sourceSizesB, sourceStridesB, newSourceOffsets,
-              newSourceSizes, newSourceStrides, sourceMaxNbDims))) {
+              newSourceSizes, newSourceStrides,
+              std::bind(&DmaDimConfig::exceedsNbDims,
+                        std::ref(sourceDmaDimConfig), _1)))) {
        return failure();
      }
 
@@ -131,7 +159,9 @@ struct CombineStridedOps
      if (failed(combineAccessPatterns(
              rewriter, targetOffsetsA, targetSizesA, targetStridesA,
              targetOffsetsB, targetSizesB, targetStridesB, newTargetOffsets,
-              newTargetSizes, newTargetStrides, targetMaxNbDims))) {
+              newTargetSizes, newTargetStrides,
+              std::bind(&DmaDimConfig::exceedsNbDims,
+                        std::ref(targetDmaDimConfig), _1)))) {
        return failure();
      }
 
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.cpp
index 81c9cf64f..89657708d 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.cpp
@@ -11,6 +11,8 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "mlir/Dialect/Utils/StaticValueUtils.h"
 
+#define DEBUG_TYPE "iree-amdaie-dma-utils"
+
 namespace mlir::iree_compiler::AMDAIE {
 
 static bool isEqualConstantIntOrValueArrayFromIndices(
@@ -42,7 +44,7 @@ bool areAccessPatternsCombinable(const SmallVector<OpFoldResult> &offsetsA,
                                  const SmallVector<OpFoldResult> &offsetsB,
                                  const SmallVector<OpFoldResult> &sizesB,
                                  const SmallVector<OpFoldResult> &stridesB,
-                                 size_t maxNbDims) {
+                                 function_ref<bool(size_t)> exceedsNbDims) {
   assert(offsetsA.size() == sizesA.size() &&
          "expected same number of source offsets and sizes");
   assert(offsetsA.size() == stridesA.size() &&
@@ -59,8 +61,11 @@ bool areAccessPatternsCombinable(const SmallVector<OpFoldResult> &offsetsA,
   // In case both access patterns have the same number of dimensions, a new
   // dimension will need to be added, so fail if there aren't enough
   // dimensions.
-  if (offsetsA.size() == offsetsB.size() && offsetsA.size() + 1 > maxNbDims)
+  if (offsetsA.size() == offsetsB.size() &&
+      exceedsNbDims(offsetsA.size() + 1)) {
+    LLVM_DEBUG(llvm::dbgs() << "Exceeded maximum number of dimensions\n");
     return false;
+  }
 
   // Equality of the last N elements of the access patterns of A and B with N =
   // min(sizeA, sizeB) results in some simple cases in which the access
@@ -196,7 +201,7 @@ LogicalResult combineAccessPatterns(RewriterBase &rewriter,
                                     SmallVector<OpFoldResult> &newOffsets,
                                     SmallVector<OpFoldResult> &newSizes,
                                     SmallVector<OpFoldResult> &newStrides,
-                                    size_t maxNbDims) {
+                                    function_ref<bool(size_t)> exceedsNbDims) {
   assert(offsetsA.size() == sizesA.size() &&
          "expected same number of source offsets and sizes");
   assert(offsetsA.size() == stridesA.size() &&
@@ -206,7 +211,7 @@ LogicalResult combineAccessPatterns(RewriterBase &rewriter,
   assert(offsetsB.size() == stridesB.size() &&
          "expected same number of source offsets and strides");
   if (!areAccessPatternsCombinable(offsetsA, sizesA, stridesA, offsetsB, sizesB,
-                                   stridesB, maxNbDims)) {
+                                   stridesB, exceedsNbDims)) {
     return failure();
   }
   if (offsetsA.empty() && offsetsB.empty()) return success();
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.h b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.h
index 6f4bc1040..0c909c5f3 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.h
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/Utils/AMDAIEDmaUtils.h
@@ -93,7 +93,7 @@ bool areAccessPatternsCombinable(const SmallVector<OpFoldResult> &offsetsA,
                                  const SmallVector<OpFoldResult> &offsetsB,
                                  const SmallVector<OpFoldResult> &sizesB,
                                  const SmallVector<OpFoldResult> &stridesB,
-                                 size_t maxNbDims);
+                                 function_ref<bool(size_t)> exceedsNbDims);
 
 /// Combine two access patterns into a single one. Assumes that access pattern A
 /// belongs to a strided op which is ordered before the strided op B. Takes a
@@ -110,7 +110,7 @@ LogicalResult combineAccessPatterns(RewriterBase &rewriter,
                                     SmallVector<OpFoldResult> &newOffsets,
                                     SmallVector<OpFoldResult> &newSizes,
                                     SmallVector<OpFoldResult> &newStrides,
-                                    size_t maxNbDims);
+                                    function_ref<bool(size_t)> exceedsNbDims);
 
 /// Fold subsequent dimensions within a strided access pattern that describe a
 /// single linear access. Returns `success` if folding took place.
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/AMDAIEDmaUtilsTest.cpp b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/AMDAIEDmaUtilsTest.cpp
index 4f0c47f43..ccb08bae5 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/AMDAIEDmaUtilsTest.cpp
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/AMDAIEDmaUtilsTest.cpp
@@ -32,6 +32,23 @@ class AccessPatternCombinationTest : public ::testing::Test {
     });
   }
 
+  bool checkAreAccessPatternsCombinable(
+      const SmallVector<int64_t> &offsetsA, const SmallVector<int64_t> &sizesA,
+      const SmallVector<int64_t> &stridesA,
+      const SmallVector<int64_t> &offsetsB, const SmallVector<int64_t> &sizesB,
+      const SmallVector<int64_t> &stridesB,
+      function_ref<bool(size_t)> exceedsNbDims) {
+    SmallVector<OpFoldResult> offsetsValuesA = toOpFoldResult(offsetsA);
+    SmallVector<OpFoldResult> sizesValuesA = toOpFoldResult(sizesA);
+    SmallVector<OpFoldResult> stridesValuesA = toOpFoldResult(stridesA);
+    SmallVector<OpFoldResult> offsetsValuesB = toOpFoldResult(offsetsB);
+    SmallVector<OpFoldResult> sizesValuesB = toOpFoldResult(sizesB);
+    SmallVector<OpFoldResult> stridesValuesB = toOpFoldResult(stridesB);
+    return areAccessPatternsCombinable(
+        offsetsValuesA, sizesValuesA, stridesValuesA, offsetsValuesB,
+        sizesValuesB, stridesValuesB, exceedsNbDims);
+  }
+
   bool checkAreAccessPatternsCombinable(const SmallVector<int64_t> &offsetsA,
                                         const SmallVector<int64_t> &sizesA,
                                         const SmallVector<int64_t> &stridesA,
@@ -39,27 +56,19 @@ class AccessPatternCombinationTest : public ::testing::Test {
                                         const SmallVector<int64_t> &offsetsB,
                                         const SmallVector<int64_t> &sizesB,
                                         const SmallVector<int64_t> &stridesB,
                                         size_t maxNbDims) {
-    SmallVector<OpFoldResult> offsetsValuesA = toOpFoldResult(offsetsA);
-    SmallVector<OpFoldResult> sizesValuesA = toOpFoldResult(sizesA);
-    SmallVector<OpFoldResult> stridesValuesA = toOpFoldResult(stridesA);
-    SmallVector<OpFoldResult> offsetsValuesB = toOpFoldResult(offsetsB);
-    SmallVector<OpFoldResult> sizesValuesB = toOpFoldResult(sizesB);
-    SmallVector<OpFoldResult> stridesValuesB = toOpFoldResult(stridesB);
-    return areAccessPatternsCombinable(offsetsValuesA, sizesValuesA,
-                                       stridesValuesA, offsetsValuesB,
-                                       sizesValuesB, stridesValuesB, maxNbDims);
+    return checkAreAccessPatternsCombinable(
+        offsetsA, sizesA, stridesA, offsetsB, sizesB, stridesB,
+        [&](size_t dim) { return dim > maxNbDims; });
   }
 
-  void checkCombineAccessPatterns(const SmallVector<int64_t> offsetsA,
-                                  const SmallVector<int64_t> sizesA,
-                                  const SmallVector<int64_t> stridesA,
-                                  const SmallVector<int64_t> offsetsB,
-                                  const SmallVector<int64_t> sizesB,
-                                  const SmallVector<int64_t> stridesB,
-                                  const SmallVector<int64_t> expectedOffsets,
-                                  const SmallVector<int64_t> expectedSizes,
-                                  const SmallVector<int64_t> expectedStrides,
-                                  size_t maxNbDims, bool shouldSucceed = true) {
+  void checkCombineAccessPatterns(
+      const SmallVector<int64_t> offsetsA, const SmallVector<int64_t> sizesA,
+      const SmallVector<int64_t> stridesA, const SmallVector<int64_t> offsetsB,
+      const SmallVector<int64_t> sizesB, const SmallVector<int64_t> stridesB,
+      const SmallVector<int64_t> expectedOffsets,
+      const SmallVector<int64_t> expectedSizes,
+      const SmallVector<int64_t> expectedStrides,
+      function_ref<bool(size_t)> exceedsNbDims, bool shouldSucceed = true) {
     SmallVector<OpFoldResult> offsetsValuesA = toOpFoldResult(offsetsA);
     SmallVector<OpFoldResult> sizesValuesA = toOpFoldResult(sizesA);
     SmallVector<OpFoldResult> stridesValuesA = toOpFoldResult(stridesA);
@@ -79,7 +88,7 @@ class AccessPatternCombinationTest : public ::testing::Test {
       EXPECT_TRUE(succeeded(combineAccessPatterns(
          rewriter, offsetsValuesA,
          sizesValuesA, stridesValuesA, offsetsValuesB,
          sizesValuesB, stridesValuesB, newOffsets, newSizes,
-          newStrides, maxNbDims)));
+          newStrides, exceedsNbDims)));
      EXPECT_EQ(newOffsets, expectedOffsetsValues);
      EXPECT_EQ(newSizes, expectedSizesValues);
      EXPECT_EQ(newStrides, expectedStridesValues);
@@ -87,10 +96,26 @@ class AccessPatternCombinationTest : public ::testing::Test {
    } else {
      EXPECT_TRUE(failed(combineAccessPatterns(
          rewriter, offsetsValuesA, sizesValuesA, stridesValuesA,
          offsetsValuesB, sizesValuesB, stridesValuesB, newOffsets, newSizes,
-          newStrides, maxNbDims)));
+          newStrides, exceedsNbDims)));
    }
  }
 
+  void checkCombineAccessPatterns(const SmallVector<int64_t> offsetsA,
+                                  const SmallVector<int64_t> sizesA,
+                                  const SmallVector<int64_t> stridesA,
+                                  const SmallVector<int64_t> offsetsB,
+                                  const SmallVector<int64_t> sizesB,
+                                  const SmallVector<int64_t> stridesB,
+                                  const SmallVector<int64_t> expectedOffsets,
+                                  const SmallVector<int64_t> expectedSizes,
+                                  const SmallVector<int64_t> expectedStrides,
+                                  size_t maxNbDims, bool shouldSucceed = true) {
+    checkCombineAccessPatterns(
+        offsetsA, sizesA, stridesA, offsetsB, sizesB, stridesB, expectedOffsets,
+        expectedSizes, expectedStrides,
+        [&](size_t dim) { return dim > maxNbDims; }, shouldSucceed);
+  }
+
   MLIRContext context;
   IRRewriter rewriter;
   Location loc;
@@ -200,6 +225,24 @@ TEST_F(AccessPatternCombinationTest, NonCombinableAccessPatterns) {
       {32, 0}, {32, 64}, {128, 1}, {96, 0}, {64, 64}, {128, 1}, 4));
 }
 
+TEST_F(AccessPatternCombinationTest, AnyNbDims) {
+  auto exceedsNbDims = [](size_t dims) { return false; };
+  EXPECT_TRUE(checkAreAccessPatternsCombinable({0}, {16}, {1}, {32}, {16}, {1},
+                                               exceedsNbDims));
+  EXPECT_TRUE(checkAreAccessPatternsCombinable(
+      {0, 0, 0}, {16, 16, 32}, {32, 64, 1}, {0, 0, 32}, {16, 16, 32},
+      {32, 64, 1}, exceedsNbDims));
+}
+
+TEST_F(AccessPatternCombinationTest, NoDims) {
+  auto exceedsNbDims = [](size_t dims) { return true; };
+  EXPECT_FALSE(checkAreAccessPatternsCombinable({0}, {16}, {1}, {32}, {16}, {1},
+                                                exceedsNbDims));
+  EXPECT_FALSE(checkAreAccessPatternsCombinable(
+      {0, 0, 0}, {16, 16, 32}, {32, 64, 1}, {0, 0, 32}, {16, 16, 32},
+      {32, 64, 1}, exceedsNbDims));
+}
+
 TEST_F(AccessPatternCombinationTest, CombineAccessPatterns) {
   checkCombineAccessPatterns({}, {}, {}, {}, {}, {}, {}, {}, {}, 1);
   // size(A) == size(B)
@@ -317,6 +360,23 @@ TEST_F(AccessPatternCombinationTest, FailCombineAccessPatterns) {
       {128, 1}, {32, 0}, {96, 64}, {128, 1}, 4, false);
 }
 
+TEST_F(AccessPatternCombinationTest, CombineAccessPatternsAnyNbDims) {
+  auto exceedsNbDims = [](size_t dims) { return false; };
+  checkCombineAccessPatterns({}, {}, {}, {}, {}, {}, {}, {}, {}, exceedsNbDims);
+  checkCombineAccessPatterns({0, 0}, {16, 32}, {16, 1}, {0, 32}, {16, 32},
+                             {16, 1}, {0, 0, 0}, {2, 16, 32}, {32, 16, 1},
+                             exceedsNbDims);
+}
+
+TEST_F(AccessPatternCombinationTest, CombineAccessPatternsNoDims) {
+  auto exceedsNbDims = [](size_t dims) { return true; };
+  checkCombineAccessPatterns({0}, {16}, {1}, {32}, {16}, {1}, {}, {}, {},
+                             exceedsNbDims, false);
+  checkCombineAccessPatterns({0, 0}, {16, 32}, {16, 1}, {0, 32}, {16, 32},
+                             {16, 1}, {0, 0, 0}, {2, 16, 32}, {32, 16, 1},
+                             exceedsNbDims, false);
+}
+
 class FoldTest : public ::testing::Test {
  protected:
   FoldTest() : rewriter(&context), loc(UnknownLoc::get(&context)) {
diff --git a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/combine_strided_ops.mlir b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/combine_strided_ops.mlir
index 0073f7ecf..2bb6f0d24 100644
--- a/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/combine_strided_ops.mlir
+++ b/compiler/plugins/target/AMD-AIE/iree-amd-aie/Transforms/test/combine_strided_ops.mlir
@@ -615,13 +615,13 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
 
 // -----
 
-// CHECK-LABEL: @circular_not_enough_dims_source
+// CHECK-LABEL: @circular_any_num_dims_source
 // CHECK:       %[[CONNECTION:.+]] = amdaie.connection
-// CHECK:       amdaie.npu.circular_dma_cpy_nd %[[CONNECTION]]([] [] [], [0, 0, 0, 0] [8, 16, 8, 16] [8, 32, 8, 1])
-// CHECK:       amdaie.npu.circular_dma_cpy_nd %[[CONNECTION]]([] [] [], [0, 0, 0, 32] [8, 16, 8, 16] [8, 32, 8, 1])
+// CHECK:       amdaie.npu.circular_dma_cpy_nd %[[CONNECTION]]([] [] [], [0, 0, 0, 0, 0] [2, 8, 16, 8, 16] [32, 8, 32, 8, 1])
+// CHECK-NOT:   amdaie.npu.circular_dma_cpy_nd
 #executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
 module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
-  func.func @circular_not_enough_dims_source(%arg0: !amdaie.logicalobjectfifo>, %arg1: !amdaie.logicalobjectfifo>) {
+  func.func @circular_any_num_dims_source(%arg0: !amdaie.logicalobjectfifo>, %arg1: !amdaie.logicalobjectfifo>) {
     amdaie.workgroup {
       %0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>)
       amdaie.controlcode {
@@ -636,13 +636,13 @@ module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb}
 
 // -----
 
-// CHECK-LABEL: @circular_not_enough_dims_target
+// CHECK-LABEL: @circular_any_num_dims_target
 // CHECK:       %[[CONNECTION:.+]] = amdaie.connection
-// CHECK:       amdaie.npu.circular_dma_cpy_nd %[[CONNECTION]]([0, 0, 0, 0] [8, 16, 8, 16] [8, 32, 8, 1], [] [] [])
-// CHECK:       amdaie.npu.circular_dma_cpy_nd %[[CONNECTION]]([0, 0, 0, 32] [8, 16, 8, 16] [8, 32, 8, 1], [] [] [])
+// CHECK:       amdaie.npu.circular_dma_cpy_nd %[[CONNECTION]]([0, 0, 0, 0, 0] [2, 8, 16, 8, 16] [32, 8, 32, 8, 1], [] [] [])
+// CHECK-NOT:   amdaie.npu.circular_dma_cpy_nd
 #executable_target_amdaie_xclbin_fb = #hal.executable.target<"amd-aie", "amdaie-xclbin-fb", {target_device = "npu1_4col", ukernels = "none"}>
 module attributes {hal.executable.target = #executable_target_amdaie_xclbin_fb} {
-  func.func @circular_not_enough_dims_target(%arg0: !amdaie.logicalobjectfifo>, %arg1: !amdaie.logicalobjectfifo>) {
+  func.func @circular_any_num_dims_target(%arg0: !amdaie.logicalobjectfifo>, %arg1: !amdaie.logicalobjectfifo>) {
    amdaie.workgroup {
      %0 = amdaie.connection(%arg0, %arg1) : (!amdaie.logicalobjectfifo>, !amdaie.logicalobjectfifo>)
      amdaie.controlcode {
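
Usage note (editorial sketch, not part of the patch): the core API change above
replaces the scalar maxNbDims cap in areAccessPatternsCombinable /
combineAccessPatterns with a function_ref<bool(size_t)> predicate, and
CombineStridedOps now binds DmaDimConfig::exceedsNbDims separately for the
source and target memory spaces. A fixed cap like the old behavior is recovered
with a lambda. Minimal sketch, assuming the declarations from AMDAIEDmaUtils.h
are in scope; the access-pattern vectors are placeholder values:

  // Emulate the previous fixed limit: reject anything above maxNbDims.
  size_t maxNbDims = 4;
  auto exceedsNbDims = [&](size_t nbDims) { return nbDims > maxNbDims; };
  // Placeholder patterns; real callers pass an op's mixed offsets/sizes/strides.
  SmallVector<OpFoldResult> offsetsA, sizesA, stridesA;
  SmallVector<OpFoldResult> offsetsB, sizesB, stridesB;
  bool combinable = areAccessPatternsCombinable(
      offsetsA, sizesA, stridesA, offsetsB, sizesB, stridesB, exceedsNbDims);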