diff --git a/include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td b/include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td index 49422f46c..a793b7081 100644 --- a/include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td +++ b/include/imex/Dialect/XeGPU/IR/XeGPUAttrs.td @@ -22,30 +22,27 @@ def XeGPU_ScatteredAttr : XeGPUAttr<"Scattered", "scattered"> { def XeGPU_SgMapAttr: XeGPUAttr<"SubGroupMap", "sg_map"> { let parameters = (ins - ArrayRefParameter<"unsigned">:$wiLayout, - ArrayRefParameter<"unsigned">:$wiData, - ArrayRefParameter<"unsigned">:$mmaBlockSize); + OptionalParameter<"mlir::DenseI32ArrayAttr">:$mma_block_size, + "mlir::DenseI32ArrayAttr":$wi_layout, + "mlir::DenseI32ArrayAttr":$wi_data + ); // In format of #xegpu.sg_map<{mma_block_size = [2, 4], wi_layout = [2, 4], wi_data = [2, 4]}> - let assemblyFormat = "`<` custom($wiLayout, $wiData, $mmaBlockSize) `>`"; + let assemblyFormat = "`<` struct(params) `>`"; let genVerifyDecl = true; - let extraClassDeclaration = [{ - bool hasMMABlockSizeAttr() { - return getMmaBlockSize().size() == 2; - } - }]; - let builders = [ AttrBuilder<(ins - "::llvm::ArrayRef":$wiLayout, - "::llvm::ArrayRef":$wiData, - CArg<"::llvm::ArrayRef", "{}">:$mmaBlockSize + "::llvm::ArrayRef":$wiLayout, + "::llvm::ArrayRef":$wiData, + CArg<"::llvm::ArrayRef", "{}">:$mmaBlockSize ), [{ assert(wiLayout.size() == 2 && wiData.size() == 2 && "wiLayout and wiData should be 2D arrays.\n"); assert((mmaBlockSize.size() == 2 || mmaBlockSize.size() == 0) && "mmaBlockSize can be either empty or a 2D array.\n"); - return $_get($_ctxt, wiLayout, wiData, mmaBlockSize); + return $_get($_ctxt, mlir::DenseI32ArrayAttr::get($_ctxt, mmaBlockSize), + mlir::DenseI32ArrayAttr::get($_ctxt, wiLayout), + mlir::DenseI32ArrayAttr::get($_ctxt, wiData)); }]> ]; @@ -54,16 +51,18 @@ def XeGPU_SgMapAttr: XeGPUAttr<"SubGroupMap", "sg_map"> { def XeGPU_WgMapAttr: XeGPUAttr<"WorkGroupMap", "wg_map"> { let parameters = (ins - ArrayRefParameter<"unsigned">:$sgLayout, - ArrayRefParameter<"unsigned">:$sgData); + "mlir::DenseI32ArrayAttr":$sg_layout, + "mlir::DenseI32ArrayAttr":$sg_data + ); let builders = [ AttrBuilder<(ins - "::llvm::ArrayRef":$sgLayout, - "::llvm::ArrayRef":$sgData + "::llvm::ArrayRef":$sgLayout, + "::llvm::ArrayRef":$sgData ), [{ assert(sgLayout.size() == 2 && sgData.size() == 2 && "sgLayout and sgData should be 2D arrays.\n"); - return $_get($_ctxt, sgLayout, sgData); + return $_get($_ctxt, mlir::DenseI32ArrayAttr::get($_ctxt, sgLayout), + mlir::DenseI32ArrayAttr::get($_ctxt, sgData)); }]> ]; @@ -71,7 +70,7 @@ def XeGPU_WgMapAttr: XeGPUAttr<"WorkGroupMap", "wg_map"> { let skipDefaultBuilders = 1; // In format of #xegpu.wg_map<{sg_layout = [2, 4], sg_data = [2, 4]}> - let assemblyFormat = "`<` custom($sgLayout, $sgData) `>`"; + let assemblyFormat = "`<` struct(params) `>`"; } def XeGPU_XeMapAttr: XeGPUAttr<"XeMap", "xe_map"> { @@ -81,23 +80,27 @@ def XeGPU_XeMapAttr: XeGPUAttr<"XeMap", "xe_map"> { let builders = [ AttrBuilder<(ins - "::llvm::ArrayRef":$sgLayout, - "::llvm::ArrayRef":$sgData, - "::llvm::ArrayRef":$wiLayout, - "::llvm::ArrayRef":$wiData, - CArg<"::llvm::ArrayRef", "{}">:$mmaBlockSize + "::llvm::ArrayRef":$sgLayout, + "::llvm::ArrayRef":$sgData, + "::llvm::ArrayRef":$wiLayout, + "::llvm::ArrayRef":$wiData, + CArg<"::llvm::ArrayRef", "{}">:$mmaBlockSize ), [{ assert(sgLayout.size() == 2 && sgData.size() == 2 && "sgLayout and sgData should be 2D arrays.\n"); assert(wiLayout.size() == 2 && wiData.size() == 2 && "wiLayout and wiData should be 2D arrays.\n"); assert((mmaBlockSize.size() == 2 || 
mmaBlockSize.size() == 0) && "mmaBlockSize can be either empty or a 2D array.\n"); - auto wg = WorkGroupMapAttr::get($_ctxt, sgLayout, sgData); - auto sg = SubGroupMapAttr::get($_ctxt, wiLayout, wiData, mmaBlockSize); + auto wg = WorkGroupMapAttr::get($_ctxt, mlir::DenseI32ArrayAttr::get($_ctxt, sgLayout), + mlir::DenseI32ArrayAttr::get($_ctxt, sgData)); + auto sg = SubGroupMapAttr::get($_ctxt, mlir::DenseI32ArrayAttr::get($_ctxt, mmaBlockSize), + mlir::DenseI32ArrayAttr::get($_ctxt, wiLayout), + mlir::DenseI32ArrayAttr::get($_ctxt, wiData)); return $_get($_ctxt, wg, sg); }]> ]; // In format of #xegpu.xe_map - let hasCustomAssemblyFormat = 1; + let assemblyFormat = "`<` struct(params) `>`"; + } def XeGPU_ArgTypeAttr : I32EnumAttr< diff --git a/include/imex/Dialect/XeGPU/IR/XeGPUOps.td b/include/imex/Dialect/XeGPU/IR/XeGPUOps.td index b2baf565c..652ee9f60 100644 --- a/include/imex/Dialect/XeGPU/IR/XeGPUOps.td +++ b/include/imex/Dialect/XeGPU/IR/XeGPUOps.td @@ -317,13 +317,12 @@ def XeGPU_CreateDescOp let description = [{ "create_tdesc" is similar to "create_nd_tdesc" in terms that it creates a TensorDesc for a memory region. while "create_nd_tdesc" is for creating continious subviews, "create_tdesc" is for creating non-continious - (scattered) subviews. It accepts the following parameters: + (scattered) subviews. It only works with VectorCompute (VC) mode and accepts the following parameters: * source: a 1D memref or pointer (uint64_t) represents the memory object. - * offsets: In VectorCompute (VC) mode, it is a 1D vector containing offsets of each access point, the size is aligned with + * offsets: It is a 1D vector containing offsets of each access point, the size should be aligned with supportted group size, e.g., vector<16xindex>. And each element in the vector corresponds to a work item (SIMT lane) in the subgroup. - In SIMT mode (default), it is an index scalar representing the offset of the access point. * chunk_size_per_lane: [optional attribute] indicates number of continious elements accessed for each offset, default is 1. Example 1. It assumes subgroup size is 4, and accesses a[0], a[16], a[32], a[64] @@ -336,13 +335,6 @@ def XeGPU_CreateDescOp %0 = memref.alloc() : memref<1024xf32> %c0 = arith.constant dense<0, 16, 32, 64> : vector<4xindex> %1 = xegpu.create_tdesc %0, %c0 {chunk_size_per_lane = 8}: memref<1024xf32> -> TensorDesc<4x8xf32> - - Example 3. an SIMT mode example, accessing a[16]. 
- %a = memref.alloc() : memref<1024xf32> - %c0 = arith.constant 16 : index - %1 = xegpu.create_tdesc %a, %c0: memref<1024xf32> -> TensorDesc<1xf32> - - }]; let arguments = (ins XeGPU_BaseAddrType: $source, @@ -366,6 +358,28 @@ def XeGPU_CreateDescOp }]; + let builders = [ + OpBuilder<(ins "::imex::xegpu::TensorDescType": $TensorDesc, "::mlir::Value": $source, + "::mlir::Value": $offsets, CArg<"uint32_t", "1"> : $chunk_size_per_lane), [{ + $_state.addOperands(source); + $_state.addOperands(offsets); + $_state.getOrAddProperties().chunk_size_per_lane = $_builder.getIntegerAttr($_builder.getIntegerType(32), chunk_size_per_lane); + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + $_state.addTypes(TensorDesc); + }]>, + + OpBuilder<(ins "::imex::xegpu::TensorDescType": $TensorDesc, "::mlir::Value": $source, + "::mlir::Value": $offsets, "::mlir::IntegerAttr": $chunk_size_per_lane), [{ + $_state.addOperands(source); + $_state.addOperands(offsets); + if(chunk_size_per_lane) + $_state.getOrAddProperties().chunk_size_per_lane = chunk_size_per_lane; + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + $_state.addTypes(TensorDesc); + }]> + ]; + let skipDefaultBuilders = 1; + // Format: xegpu.create_tdesc %src, %offsets {mode=simt, chunk_size_per_lane=1} // : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> let hasCustomAssemblyFormat = 1; @@ -431,6 +445,25 @@ def XeGPU_PrefetchNDOp : XeGPU_Op<"prefetch_nd", []> { let hasCustomAssemblyFormat = 1; } +def XeGPU_UpdateNDOffsetOp : XeGPU_Op<"update_nd_offset", []> { + let summary = "update the offsets for the given tensor descriptor"; + + let arguments = (ins + XeGPU_TensorDesc: $TensorDesc, + Variadic: $offsets, + DefaultValuedAttr: $mode); + + let results = (outs XeGPU_TensorDesc: $result); + + let assemblyFormat = [{ + $TensorDesc `,` (`[` $offsets^ `]`)? (`{` `mode` `=` $mode^ `}`)? 
+ attr-dict `:` qualified(type($TensorDesc)) `->` qualified(type($result)) + }]; + + let hasVerifier = 1; +} + + def XeGPU_DpasOp : XeGPU_Op<"dpas"> { let summary = "performs dpas computation"; let arguments = (ins @@ -480,6 +513,55 @@ def XeGPU_LoadGatherOp : XeGPU_Op<"load"> { let results = (outs XeGPU_ValueType: $value); + let builders = [ + OpBuilder<(ins "::mlir::Type": $value, "::mlir::Value": $TensorDesc, "::mlir::Value": $mask, "::mlir::IntegerAttr": $vnni_axis, + CArg<"::mlir::DenseI64ArrayAttr", "::mlir::DenseI64ArrayAttr()">: $transpose, + CArg<"::imex::xegpu::CacheReadHintAttr", "::imex::xegpu::CacheReadHintAttr()">: $l1_hint, + CArg<"::imex::xegpu::CacheReadHintAttr", "::imex::xegpu::CacheReadHintAttr()">: $l2_hint, + CArg<"::imex::xegpu::CacheReadHintAttr", "::imex::xegpu::CacheReadHintAttr()">: $l3_hint), [{ + $_state.addOperands(TensorDesc); + $_state.addOperands(mask); + if (vnni_axis) { + $_state.getOrAddProperties().vnni_axis = vnni_axis; + } + if (transpose) { + $_state.getOrAddProperties().transpose = transpose; + } + if (l1_hint) { + $_state.getOrAddProperties().l1_hint = l1_hint; + } + if (l2_hint) { + $_state.getOrAddProperties().l2_hint = l2_hint; + } + if (l3_hint) { + $_state.getOrAddProperties().l3_hint = l3_hint; + } + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + $_state.addTypes(value); }]>, + + OpBuilder<(ins "::mlir::Type": $value, "::mlir::Value": $TensorDesc, "::mlir::Value": $mask, "::mlir::IntegerAttr": $vnni_axis, + CArg<"::mlir::DenseI64ArrayAttr", "::mlir::DenseI64ArrayAttr()">: $transpose, + CArg<"::imex::xegpu::CacheReadHint", "::imex::xegpu::CacheReadHint::CACHED">: $l1_hint, + CArg<"::imex::xegpu::CacheReadHint", "::imex::xegpu::CacheReadHint::CACHED">: $l2_hint, + CArg<"::imex::xegpu::CacheReadHint", "::imex::xegpu::CacheReadHint::CACHED">: $l3_hint), [{ + $_state.addOperands(TensorDesc); + $_state.addOperands(mask); + if (vnni_axis) { + $_state.getOrAddProperties().vnni_axis = vnni_axis; + } + if (transpose) { + $_state.getOrAddProperties().transpose = transpose; + } + + $_state.getOrAddProperties().l1_hint = ::imex::xegpu::CacheReadHintAttr::get($_builder.getContext(), l1_hint); + $_state.getOrAddProperties().l2_hint = ::imex::xegpu::CacheReadHintAttr::get($_builder.getContext(), l2_hint); + $_state.getOrAddProperties().l3_hint = ::imex::xegpu::CacheReadHintAttr::get($_builder.getContext(), l3_hint); + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + $_state.addTypes(value); }]> + + ]; + let skipDefaultBuilders = 1; + // In format of: %2 = xegpu.load %1, %0 {transpose = [1, 0], l1_hint = cached, l2_hint = uncached} // : !xegpu.tensor_desc<16x8xf32, #xegpu.scattered>, vector<16x8xi1> -> vector<8x16xf32> let hasCustomAssemblyFormat = 1; @@ -499,30 +581,47 @@ def XeGPU_StoreScatterOp : XeGPU_Op<"store", []> { DefaultValuedAttr: $mode ); + let builders = [ + OpBuilder<(ins "::mlir::Value": $value, "::mlir::Value": $TensorDesc, "::mlir::Value": $mask, + CArg<"::imex::xegpu::CacheWriteHintAttr", "::imex::xegpu::CacheWriteHintAttr()">: $l1_hint, + CArg<"::imex::xegpu::CacheWriteHintAttr", "::imex::xegpu::CacheWriteHintAttr()">: $l2_hint, + CArg<"::imex::xegpu::CacheWriteHintAttr", "::imex::xegpu::CacheWriteHintAttr()">: $l3_hint), [{ + $_state.addOperands(value); + $_state.addOperands(TensorDesc); + $_state.addOperands(mask); + if (l1_hint) { + $_state.getOrAddProperties().l1_hint = l1_hint; + } + if (l2_hint) { + 
$_state.getOrAddProperties().l2_hint = l2_hint; + } + if (l3_hint) { + $_state.getOrAddProperties().l3_hint = l3_hint; + } + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + }]>, + + OpBuilder<(ins "::mlir::Value": $value, "::mlir::Value": $TensorDesc, "::mlir::Value": $mask, + CArg<"::imex::xegpu::CacheWriteHint", "::imex::xegpu::CacheWriteHint::WRITE_BACK">: $l1_hint, + CArg<"::imex::xegpu::CacheWriteHint", "::imex::xegpu::CacheWriteHint::WRITE_BACK">: $l2_hint, + CArg<"::imex::xegpu::CacheWriteHint", "::imex::xegpu::CacheWriteHint::WRITE_BACK">: $l3_hint), [{ + $_state.addOperands(value); + $_state.addOperands(TensorDesc); + $_state.addOperands(mask); + $_state.getOrAddProperties().l1_hint = ::imex::xegpu::CacheWriteHintAttr::get($_builder.getContext(), l1_hint); + $_state.getOrAddProperties().l2_hint = ::imex::xegpu::CacheWriteHintAttr::get($_builder.getContext(), l2_hint); + $_state.getOrAddProperties().l3_hint = ::imex::xegpu::CacheWriteHintAttr::get($_builder.getContext(), l3_hint); + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + }]> + ]; + let skipDefaultBuilders = 1; + // Format: %3 = xegpu.load %1, %0 {l1_hint = cached, l2_hint = uncached} // : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1> -> vector<16xf32> let hasCustomAssemblyFormat = 1; let hasVerifier = 1; }
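A minimal usage sketch of the two StoreScatterOp builders above, assuming a surrounding RewritePattern where `rewriter`, `loc`, `value`, `tdesc`, and `mask` are in scope (all names are illustrative placeholders, not code from this patch). Passing the first hint explicitly keeps the call unambiguous, since both overloads are otherwise callable with just the three SSA values:

  // Enum-based overload: the remaining hints default to WRITE_BACK and the
  // builder body above stamps mode = vc.
  rewriter.create<imex::xegpu::StoreScatterOp>(
      loc, value, tdesc, mask, imex::xegpu::CacheWriteHint::WRITE_BACK);

  // Attribute-based overload: hints left as null attributes are not set.
  auto uncached = imex::xegpu::CacheWriteHintAttr::get(
      rewriter.getContext(), imex::xegpu::CacheWriteHint::UNCACHED);
  rewriter.create<imex::xegpu::StoreScatterOp>(loc, value, tdesc, mask,
                                               /*l1_hint=*/uncached);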
-def XeGPU_UpdateNDOffsetOp : XeGPU_Op<"update_nd_offset", []> { - let summary = "update the offsets for the given tensor descriptor"; - - let arguments = (ins - XeGPU_TensorDesc: $TensorDesc, - Variadic: $offsets, - DefaultValuedAttr: $mode); - - let results = (outs XeGPU_TensorDesc: $result); - - let assemblyFormat = [{ - $TensorDesc `,` (`[` $offsets^ `]`)? (`{` `mode` `=` $mode^ `}`)? - attr-dict `:` qualified(type($TensorDesc)) `->` qualified(type($result)) - }]; - - let hasVerifier = 1; -} - def XeGPU_UpdateOffsetOp : XeGPU_Op<"update_offset", []> { let summary = "update the offsets for the given tensor descriptor"; @@ -535,6 +634,17 @@ def XeGPU_UpdateOffsetOp let results = (outs XeGPU_TensorDesc: $result); + let builders = [ + OpBuilder<(ins "::mlir::Type": $result, "::mlir::Value": $TensorDesc, "::mlir::Value": $offsets), [{ + $_state.addOperands(TensorDesc); + $_state.addOperands(offsets); + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + $_state.addTypes(result); + }]> + ]; + + let skipDefaultBuilders = 1; + let assemblyFormat = [{ $TensorDesc `,` $offsets (`{` `mode` `=` $mode^ `}`)? attr-dict `:` qualified(type($TensorDesc)) `,` qualified(type($offsets)) `->` qualified(type($result)) @@ -543,6 +653,53 @@ let hasVerifier = 1; } +def XeGPU_PrefetchOp : XeGPU_Op<"prefetch", []> { + let summary = "prefetches an nD block to cache"; + let arguments = (ins XeGPU_TensorDesc: $TensorDesc, + OptionalAttr: $l1_hint, + OptionalAttr: $l2_hint, + OptionalAttr: $l3_hint, + DefaultValuedAttr: $mode + ); + + let builders = [ + OpBuilder<(ins "::mlir::Value": $TensorDesc, + CArg<"::imex::xegpu::CacheReadHintAttr", "::imex::xegpu::CacheReadHintAttr()">: $l1_hint, + CArg<"::imex::xegpu::CacheReadHintAttr", "::imex::xegpu::CacheReadHintAttr()">: $l2_hint, + CArg<"::imex::xegpu::CacheReadHintAttr", "::imex::xegpu::CacheReadHintAttr()">: $l3_hint), [{ + $_state.addOperands(TensorDesc); + if (l1_hint) { + $_state.getOrAddProperties().l1_hint = l1_hint; + } + if (l2_hint) { + $_state.getOrAddProperties().l2_hint = l2_hint; + } + if (l3_hint) { + $_state.getOrAddProperties().l3_hint = l3_hint; + } + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + }]>, + + OpBuilder<(ins "::mlir::Value": $TensorDesc, + CArg<"::imex::xegpu::CacheReadHint", "::imex::xegpu::CacheReadHint::CACHED">: $l1_hint, + CArg<"::imex::xegpu::CacheReadHint", "::imex::xegpu::CacheReadHint::CACHED">: $l2_hint, + CArg<"::imex::xegpu::CacheReadHint", "::imex::xegpu::CacheReadHint::CACHED">: $l3_hint), [{ + $_state.addOperands(TensorDesc); + $_state.getOrAddProperties().l1_hint = ::imex::xegpu::CacheReadHintAttr::get($_builder.getContext(), l1_hint); + $_state.getOrAddProperties().l2_hint = ::imex::xegpu::CacheReadHintAttr::get($_builder.getContext(), l2_hint); + $_state.getOrAddProperties().l3_hint = ::imex::xegpu::CacheReadHintAttr::get($_builder.getContext(), l3_hint); + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + }]> + ]; + + let skipDefaultBuilders = 1; + + // In format of: xegpu.prefetch %tdesc {l1_hint = cached, l2_hint = uncached}: + // !xegpu.tensor_desc<8x16xf16> + let hasCustomAssemblyFormat = 1; + let hasVerifier = 1; +} + def XeGPU_InvokeSIMDOp : XeGPU_Op<"invoke_SIMD", []> { let summary = "Invoke_SIMD operation"; let description = [{ @@ -588,6 +745,36 @@ def XeGPU_AtomicRMWOp: XeGPU_Op<"atomic_rmw", []> { let assemblyFormat = [{ $kind $tensorDesc `,` $mask (`,` $value^)? (`{` `mode` `=` $mode^ `}`)? 
attr-dict `:` qualified(type(operands)) `->` type($result) }]; + + let builders = [ + OpBuilder<(ins "::mlir::Type": $result, "::imex::xegpu::AtomicRMWKindAttr": $kind, + "::mlir::Value": $tensorDesc, "::mlir::Value": $mask, + "::mlir::Value": $value), [{ + $_state.addOperands(tensorDesc); + $_state.addOperands(mask); + if (value) + $_state.addOperands(value); + $_state.getOrAddProperties().kind = kind; + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + $_state.addTypes(result); + }]>, + + OpBuilder<(ins "::mlir::Type": $result, "::imex::xegpu::AtomicRMWKind": $kind, + "::mlir::Value": $tensorDesc, "::mlir::Value": $mask, + "::mlir::Value": $value), [{ + $_state.addOperands(tensorDesc); + $_state.addOperands(mask); + if (value) + $_state.addOperands(value); + $_state.getOrAddProperties().kind = ::imex::xegpu::AtomicRMWKindAttr::get($_builder.getContext(), kind); + $_state.getOrAddProperties().mode = ::imex::xegpu::ModeAttr::get($_builder.getContext(), imex::xegpu::Mode::VC); + $_state.addTypes(result); + }]> + ]; + + let skipDefaultBuilders = 1; + + let hasVerifier = 1; } diff --git a/include/imex/Dialect/XeGPU/IR/XeGPUTypes.td b/include/imex/Dialect/XeGPU/IR/XeGPUTypes.td index 6f6f3df59..c87238232 100644 --- a/include/imex/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/include/imex/Dialect/XeGPU/IR/XeGPUTypes.td @@ -27,7 +27,8 @@ def XeGPU_FloatType: AnyTypeOf<[F16, F32, F64, BF16, TF32]>; def XeGPU_ScalarType: AnyTypeOf<[XeGPU_IntType, XeGPU_FloatType]>; def XeGPU_BaseAddrType: AnyTypeOf<[MemRefRankOf<[XeGPU_ScalarType], [1, 2]>, UI64, UI32, I64, I32]>; def XeGPU_DpasOpType: VectorOfRankAndType<[2, 3], [XeGPU_ScalarType]>; -def XeGPU_OffsetType: AnyTypeOf<[VectorOfRankAndType<[1], [Index]>, Index]>; +// def XeGPU_OffsetType: AnyTypeOf<[VectorOfRankAndType<[1], [Index]>, Index]>; +def XeGPU_OffsetType: VectorOfRankAndType<[1], [Index]>; def XeGPU_MaskType: AnyTypeOf<[VectorOfRankAndType<[1,2], [I1]>, I1]>; def XeGPU_ValueType: AnyTypeOf<[VectorOfRankAndType<[1,2,3], [XeGPU_ScalarType]>, XeGPU_ScalarType]>; @@ -70,16 +71,19 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", let parameters = (ins ArrayRefParameter<"int64_t">:$shape, "::mlir::Type":$elementType, DefaultValuedParameter<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">: $memory_scope, - OptionalParameter<"::mlir::Attribute"> :$encoding); + OptionalParameter<"::mlir::Attribute">: $encoding, + OptionalParameter<"::mlir::Attribute">: $mapping + ); let builders = [ TypeBuilderWithInferredContext<(ins "::llvm::ArrayRef":$shape, "::mlir::Type":$elementType, CArg<"::imex::xegpu::MemoryScope", "xegpu::MemoryScope::GLOBAL">: $memory_scope, - CArg<"::mlir::Attribute", "{}">:$encoding + CArg<"::mlir::Attribute", "{}">:$encoding, + CArg<"::mlir::Attribute", "{}">:$mapping ), [{ - return $_get(elementType.getContext(), shape, elementType, memory_scope, encoding); + return $_get(elementType.getContext(), shape, elementType, memory_scope, encoding, mapping); }]> ]; @@ -99,7 +103,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", } }]; - let assemblyFormat = "`<` custom($shape, $elementType)``custom($memory_scope, $encoding)`>`"; + let assemblyFormat = "`<` custom($shape, $elementType)``custom($memory_scope, $encoding, $mapping)`>`"; } diff --git a/lib/Conversion/XeGPUToSPIRV/XeGPUToSPIRV.cpp b/lib/Conversion/XeGPUToSPIRV/XeGPUToSPIRV.cpp index b4e86fcc8..9b044065d 100644 --- a/lib/Conversion/XeGPUToSPIRV/XeGPUToSPIRV.cpp +++ 
b/lib/Conversion/XeGPUToSPIRV/XeGPUToSPIRV.cpp @@ -1474,11 +1474,11 @@ Value linearizeOffset(OpBuilder builder, Location loc, unsigned getElementPerWI(imex::xegpu::TensorDescType tDescType) { imex::xegpu::SubGroupMapAttr sgMap; - auto encoding = tDescType.getEncoding(); - if (auto xeMapAttr = llvm::dyn_cast(encoding)) { + auto mapping = tDescType.getMapping(); + if (auto xeMapAttr = llvm::dyn_cast(mapping)) { sgMap = xeMapAttr.getSg(); } else { - sgMap = llvm::dyn_cast(encoding); + sgMap = llvm::dyn_cast(mapping); } auto blockSize = tDescType.getShape(); auto wiLayout = sgMap.getWiLayout(); diff --git a/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp b/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp index 2fe695760..79e5ae9f1 100644 --- a/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp +++ b/lib/Conversion/XeTileToXeGPU/XeTileOpConversion.cpp @@ -40,7 +40,7 @@ class SgInitTileOpPattern llvm::SmallVector offsets; auto staticOffsets = op.getStaticOffsets(); auto dynamicOffsets = op.getOffsets(); - for (int i = 0, j = 0; i != staticOffsets.size(); i++) { + for (size_t i = 0, j = 0; i != staticOffsets.size(); i++) { if (mlir::ShapedType::isDynamic(staticOffsets[i])) { offsets.push_back(dynamicOffsets[j++]); } else { @@ -77,8 +77,6 @@ class SgInitTileOpPattern mlir::SmallVector tDescOffsets{tDescOffsetX, tDescOffsetY}; - constexpr int64_t kDynamic = std::numeric_limits::min(); - // TODO: this needs improvement, it assumes the source is static // memeref. auto createNdOp = rewriter.create( @@ -116,9 +114,6 @@ struct SgPrefetchTileOpPattern return mlir::failure(); } - auto elementTy = tileTy.getElementType(); - auto subVectorTy = mlir::VectorType::get({shape[2], shape[3]}, elementTy); - auto L1 = xegpu::CacheReadHintAttr::get(op.getContext(), xegpu::CacheReadHint::CACHED); auto L2 = xegpu::CacheReadHintAttr::get(op.getContext(), @@ -129,9 +124,8 @@ struct SgPrefetchTileOpPattern for (int i = 0; i < shape[0]; i++) { for (int j = 0; j < shape[1]; j++) { auto tile = tiles[i * shape[1] + j]; - rewriter.create( - op.getLoc(), subVectorTy, tile, mlir::IntegerAttr(), - mlir::DenseI64ArrayAttr(), L1, L2, L3, imex::xegpu::Mode::VC); + rewriter.create(op.getLoc(), tile, L1, L2, L3, + imex::xegpu::Mode::VC); } } @@ -175,11 +169,11 @@ struct SgLoadTileOpPattern mlir::IntegerAttr vnniAxisAttr; auto transposeAttr = op.getTransposeAttr(); auto L1 = xegpu::CacheReadHintAttr::get(op.getContext(), - xegpu::CacheReadHint::UNCACHED); + xegpu::CacheReadHint::CACHED); auto L2 = xegpu::CacheReadHintAttr::get(op.getContext(), - xegpu::CacheReadHint::UNCACHED); + xegpu::CacheReadHint::CACHED); auto L3 = xegpu::CacheReadHintAttr::get(op.getContext(), - xegpu::CacheReadHint::UNCACHED); + xegpu::CacheReadHint::CACHED); llvm::SmallVector newShape = {shape[2], shape[3]}; // needs vnni transform; @@ -235,11 +229,11 @@ struct SgStoreTileOpPattern auto context = op.getContext(); auto L1 = xegpu::CacheWriteHintAttr::get(context, - xegpu::CacheWriteHint::UNCACHED); + xegpu::CacheWriteHint::WRITE_BACK); auto L2 = xegpu::CacheWriteHintAttr::get(context, - xegpu::CacheWriteHint::UNCACHED); + xegpu::CacheWriteHint::WRITE_BACK); auto L3 = xegpu::CacheWriteHintAttr::get(context, - xegpu::CacheWriteHint::UNCACHED); + xegpu::CacheWriteHint::WRITE_BACK); for (size_t i = 0; i < tiles.size(); i++) rewriter.create(op.getLoc(), tiles[i], values[i], L1, L2, L3, imex::xegpu::Mode::VC); diff --git a/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 4453f1607..43567fc66 100644 --- 
a/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -47,6 +47,13 @@ void XeGPUDialect::initialize() { >(); } +bool printDefaultValues() { + auto *env = getenv("IMEX_XEGPU_PRINT_DEFAULTS"); + if (env && std::string(env) == "true") + return true; + return false; +} + // custom parser for XeGPU_TensorDesc (shape and type parameter) static mlir::LogicalResult parseShapeAndType(mlir::AsmParser &parser, llvm::SmallVector &shape, @@ -77,9 +84,11 @@ static void printShapeAndType(mlir::AsmPrinter &printer, printer << type; } +// custom parser for XeGPU_TensorDesc (scope, encoding and mapping parameter) static mlir::LogicalResult parseTensorDescAttr(mlir::AsmParser &parser, imex::xegpu::MemoryScope &scope, - mlir::Attribute &encoding) { + mlir::Attribute &encoding, + mlir::Attribute &mapping) { // implies no attrbutes if (mlir::failed(parser.parseOptionalComma())) return mlir::success(); @@ -107,7 +116,14 @@ static mlir::LogicalResult parseTensorDescAttr(mlir::AsmParser &parser, return parser.emitError( loc, "Failed to parse XeGPU_TensorDesc parameter 'encoding' which " "is to be a `::mlir::Attribute`.\n"); - encoding = *attrOptional; + + if (llvm::isa(*attrOptional)) + encoding = *attrOptional; + + if (llvm::isa(*attrOptional) || + llvm::isa(*attrOptional) || + llvm::isa(*attrOptional)) + mapping = *attrOptional; return mlir::success(); } }; @@ -118,274 +134,79 @@ static mlir::LogicalResult parseTensorDescAttr(mlir::AsmParser &parser, return mlir::success(); } +// custom printer for XeGPU_TensorDesc (scope, encoding and mapping parameter) static void printTensorDescAttr(mlir::AsmPrinter &printer, imex::xegpu::MemoryScope scope, - mlir::Attribute encoding) { - if (scope != imex::xegpu::MemoryScope::GLOBAL) + mlir::Attribute encoding, + mlir::Attribute mapping) { + if (printDefaultValues() || scope != imex::xegpu::MemoryScope::GLOBAL) printer << ", memory_scope = " << scope; if (encoding) printer << ", " << encoding; -} - -template -static mlir::LogicalResult parseArrayList(mlir::AsmParser &parser, - llvm::SmallVector &array, - bool parsePrecedenceEqual = false) { - mlir::FailureOr> result; - // Parse literal '=' - if (parsePrecedenceEqual) - if (parser.parseEqual()) - return mlir::failure(); - - // Parse literal '[' - if (parser.parseLSquare()) - return mlir::failure(); - - result = mlir::FieldParser<::llvm::SmallVector>::parse(parser); - - if (::mlir::failed(result)) - return mlir::failure(); - - // Parse literal ']' - if (parser.parseRSquare()) - return mlir::failure(); - - array = result.value(); - return mlir::success(); -} - -template -static void printArrayElement(mlir::AsmPrinter &printer, - llvm::StringRef keyword, - llvm::ArrayRef array) { - printer << keyword; - printer << ' ' << "="; - printer << ' ' << "["; - printer.printStrippedAttrOrType(array); - printer << "]"; -} - -static mlir::LogicalResult -parseSubGroupMapAttrElements(mlir::AsmParser &parser, - llvm::SmallVector &layout, - llvm::SmallVector &data, - llvm::SmallVector &mmaBlockSize) { - auto parseElt = [&]() -> mlir::LogicalResult { - return mlir::AsmParser::KeywordSwitch(parser) - .Case("mma_block_size", - [&](llvm::StringRef, llvm::SMLoc) { - return parseArrayList(parser, mmaBlockSize, true); - }) - .Case("wi_layout", - [&](llvm::StringRef, llvm::SMLoc) { - return parseArrayList(parser, layout, true); - }) - .Case("wi_data", - [&](llvm::StringRef, llvm::SMLoc) { - return parseArrayList(parser, data, true); - }) - .Default([&](llvm::StringRef keyword, llvm::SMLoc) { - parser.emitError( - 
parser.getCurrentLocation(), - "SubGroupMapAttr Parser meet an unexpected keywoard: ") - << keyword << "\n"; - return mlir::failure(); - }); - }; - - if (parser.parseLBrace()) - return mlir::failure(); - if (parser.parseCommaSeparatedList(parseElt)) - return mlir::failure(); - if (parser.parseRBrace()) - return mlir::failure(); - - return mlir::success(); -} - -static void printSubGroupMapAttrElements( - mlir::AsmPrinter &printer, llvm::ArrayRef layout, - llvm::ArrayRef data, llvm::ArrayRef mmaBlockSize) { - printer << "{"; - if (mmaBlockSize.size()) { - printArrayElement(printer, "mma_block_size", mmaBlockSize); - printer << "," << ' '; - } - printArrayElement(printer, "wi_layout", layout); - printer << "," << ' '; - printArrayElement(printer, "wi_data", data); - printer << "}"; -} - -static mlir::LogicalResult -parseWorkGroupMapAttrElements(mlir::AsmParser &parser, - llvm::SmallVector &layout, - llvm::SmallVector &data) { - auto parseElt = [&]() -> mlir::LogicalResult { - return mlir::AsmParser::KeywordSwitch(parser) - .Case("sg_layout", - [&](llvm::StringRef, llvm::SMLoc) { - return parseArrayList(parser, layout, true); - }) - .Case("sg_data", - [&](llvm::StringRef, llvm::SMLoc) { - return parseArrayList(parser, data, true); - }) - .Default([&](llvm::StringRef keyword, llvm::SMLoc) { - parser.emitError( - parser.getCurrentLocation(), - "WorkGroupMapAttr Parser meet an unexpected keywoard: ") - << keyword << "\n"; - return mlir::failure(); - }); - }; - - if (parser.parseLBrace()) - return mlir::failure(); - if (parser.parseCommaSeparatedList(parseElt)) - return mlir::failure(); - if (parser.parseRBrace()) - return mlir::failure(); - return mlir::success(); -} - -static void printWorkGroupMapAttrElements(mlir::AsmPrinter &printer, - llvm::ArrayRef layout, - llvm::ArrayRef data) { - printer << "{"; - printArrayElement(printer, "sg_layout", layout); - printer << "," << ' '; - printArrayElement(printer, "sg_data", data); - printer << "}"; + if (mapping) + printer << ", " << mapping; } mlir::LogicalResult SubGroupMapAttr::verify( llvm::function_ref emitError, - llvm::ArrayRef layout, llvm::ArrayRef data, - llvm::ArrayRef mmaBlockSize) { - - if (mmaBlockSize.size() != 2 && mmaBlockSize.size() != 0) { - emitError() - << "Failed to parse SubGroupMapAttr: mma_block_size should be a " - "`llvm::ArrayRef` with size 2 or empty. But it got " - << mmaBlockSize.size() << ".\n"; - return mlir::failure(); - } + mlir::DenseI32ArrayAttr mmaBlockSize, mlir::DenseI32ArrayAttr layout, + mlir::DenseI32ArrayAttr data) { if (layout.size() != 2) { emitError() << "Failed to parse SubGroupMapAttr: missing wi_layout which " - "is to be a `llvm::ArrayRef` with size 2.\n"; + "is to be an integer array of size 2.\n"; return mlir::failure(); } if (data.size() != 2) { emitError() << "Failed to parse SubGroupMapAttr: missing wi_data which is " - "to be a `llvm::ArrayRef` with size 2.\n"; + "to be an integer array of size 2.\n"; return mlir::failure(); } + if (mmaBlockSize) { + if (mmaBlockSize.size() != 2) { + emitError() + << "Failed to parse SubGroupMapAttr: the optional mma_block_size " + "should be an integer array of size 2 or empty. But it got " + << mmaBlockSize.size() << ".\n"; + return mlir::failure(); + } + for (int i = 0; i < mmaBlockSize.size(); i++) { + if ((mmaBlockSize[i] % (layout[i] * data[i]) != 0 && + (layout[i] * data[i]) % mmaBlockSize[i] != 0) || + mmaBlockSize[i] % layout[i] != 0 || mmaBlockSize[i] % data[i] != 0) { + return emitError() + << "Invalid SubGroupMapAttr. 
A valid SubGroupMapAttr should " + "meet the following conditions: " + "\n\tmmaBlockSize[i] % wi_layout[i] == 0 && " + "\n\tmmaBlockSize[i] % wi_data[i] == 0 && " + "\n\t(mmaBlockSize[i] % (wi_layout[i] * wi_data[i]) == 0 || " + "\n\t (wi_layout[i] * wi_data[i]) % mmaBlockSize[i] == 0)"; + } + } + } + return mlir::success(); } mlir::LogicalResult WorkGroupMapAttr::verify( llvm::function_ref emitError, - llvm::ArrayRef layout, llvm::ArrayRef data) { + mlir::DenseI32ArrayAttr layout, mlir::DenseI32ArrayAttr data) { if (layout.size() != 2) { emitError() << "Failed to parse WorkGroupMapAttr: missing sg_layout which " - "is to be a `llvm::ArrayRef` with size 2.\n"; + "is to be a `llvm::ArrayRef` with size 2.\n"; return mlir::failure(); } if (data.size() != 2) { emitError() << "Failed to parse WorkGroupMapAttr: missing sg_data which is " - "to be a `llvm::ArrayRef` with size 2.\n"; + "to be a `llvm::ArrayRef` with size 2.\n"; return mlir::failure(); } return mlir::success(); } -mlir::Attribute XeMapAttr::parse(mlir::AsmParser &parser, mlir::Type type) { - imex::xegpu::WorkGroupMapAttr wg; - imex::xegpu::SubGroupMapAttr sg; - // Parse literal '<' - if (parser.parseLess()) - return {}; - - auto parseElt = [&]() -> mlir::ParseResult { - mlir::OptionalParseResult result = - mlir::AsmParser::KeywordSwitch(parser) - .Case("sg", - [&](llvm::StringRef, llvm::SMLoc) { - if (parser.parseEqual()) - return mlir::failure(); - llvm::SmallVector mmaBlockSize; - llvm::SmallVector wiLayout; - llvm::SmallVector wiData; - if (mlir::failed(parseSubGroupMapAttrElements( - parser, wiLayout, wiData, mmaBlockSize))) - return mlir::failure(); - sg = imex::xegpu::SubGroupMapAttr::get( - parser.getContext(), wiLayout, wiData, mmaBlockSize); - return mlir::success(!!sg); - }) - .Case("wg", - [&](llvm::StringRef, llvm::SMLoc) { - if (parser.parseEqual()) - return mlir::failure(); - llvm::SmallVector sgLayout; - llvm::SmallVector sgData; - if (mlir::failed(parseWorkGroupMapAttrElements( - parser, sgLayout, sgData))) - return mlir::failure(); - wg = imex::xegpu::WorkGroupMapAttr::get(parser.getContext(), - sgLayout, sgData); - return mlir::success(!!wg); - }) - .Default([&](llvm::StringRef keyword, llvm::SMLoc) { - return std::nullopt; - }); - return result.value(); - }; - - // Parse wg and sg attrs - if (parser.parseCommaSeparatedList(parseElt)) - return {}; - - // Parse literal '>' - if (parser.parseGreater()) - return {}; - - if (!wg && !sg) { - parser.emitError(parser.getCurrentLocation(), - "Expecting at least one of sg and wg attributes.\n"); - return {}; - } - - return XeMapAttr::get(parser.getContext(), wg, sg); -} - -void XeMapAttr::print(mlir::AsmPrinter &printer) const { - bool printSep = false; - printer << "<"; - if (getWg()) { - printer << "wg = "; - printWorkGroupMapAttrElements(printer, getWg().getSgLayout(), - getWg().getSgData()); - printSep = true; - } - - if (getSg()) { - if (printSep) - printer << ", "; - printer << "sg = "; - printSubGroupMapAttrElements(printer, getSg().getWiLayout(), - getSg().getWiData(), - getSg().getMmaBlockSize()); - } - - printer << ">"; -} - } // namespace xegpu } // namespace imex diff --git a/lib/Dialect/XeGPU/IR/XeGPUOps.cpp b/lib/Dialect/XeGPU/IR/XeGPUOps.cpp index 225aae701..6756f3bc9 100644 --- a/lib/Dialect/XeGPU/IR/XeGPUOps.cpp +++ b/lib/Dialect/XeGPU/IR/XeGPUOps.cpp @@ -70,12 +70,6 @@ static void transpose(llvm::ArrayRef trans, shape[i] = old[trans[i]]; }; -static bool isMappingAttr(mlir::Attribute attr) { - return attr && (llvm::isa(attr) || - llvm::isa(attr) || - 
llvm::isa(attr)); -} - bool dpasSupportedTypes(mlir::Type type, bool isResult) { if (isResult) { if (type.isF32() || type.isInteger(32)) @@ -114,10 +108,12 @@ bool dpasSupportedTypes(mlir::Type type, bool isResult) { // return false; // } +extern bool printDefaultValues(); + template -static mlir::ParseResult parseCustomEnumAttr(mlir::OpAsmParser &parser, - mlir::OperationState &result, - llvm::StringRef attrKeyword) { +static ::mlir::ParseResult parseCustomEnumAttr(mlir::OpAsmParser &parser, + mlir::OperationState &result, + llvm::StringRef attrKeyword) { auto loc = parser.getCurrentLocation(); auto attrOptional = mlir::FieldParser::parse(parser); if (mlir::failed(attrOptional)) @@ -129,9 +125,9 @@ static mlir::ParseResult parseCustomEnumAttr(mlir::OpAsmParser &parser, } template -static mlir::ParseResult parseBoolAndIntegerAttr(mlir::OpAsmParser &parser, - mlir::OperationState &result, - llvm::StringRef attrKeyword) { +static ::mlir::ParseResult +parseBoolAndIntegerAttr(mlir::OpAsmParser &parser, mlir::OperationState &result, + llvm::StringRef attrKeyword) { AttrType attr; mlir::Type ty; @@ -160,7 +156,7 @@ static mlir::ParseResult parseBoolAndIntegerAttr(mlir::OpAsmParser &parser, /// @param result /// @param allowedKeywords /// @return -static mlir::ParseResult +static ::mlir::ParseResult parseOptionalAttrDict(mlir::OpAsmParser &parser, mlir::OperationState &result, llvm::ArrayRef allowedKeywords, bool isWrite = false) { @@ -234,8 +230,55 @@ static void printCacheHintAttrs(mlir::OpAsmPrinter &printer, T op, } } -mlir::ParseResult CreateNdDescOp::parse(mlir::OpAsmParser &parser, - mlir::OperationState &result) { +static bool verifyAndInferShape(std::vector &shape, + imex::xegpu::WorkGroupMapAttr wgMap, + imex::xegpu::SubGroupMapAttr sgMap) { + if (wgMap) { + auto sgData = wgMap.getSgData(); + auto sgLayout = wgMap.getSgLayout(); + + if (shape.size() != sgData.size() || shape.size() != sgLayout.size()) + return false; + + for (size_t i = 0; i < shape.size(); i++) { + if (shape[i] % sgLayout[i] != 0 || shape[i] % sgData[i] != 0 || + (shape[i] % (sgLayout[i] * sgData[i]) != 0 && + (sgLayout[i] * sgData[i]) % shape[i] != 0)) + return false; + shape[i] /= sgLayout[i]; + } + } + + if (sgMap) { + auto blockSize = sgMap.getMmaBlockSize(); + auto wiLayout = sgMap.getWiLayout(); + auto wiData = sgMap.getWiData(); + + if (blockSize && shape.size() != blockSize.size()) { + return false; + } + + if (shape.size() != wiData.size() || shape.size() != wiLayout.size()) { + return false; + } + + for (size_t i = 0; i < shape.size(); i++) { + + if ((shape[i] % (wiLayout[i] * wiData[i]) != 0 && + (wiLayout[i] * wiData[i]) % shape[i] != 0) || + (blockSize && shape[i] % blockSize[i] != 0) || + shape[i] % wiLayout[i] != 0 || shape[i] % wiData[i] != 0) { + return false; + } + shape[i] /= wiLayout[i]; + } + } + + return true; +} + +::mlir::ParseResult CreateNdDescOp::parse(mlir::OpAsmParser &parser, + mlir::OperationState &result) { // parse the source operand mlir::OpAsmParser::UnresolvedOperand sourceRawOperands[1]; @@ -325,6 +368,11 @@ mlir::ParseResult CreateNdDescOp::parse(mlir::OpAsmParser &parser, } void CreateNdDescOp::print(::mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto check = getBoundaryCheck(); + auto printDefaults = printDefaultValues(); + printer << ' '; printer << getSource(); printDynamicIndexList(printer, *this, getOffsets(), getStaticOffsetsAttr()); @@ -342,11 +390,24 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &printer) { printer << "]"; } - 
printer << ' ' << "{"; - printer << "mode = " << getMode(); - printer << "," << ' '; - printer << "boundary_check = " << getBoundaryCheck(); - printer << "}"; + if (printDefaults || mode != imex::xegpu::Mode::SIMT || !check) { + printer << ' ' << "{"; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { + printer << "mode = " << mode; + printSep = true; + } + + if (printDefaults || !check) { + if (printSep) + printer << "," << ' '; + printer << "boundary_check = " << check; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || !check) { + printer << "}"; + } printer << ' ' << ":"; printer << ' '; @@ -356,11 +417,22 @@ void CreateNdDescOp::print(::mlir::OpAsmPrinter &printer) { printer << getTensorDesc().getType(); } -mlir::LogicalResult CreateNdDescOp::verify() { +::mlir::LogicalResult CreateNdDescOp::verify() { auto mode = getMode(); auto encoding = getTensorDesc().getType().getEncoding(); + auto mapping = getTensorDesc().getType().getMapping(); - if (mode == imex::xegpu::Mode::SIMT && !isMappingAttr(encoding)) { + if (encoding) { + return emitOpError("Encoding Attribute of TensorDesc is not expected for " + "non-scattered operators.\n"); + } + + if (mode == imex::xegpu::Mode::VC && mapping) { + return emitOpError("Mapping attribute of TensorDesc is not expected " + "for VC mode operations.\n"); + } + + if (mode == imex::xegpu::Mode::SIMT && !mapping) { return emitOpError("Expecting either SgMap, WgMap or XeMap attribute for " "SIMT mode operators.\n"); } @@ -379,8 +451,8 @@ mlir::LogicalResult CreateNdDescOp::verify() { return mlir::success(); } -mlir::ParseResult CreateDescOp::parse(mlir::OpAsmParser &parser, - mlir::OperationState &result) { +::mlir::ParseResult CreateDescOp::parse(mlir::OpAsmParser &parser, + mlir::OperationState &result) { mlir::OpAsmParser::UnresolvedOperand sourceRawOperands[1]; llvm::ArrayRef sourceOperands( sourceRawOperands); @@ -434,17 +506,35 @@ mlir::ParseResult CreateDescOp::parse(mlir::OpAsmParser &parser, } void CreateDescOp::print(::mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto chunk = getChunkSizePerLane(); + auto printDefaults = printDefaultValues(); + printer << ' '; printer << getSource(); printer << ","; printer << ' '; printer << getOffsets(); - printer << ' ' << "{"; - printer << "mode = " << getMode(); - printer << "," << ' '; - printer << "chunk_size_per_lane = " << getChunkSizePerLane(); - printer << "}"; + if (printDefaults || mode != imex::xegpu::Mode::SIMT || chunk != 1) { + printer << ' ' << "{"; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { + printer << "mode = " << mode; + printSep = true; + } + + if (printDefaults || chunk != 1) { + if (printSep) + printer << "," << ' '; + printer << "chunk_size_per_lane = " << chunk; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || chunk != 1) { + printer << "}"; + } printer << ' ' << ":"; printer << ' '; @@ -457,45 +547,49 @@ void CreateDescOp::print(::mlir::OpAsmPrinter &printer) { printer << getTensorDesc().getType(); } -mlir::LogicalResult CreateDescOp::verify() { - if (getRankOf(getSource()) > 2) - return emitOpError( - "Expecting the source is a 2D/1D memref or pointer (uint64_t)."); - - std::vector shape; - +::mlir::LogicalResult CreateDescOp::verify() { + auto mode = getMode(); + auto mapping = getTensorDesc().getType().getMapping(); auto offsetTy = getOffsets().getType(); auto tdescTy = getTensorDesc().getType(); auto chunkSize = getChunkSizePerLane(); - auto tdescShape = tdescTy.getShape(); + if 
(mode == imex::xegpu::Mode::SIMT || mapping) { + return emitOpError("CreateDescOp only supports VC mode, and the mapping " + "attribute of TensorDesc is not expected.\n"); + } + + if (getRankOf(getSource()) > 2) + return emitOpError( + "Expecting the source to be a 1D/2D memref or pointer (uint64_t)."); + if (!tdescTy.getEncoding()) + return emitOpError( + "Expecting the presence of scattered attribute for tensor descriptor."); + + // Infer the TensorDesc shape + std::vector shape; if (llvm::isa(offsetTy)) { shape = llvm::dyn_cast(offsetTy).getShape().vec(); - if (shape.size() > 2) - return emitOpError( - "Expecting the offset is either a 2D/1D vector (for VC) " - "or scalar (for SIMT)."); + if (shape.size() != 1) + return emitOpError("Expecting the offset to be a 1D vector."); } - if (offsetTy.isIndex() || chunkSize != 1) { + if (chunkSize != 1) { shape.push_back(chunkSize); } + auto tdescShape = tdescTy.getShape(); if (shape != tdescShape.vec()) { return emitOpError("Expecting dimensions of offsets is the same as the " "tensor descriptor, or one less than."); } - if (!tdescTy.getEncoding()) - return emitOpError( - "Expecting the presence of scattered attribute for tensor descriptor."); - return mlir::success(); }
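A hedged end-to-end sketch of IR that the verifier above accepts, assuming `builder`, `loc`, `src`, and a `vector<16xindex>` value `offsets` are in scope (names are illustrative). With chunk_size_per_lane = 8 the descriptor gains a trailing dimension of 8, so the expected shape is 16x8, and the scattered encoding is mandatory:

  auto tdescTy = imex::xegpu::TensorDescType::get(
      {16, 8}, builder.getF32Type(), imex::xegpu::MemoryScope::GLOBAL,
      imex::xegpu::ScatteredAttr::get(builder.getContext()));
  // Uses the uint32_t chunk_size_per_lane builder declared in XeGPUOps.td;
  // it stamps mode = vc on the op.
  builder.create<imex::xegpu::CreateDescOp>(loc, tdescTy, src, offsets,
                                            /*chunk_size_per_lane=*/8u);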
-mlir::ParseResult LoadNDOp::parse(::mlir::OpAsmParser &parser, - ::mlir::OperationState &result) { +::mlir::ParseResult LoadNDOp::parse(::mlir::OpAsmParser &parser, + ::mlir::OperationState &result) { mlir::OpAsmParser::UnresolvedOperand TensorDescRawOperands[1]; llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> TensorDescOperands( TensorDescRawOperands); @@ -533,27 +627,44 @@ mlir::ParseResult LoadNDOp::parse(::mlir::OpAsmParser &parser, } void LoadNDOp::print(::mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto printDefaults = printDefaultValues(); + auto numAttrs = (*this)->getAttrs().size(); + printer << ' '; printer << getTensorDesc(); - if ((*this)->getAttrs().size()) { + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << ' ' << "{"; - printer << "mode = " << getMode(); - if (getVnniAxisAttr()) { + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { + printer << "mode = " << mode; + printSep = true; + } + + if (getVnniAxisAttr()) { + if (printSep) printer << "," << ' '; - printer << "vnni_axis = " << getVnniAxis().value(); - } + printer << "vnni_axis = " << getVnniAxis().value(); + printSep = true; + } - if (getTransposeAttr()) { + if (getTransposeAttr()) { + if (printSep) printer << "," << ' '; - printer << "transpose = "; - getTransposeAttr().print(printer); - } + printer << "transpose = "; + getTransposeAttr().print(printer); + printSep = true; + } - printCacheHintAttrs(printer, *this, true); + printCacheHintAttrs(printer, *this, printSep); + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << "}"; } + printer << ' ' << ":"; printer << ' '; printer << getTensorDesc().getType(); @@ -562,15 +673,7 @@ void LoadNDOp::print(::mlir::OpAsmPrinter &printer) { printer << getValue().getType(); } -// mlir::LogicalResult CreateNbarrierOp::verify() { -// llvm::dbgs() << "\nOp: " << getValueAsString(*this) -// << "\n\tnum producers: " << getNumProducers() -// << "\n\tnum consumers: " << getNumConsumers() -// << "\n\n"; -// return mlir::success(); -// }
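The printers in this file now elide `mode = simt` and other default attributes unless printDefaultValues() returns true. A sketch of opting back into the verbose form from a C++ driver, assuming an `op` and the usual LLVM headers are in scope (setenv is POSIX; a lit test would instead prefix its RUN line with `env IMEX_XEGPU_PRINT_DEFAULTS=true`):

  #include <cstdlib>
  // ...
  setenv("IMEX_XEGPU_PRINT_DEFAULTS", "true", /*overwrite=*/1);
  op->print(llvm::outs()); // defaults such as mode = simt are printed again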
-mlir::LogicalResult LoadNDOp::verify() { +::mlir::LogicalResult LoadNDOp::verify() { auto tdescTy = getTensorDesc().getType(); auto valueTy = llvm::dyn_cast(getValue().getType()); @@ -588,9 +691,9 @@ mlir::LogicalResult LoadNDOp::verify() { return emitOpError( "Value should have the same element type as TensorDesc."); - if (tdescTy.getRank() == 2) { // TODO: The following logic are architecture - // dependent, pending to be moved - // out + if (tdescTy.getRank() == 2) { + // TODO: The following logic is architecture + // dependent, pending to be moved out auto width = tdescTy.getShape()[1]; auto height = tdescTy.getShape()[0]; auto elemTyByteWidth = tdescElemTy.getIntOrFloatBitWidth() / 8; @@ -598,10 +701,11 @@ if (width < MIN_2D_BLOCK_WIDTH_IN_ELEMENTS || width > MAX_2D_BLOCK_WIDTH_IN_ELEMENTS || (width * elemTyByteWidth) % 4 != 0) { - return emitOpError("Invalid width size for 2D block load. \ The specification expects the value to \ be in range [1, 64], and The the total \ data size (width * elemTyBytes) to be multiple of 4.\n"); + return emitOpError( + "Invalid width size for 2D block load. " + "The specification expects the value to " + "be in range [1, 64], and the total " + "data size (width * elemTyBytes) to be a multiple of 4.\n"); } if (height < MIN_2D_BLOCK_HEIGHT_IN_ELEMENTS || @@ -620,63 +724,36 @@ mlir::LogicalResult LoadNDOp::verify() { imex::xegpu::WorkGroupMapAttr wgMap; imex::xegpu::SubGroupMapAttr sgMap; - auto encoding = tdescTy.getEncoding(); - if (!isMappingAttr(encoding)) { + auto mapping = tdescTy.getMapping(); + if (!mapping) { return emitOpError("Expecting either SgMap, WgMap or XeMap attribute for " "SIMT mode operators.\n"); } - if (auto xeMapAttr = llvm::dyn_cast(encoding)) { + if (auto xeMapAttr = llvm::dyn_cast(mapping)) { wgMap = xeMapAttr.getWg(); sgMap = xeMapAttr.getSg(); } else { - wgMap = llvm::dyn_cast(encoding); - sgMap = llvm::dyn_cast(encoding); + wgMap = llvm::dyn_cast(mapping); + sgMap = llvm::dyn_cast(mapping); } - if (wgMap) { - auto sgData = wgMap.getSgData(); - auto sgLayout = wgMap.getSgLayout(); - for (size_t i = 0; i < sgData.size(); i++) { - if (tdescShape[i] % sgLayout[i] != 0 || - tdescShape[i] % sgData[i] != 0 || - tdescShape[i] % (sgLayout[i] * sgData[i]) != 0) - return emitOpError("Invalid WorkGroupMapAttr. It should meet the " - "following conditions: " - "tdescShape[i] % sgLayout[i] == 0 && " - "tdescShape[i] % sgData[i] == 0 && " - "tdescShape[i] % (sgLayout[i] *sgData[i]) == 0"); - tdescShape[i] /= sgLayout[i]; - } - } - - if (sgMap) { - auto blockSize = sgMap.getMmaBlockSize(); - auto wiLayout = sgMap.getWiLayout(); - auto wiData = sgMap.getWiData(); - for (size_t i = 0; i < blockSize.size(); i++) { - if (tdescShape[i] % blockSize[i] != 0 || - blockSize[i] % wiLayout[i] != 0 || blockSize[i] % wiData[i] != 0 || - blockSize[i] % (wiLayout[i] * wiData[i]) != 0) { - return emitOpError("Invalid SubGroupMapAttr. It should meet the " - "following conditions: " - "tdescShape[i] % blockSize[i] == 0 && " - "blockSize[i] % wiLayout[i] == 0 && " - "blockSize[i] % wiData[i] == 0 && " - "blockSize[i] % (wiLayout[i] * wiData[i]) == 0 "); - } - } - - for (size_t i = 0; i < wiLayout.size(); i++) { - if (tdescShape[i] % wiData[i] != 0 || - tdescShape[i] % (wiLayout[i] * wiData[i]) != 0) { - return emitOpError("Invalid SubGroupMapAttr. It should meet the " - "following conditions: " - "tdescShape[i] % wiData[i] == 0 && " - "tdescShape[i] % (wiLayout[i] * wiData[i]) == 0 "); - } - tdescShape[i] /= wiLayout[i]; - } + if (!verifyAndInferShape(tdescShape, wgMap, sgMap)) { + return emitOpError("Failed to infer the shape.") + << "\nIt should meet the following conditions for " "WorkGroupMapAttr: " + << "\n\t tdescShape[i] % sg_layout[i] == 0 && " + << "\n\t tdescShape[i] % sg_data[i] == 0 && " + << "\n\t (tdescShape[i] % (sg_layout[i] * sg_data[i]) == 0 ||" + << "\n\t (sg_layout[i] * sg_data[i]) % tdescShape[i] == 0)" + << "\n\nAnd after performing shape[i] /= sg_layout[i], " + << "the new shape[i] should meet the following conditions " "for SubGroupMapAttr: " + << "\n\ttdescShape[i] % mma_block_size[i] == 0 (if it has) && " + << "\n\ttdescShape[i] % wi_layout[i] == 0 && " + << "\n\ttdescShape[i] % wi_data[i] == 0 && " + << "\n\t(tdescShape[i] % (wi_layout[i] * wi_data[i]) == 0 || " + << "\n\t (wi_layout[i] * wi_data[i]) % tdescShape[i] == 0).\n"; } } @@ -696,25 +773,18 @@ mlir::LogicalResult LoadNDOp::verify() { } if (tdescShape != valueShape) - return emitOpError( - "Result shape doesn't match TensorDesc shape." - "The expected shape is " + - makeString(tdescShape) + - ", while " - "the given shape is " + - makeString(valueShape) + - ". " - "In VC mode, when VNNI is not enabled, the result should have the same " - "shape (or transposed shape if transpose is also enabled) as " - "TensorDesc; " - "when VNNI is enabled, the result should have one more dimention than " - "the " - "TensorDesc, with last dimention having vnni factor, but having same " - "number " - "of total data elements. The vnni factor are typically calculated as " - "simd_lane_width / elementTypeBitWidth. " - "For element type having more than 32 bits, vnni shouldn't be used. " - "In SIMT mode, the shape is derived from the mapping attributes.\n"); + return emitOpError("Result shape doesn't match TensorDesc shape.") + << "\nThe expected shape is " << makeString(tdescShape) << "." + << "\nBut the given shape is " << makeString(valueShape) << "." + << "\nIn VC mode, when VNNI is not enabled, the result should have " + << "the same shape (or transposed shape if transpose is enabled) " + << "as TensorDesc; \nwhen VNNI is enabled, the result should have " + << "one more dimension than the TensorDesc, with the last dimension " + << "having the vnni factor, \nbut having the same number of total data " + << "elements. The vnni factor is typically calculated as " + << "simd_lane_width / elementTypeBitWidth. \nFor element type " + << "having more than 32 bits, vnni shouldn't be used. 
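A self-contained worked instance of the VNNI rule in the diagnostic above, with illustrative numbers (an 8x16 f16 tile, vnni_axis = 0, factor = 32 / 16 = 2):

  #include <cassert>
  #include <vector>
  int main() {
    std::vector<long> shape = {8, 16};
    const long factor = 32 / 16; // simd_lane_width / elementTypeBitWidth
    shape[0] /= factor;          // the vnni_axis extent shrinks by the factor
    shape.push_back(factor);     // ...and reappears as a trailing dimension
    // One more dimension, same total element count: 8*16 == 4*16*2.
    assert(shape[0] * shape[1] * shape[2] == 8 * 16);
    return 0;
  }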
\nIn SIMT " << "mode, the shape is derived from the mapping attributes.\n"; return mlir::success(); } @@ -769,17 +839,32 @@ ::mlir::ParseResult StoreNDOp::parse(::mlir::OpAsmParser &parser, } void StoreNDOp::print(::mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto printDefaults = printDefaultValues(); + auto numAttrs = (*this)->getAttrs().size(); + printer << ' '; printer << getValue(); printer << ","; printer << ' '; printer << getTensorDesc(); - if ((*this)->getAttrs().size()) { + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << ' ' << "{"; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { printer << "mode = " << getMode(); - printCacheHintAttrs(printer, *this, true); + printSep = true; + } + + printCacheHintAttrs(printer, *this, printSep); + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << "}"; } + printer << ' ' << ":"; printer << ' '; printer << getValue().getType(); @@ -788,7 +873,7 @@ void StoreNDOp::print(::mlir::OpAsmPrinter &printer) { printer << getTensorDesc().getType(); } -mlir::LogicalResult StoreNDOp::verify() { +::mlir::LogicalResult StoreNDOp::verify() { auto dstTy = getTensorDesc().getType(); // Tile auto valTy = llvm::dyn_cast(getValue().getType()); // Vector @@ -816,10 +901,11 @@ mlir::LogicalResult StoreNDOp::verify() { if (width < MIN_2D_BLOCK_WIDTH_IN_ELEMENTS || width > MAX_2D_BLOCK_WIDTH_IN_ELEMENTS || (width * elemTyByteWidth) % 4 != 0) { - return emitOpError("Invalid width size for 2D block write. \ The specification expects the value to \ be in range [1, 64], and The the total \ data size (width * elemTyBytes) to be multiple of 4.\n"); + return emitOpError( + "Invalid width size for 2D block write. " + "The specification expects the value to " + "be in range [1, 64], and the total " + "data size (width * elemTyBytes) to be a multiple of 4.\n"); } if (height < MIN_2D_BLOCK_HEIGHT_IN_ELEMENTS || @@ -837,8 +923,8 @@ mlir::LogicalResult StoreNDOp::verify() { return emitOpError("In VC mode, the value (vector) shape doesn't match " "the memory (dst) shape.\n"); } else { - auto encoding = dstTy.getEncoding(); - if (!isMappingAttr(encoding)) { + auto mapping = dstTy.getMapping(); + if (!mapping) { return emitOpError("Expecting either SgMap, WgMap or XeMap attribute for " "SIMT mode operators.\n"); } imex::xegpu::WorkGroupMapAttr wgMap; imex::xegpu::SubGroupMapAttr sgMap; std::vector shape = dstTy.getShape().vec(); - if (auto xeMapAttr = llvm::dyn_cast(encoding)) { + if (auto xeMapAttr = llvm::dyn_cast(mapping)) { wgMap = xeMapAttr.getWg(); sgMap = xeMapAttr.getSg(); } else { - wgMap = llvm::dyn_cast(encoding); - sgMap = llvm::dyn_cast(encoding); + wgMap = llvm::dyn_cast(mapping); + sgMap = llvm::dyn_cast(mapping); }
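A self-contained illustration of the divisibility contract that verifyAndInferShape enforces below, with made-up numbers: a 128x128 tile distributed over sg_layout = [2, 8] and sg_data = [32, 16] passes the checks and leaves each subgroup a 64x16 tile, i.e. shape[i] /= sg_layout[i]:

  #include <cassert>
  #include <vector>
  int main() {
    std::vector<long> shape = {128, 128};
    const long sgLayout[2] = {2, 8}, sgData[2] = {32, 16};
    for (int i = 0; i < 2; ++i) {
      assert(shape[i] % sgLayout[i] == 0 && shape[i] % sgData[i] == 0 &&
             shape[i] % (sgLayout[i] * sgData[i]) == 0);
      shape[i] /= sgLayout[i]; // per-subgroup tile: {64, 16}
    }
    return 0;
  }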
- if (wgMap) { - auto sgData = wgMap.getSgData(); - auto sgLayout = wgMap.getSgLayout(); - for (size_t i = 0; i < sgData.size(); i++) { - if (shape[i] % sgLayout[i] != 0 || shape[i] % sgData[i] != 0 || - shape[i] % (sgLayout[i] * sgData[i]) != 0) - return emitOpError("Invalid WorkGroupMapAttr. It should meet the " - "following conditions: " - "tdescShape[i] % sgLayout[i] == 0 && " - "tdescShape[i] % sgData[i] == 0 && " - "tdescShape[i] % (sgLayout[i] *sgData[i]) == 0"); - shape[i] /= sgLayout[i]; - } - } - if (sgMap) { - auto blockSize = sgMap.getMmaBlockSize(); - auto wiLayout = sgMap.getWiLayout(); - auto wiData = sgMap.getWiData(); - for (size_t i = 0; i < shape.size(); i++) { - if (blockSize[i] % (wiLayout[i] * wiData[i]) != 0 || - blockSize[i] % wiLayout[i] != 0 || blockSize[i] % wiData[i] != 0 || - shape[i] % blockSize[i] != 0) { - return emitOpError("Invalid SubGroupMapAttr. It should meet the " - "following conditions: " - "tdescShape[i] % blockSize[i] == 0 && " - "blockSize[i] % wiLayout[i] == 0 && " - "blockSize[i] % wiData[i] == 0 && " - "blockSize[i] % (wiLayout[i] * wiData[i]) == 0 "); - } - } - - for (size_t i = 0; i < wiLayout.size(); i++) { - if (shape[i] % wiData[i] != 0 || - shape[i] % (wiLayout[i] * wiData[i]) != 0) { - return emitOpError("Invalid SubGroupMapAttr. It should meet the " - "following conditions: " - "tdescShape[i] % wiData[i] == 0 && " - "tdescShape[i] % (wiLayout[i] * wiData[i]) == 0 "); - } - shape[i] /= wiLayout[i]; - } + if (!verifyAndInferShape(shape, wgMap, sgMap)) { + return emitOpError("Failed to infer the shape.") + << "\nIt should meet the following conditions for " "WorkGroupMapAttr: " + << "\n\t tdescShape[i] % sg_layout[i] == 0 && " + << "\n\t tdescShape[i] % sg_data[i] == 0 && " + << "\n\t (tdescShape[i] % (sg_layout[i] * sg_data[i]) == 0 ||" + << "\n\t (sg_layout[i] * sg_data[i]) % tdescShape[i] == 0)" + << "\n\nAnd after performing shape[i] /= sg_layout[i], " + << "the new shape[i] should meet the following conditions " "for SubGroupMapAttr: " + << "\n\ttdescShape[i] % mma_block_size[i] == 0 (if it has) && " + << "\n\ttdescShape[i] % wi_layout[i] == 0 && " + << "\n\ttdescShape[i] % wi_data[i] == 0 && " + << "\n\t(tdescShape[i] % (wi_layout[i] * wi_data[i]) == 0 || " + << "\n\t (wi_layout[i] * wi_data[i]) % tdescShape[i] == 0).\n"; } if (shape != valTy.getShape().vec()) @@ -936,13 +996,25 @@ ::mlir::ParseResult PrefetchNDOp::parse(::mlir::OpAsmParser &parser, } void PrefetchNDOp::print(::mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto printDefaults = printDefaultValues(); + auto numAttrs = (*this)->getAttrs().size(); printer << ' '; printer << getTensorDesc(); - // printer.printOptionalAttrDict((*this)->getAttrs()); - if ((*this)->getAttrs().size()) { + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << ' ' << "{"; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { printer << "mode = " << getMode(); - printCacheHintAttrs(printer, *this, true); + printSep = true; + } + + printCacheHintAttrs(printer, *this, printSep); + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << "}"; } @@ -951,7 +1023,7 @@ void PrefetchNDOp::print(::mlir::OpAsmPrinter &printer) { printer << getTensorDesc().getType(); } -mlir::LogicalResult DpasOp::verify() { +::mlir::LogicalResult DpasOp::verify() { int64_t lhsRank = getLhsType().getRank(); int64_t rhsRank = getRhsType().getRank(); @@ -1059,25 +1131,44 @@ ::mlir::ParseResult LoadGatherOp::parse(::mlir::OpAsmParser &parser, }
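A usage sketch for the enum-based LoadGatherOp builder declared in XeGPUOps.td above, assuming `rewriter`, `loc`, `resultTy`, `tdesc`, and `mask` are placeholders in a surrounding pattern. The explicit null transpose plus one explicit enum hint select the enum overload unambiguously; the remaining hints default to CACHED and the builder stamps mode = vc:

  auto loaded = rewriter.create<imex::xegpu::LoadGatherOp>(
      loc, resultTy, tdesc, mask, /*vnni_axis=*/mlir::IntegerAttr(),
      /*transpose=*/mlir::DenseI64ArrayAttr(),
      imex::xegpu::CacheReadHint::CACHED);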
@@ -936,13 +996,25 @@ ::mlir::ParseResult PrefetchNDOp::parse(::mlir::OpAsmParser &parser, } void PrefetchNDOp::print(::mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto printDefaults = printDefaultValues(); + auto numAttrs = (*this)->getAttrs().size(); printer << ' '; printer << getTensorDesc(); - // printer.printOptionalAttrDict((*this)->getAttrs()); - if ((*this)->getAttrs().size()) { + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << ' ' << "{"; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { printer << "mode = " << getMode(); - printCacheHintAttrs(printer, *this, true); + printSep = true; + } + + printCacheHintAttrs(printer, *this, true); + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << "}"; } @@ -951,7 +1023,7 @@ void PrefetchNDOp::print(::mlir::OpAsmPrinter &printer) { printer << getTensorDesc().getType(); } -mlir::LogicalResult DpasOp::verify() { +::mlir::LogicalResult DpasOp::verify() { int64_t lhsRank = getLhsType().getRank(); int64_t rhsRank = getRhsType().getRank(); @@ -1059,25 +1131,44 @@ ::mlir::ParseResult LoadGatherOp::parse(::mlir::OpAsmParser &parser, } void LoadGatherOp::print(mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto printDefaults = printDefaultValues(); + auto numAttrs = (*this)->getAttrs().size(); + printer << ' '; printer << getTensorDesc(); printer << ","; printer << ' '; printer << getMask(); - if ((*this)->getAttrs().size()) { + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << ' ' << "{"; + } + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { printer << "mode = " << getMode(); - if (getVnniAxisAttr()) - printer << ", vnni_axis = " << getVnniAxis().value(); + printSep = true; + } - if (getTransposeAttr()) { - printer << ", transpose = "; - getTransposeAttr().print(printer); - } + if (getVnniAxisAttr()) { + if (printSep) + printer << "," << ' '; + printer << "vnni_axis = " << getVnniAxis().value(); + printSep = true; + } - printCacheHintAttrs(printer, *this, true); + if (getTransposeAttr()) { + if (printSep) + printer << "," << ' '; + printer << "transpose = "; + getTransposeAttr().print(printer); + printSep = true; + } + printCacheHintAttrs(printer, *this, printSep); + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << "}"; } @@ -1092,11 +1183,16 @@ void LoadGatherOp::print(mlir::OpAsmPrinter &printer) { printer << getValue().getType(); } -mlir::LogicalResult LoadGatherOp::verify() { +::mlir::LogicalResult LoadGatherOp::verify() { auto tdescTy = getTensorDesc().getType(); auto maskTy = getMask().getType(); auto valueTy = getValue().getType(); + auto encoding = tdescTy.getEncoding(); + if (!encoding || !llvm::isa<imex::xegpu::ScatteredAttr>(encoding)) + return emitOpError( "LoadGatherOp only works on TensorDesc with ScatteredAttr."); + auto getElementType = [&](mlir::Type type) -> mlir::Type { if (type.isIntOrIndexOrFloat()) return type; @@ -1131,6 +1227,13 @@ mlir::LogicalResult LoadGatherOp::verify() { if (tdescShape != maskShape) return emitOpError("Mask should have the same shape as TensorDesc."); + auto mode = getMode(); + auto mapping = tdescTy.getMapping(); + if (mode == imex::xegpu::Mode::SIMT || mapping) { + return emitOpError("LoadGatherOp only supports VC mode, and the mapping " "attribute of TensorDesc is not expected.\n"); + } + if (getTranspose()) { auto trans = getTranspose().value(); if (tdescShape.size() >= trans.size()) @@ -1150,15 +1253,12 @@ mlir::LogicalResult LoadGatherOp::verify() { return emitOpError( "Result shape doesn't match TensorDesc shape. when VNNI is not enabled," "the result should have the same shape (or transposed shape if " - "transpose" - "is also enabled) as TensorDesc. When VNNI is enabled, the result " - "should" - "have one more dimention than the TensorDesc, with last dimention " - "having" - "vnni factor, but having same number of total data elements. The vnni " - "factor are typically calculated as simd_lane_width / " - "elementTypeBitWidth." - "For element type having more than 32 bits, vnni shouldn't be used.\n"); + "transpose is also enabled) as TensorDesc. When VNNI is enabled, " + "the result should have one more dimension than the TensorDesc, " + "with the last dimension having the vnni factor, but the same number of " + "total data elements. The vnni factor is typically calculated as " + "simd_lane_width/elementTypeBitWidth. For element types having " + "more than 32 bits, vnni shouldn't be used.\n"); return ::mlir::success(); }
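For the VNNI wording in this message, a numeric reading that lines up with the load_nd expectations in the tests below (the 32-bit SIMD lane width is an assumption of the example, not stated in this patch): with f16 elements the vnni factor is simd_lane_width / elementTypeBitWidth = 32 / 16 = 2, so a tensor_desc<16x16xf16> loaded with vnni_axis = 0 yields vector<8x16x2xf16>, and a tensor_desc<8x16xf16> with vnni_axis = 1 yields vector<8x8x2xf16>; the split axis shrinks by the factor, a trailing dimension of that size is appended, and the total element count (256 and 128 respectively) is unchanged. For f32 or wider element types the factor degenerates to 1, which is why the message advises against VNNI there.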
@@ -1244,6 +1344,11 @@ ::mlir::ParseResult StoreScatterOp::parse(::mlir::OpAsmParser &parser, } void StoreScatterOp::print(::mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto printDefaults = printDefaultValues(); + auto numAttrs = (*this)->getAttrs().size(); + printer << ' '; printer << getValue(); printer << ","; @@ -1252,10 +1357,19 @@ void StoreScatterOp::print(::mlir::OpAsmPrinter &printer) { printer << ","; printer << ' '; printer << getMask(); - if ((*this)->getAttrs().size()) { + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << ' ' << "{"; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { printer << "mode = " << getMode(); - printCacheHintAttrs(printer, *this, true); + printSep = true; + } + + printCacheHintAttrs(printer, *this, printSep); + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { printer << "}"; } @@ -1275,6 +1389,11 @@ ::mlir::LogicalResult StoreScatterOp::verify() { auto tdescTy = getTensorDesc().getType(); auto maskTy = getMask().getType(); + auto encoding = tdescTy.getEncoding(); + if (!encoding || !llvm::isa<imex::xegpu::ScatteredAttr>(encoding)) + return emitOpError("Invalid TensorDesc. StoreScatterOp only works on " "TensorDescs with ScatteredAttr."); + std::vector<int64_t> valueShape, maskShape; auto getShape = [&](mlir::Type type, std::vector<int64_t> &shape) -> void { if (type.isIntOrIndexOrFloat()) return @@ -1288,12 +1407,102 @@ getShape(valueTy, valueShape); getShape(maskTy, maskShape); - if (tdescTy.getShape().vec() != maskShape || valueShape != maskShape) { - return emitOpError( "Mask and value should have the same shape/size as TensorDesc." "Mask and Value can be scalar if TensorDesc is in form of " "TensorDesc<1xf16>."); + if (valueShape != maskShape) { + return emitOpError("Mask and value should have the same shape/size."); + } + + auto tdescShape = tdescTy.getShape().vec(); + + auto mode = getMode(); + auto mapping = tdescTy.getMapping(); + + if (mode != imex::xegpu::Mode::VC || mapping) { + return emitOpError("StoreScatterOp only supports VC mode, and the mapping " "attribute of TensorDesc is not expected.\n"); + } + + if (tdescShape != valueShape) { + return emitOpError("TensorDesc shape and value shape don't match. 
") + << "The expected/derived value shape is: " << makeString(tdescShape) + << ".\nMask and value should have the same shape/size as " + "TensorDesc.\n"; + } + + return ::mlir::success(); +} + +::mlir::ParseResult PrefetchOp::parse(::mlir::OpAsmParser &parser, + ::mlir::OperationState &result) { + mlir::OpAsmParser::UnresolvedOperand TensorDescRawOperands[1]; + llvm::ArrayRef<::mlir::OpAsmParser::UnresolvedOperand> TensorDescOperands( + TensorDescRawOperands); + llvm::SMLoc TensorDescOperandsLoc; + mlir::Type TensorDescRawTypes[1]; + llvm::ArrayRef<::mlir::Type> TensorDescTypes(TensorDescRawTypes); + + TensorDescOperandsLoc = parser.getCurrentLocation(); + if (parser.parseOperand(TensorDescRawOperands[0])) + return ::mlir::failure(); + + if (parseOptionalAttrDict(parser, result, + {"mode", "l1_hint", "l2_hint", "l3_hint"})) + return mlir::failure(); + + if (parser.parseColon()) + return ::mlir::failure(); + + if (parser.parseType(TensorDescRawTypes[0])) + return ::mlir::failure(); + if (parser.resolveOperands(TensorDescOperands, TensorDescTypes, + TensorDescOperandsLoc, result.operands)) + return ::mlir::failure(); + return ::mlir::success(); +} + +void PrefetchOp::print(::mlir::OpAsmPrinter &printer) { + auto mode = getMode(); + bool printSep = false; + auto printDefaults = printDefaultValues(); + auto numAttrs = (*this)->getAttrs().size(); + + printer << ' '; + printer << getTensorDesc(); + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { + printer << ' ' << "{"; + } + + if (printDefaults || mode != imex::xegpu::Mode::SIMT) { + printer << "mode = " << getMode(); + printSep = true; + } + + printCacheHintAttrs(printer, *this, printSep); + + if (printDefaults || mode != imex::xegpu::Mode::SIMT || numAttrs > 1) { + printer << "}"; + } + + printer << ' ' << ":"; + printer << ' '; + printer << getTensorDesc().getType(); +} + +::mlir::LogicalResult PrefetchOp::verify() { + auto mode = getMode(); + auto tdescTy = getTensorDesc().getType(); + auto encoding = tdescTy.getEncoding(); + auto mapping = tdescTy.getMapping(); + + if (!encoding || !llvm::isa(encoding)) + return emitOpError("Invalid TensorDesc. PrefetchOp only works on " + "TensorDescs with ScatteredAttr."); + + if (mode != imex::xegpu::Mode::VC || mapping) { + return emitOpError("PrefetchOp only supports VC mode. and mapping " + "attribute of TensorDesc is not expected.\n"); } + return ::mlir::success(); } @@ -1310,29 +1519,18 @@ ::mlir::LogicalResult UpdateOffsetOp::verify() { auto shape = srcTy.getShape(); auto encoding = srcTy.getEncoding(); - if (!encoding || !llvm::isa(encoding)) { - return emitOpError( - "Invalid TensorDesc, it should have a scattered attribute."); + if (!encoding) { + return emitOpError("Invalid TensorDesc. UpdateOffsetOp only works on " + "TensorDescs with ScatteredAttr."); } - // For VC mode with chunkSize > 1. For chunkSize == 1, it is hard to - // distinguish between VC and SIMT mode by only looking at updateOffsetOp - // itself. So current verifier skipped these two cases. 
@@ -1310,29 +1519,18 @@ ::mlir::LogicalResult UpdateOffsetOp::verify() { auto shape = srcTy.getShape(); auto encoding = srcTy.getEncoding(); - if (!encoding || !llvm::isa<imex::xegpu::ScatteredAttr>(encoding)) { - return emitOpError( "Invalid TensorDesc, it should have a scattered attribute."); + if (!encoding) { + return emitOpError("Invalid TensorDesc. UpdateOffsetOp only works on " "TensorDescs with ScatteredAttr."); } - // For VC mode with chunkSize > 1. For chunkSize == 1, it is hard to - // distinguish between VC and SIMT mode by only looking at updateOffsetOp - // itself. So current verifier skipped these two cases. - if (shape.size() == 2) { - if (!llvm::isa<mlir::VectorType>(offTy)) - return emitOpError( "Based on TensorDesc shape, it is an VC tensor descriptor, " "in which the offset should be an 1D vector."); - - auto vecTy = llvm::dyn_cast<mlir::VectorType>(offTy); - if (vecTy.getRank() != 1) - return emitOpError("The index should be an 1D vector Type for VC mode " "tensor descriptor."); + auto vecTy = llvm::dyn_cast<mlir::VectorType>(offTy); + if (!vecTy || vecTy.getRank() != 1) + return emitOpError("The offset should be a 1D vector.\n"); - if (shape[0] != vecTy.getShape()[0]) - return emitOpError("For VC Mode TensorDesc. The offset should have same" "length as the dim-0 of TensorDesc."); - } + if (shape[0] != vecTy.getShape()[0]) + return emitOpError( "The offset should have the same length as dim-0 of TensorDesc."); return ::mlir::success(); } @@ -1340,10 +1538,19 @@ ::mlir::LogicalResult UpdateNDOffsetOp::verify() { // number of offsets specified must match the rank of the tensor descriptor if (getTensorDesc().getType().getRank() != getOffsets().size()) { - return emitOpError("invalid number of offsets."); + return emitOpError("Invalid number of offsets."); } return ::mlir::success(); } + +::mlir::LogicalResult AtomicRMWOp::verify() { + auto mode = getMode(); + if (mode != imex::xegpu::Mode::VC) { + return emitOpError("AtomicRMWOp only works in VC mode.\n"); + } + return ::mlir::success(); +} + } // namespace xegpu } // namespace imex diff --git a/test/Conversion/XeGPUToSPIRV/atomic_basic.vc.mlir b/test/Conversion/XeGPUToSPIRV/atomic_basic.vc.mlir index a06272f10..67c4aab5a 100644 --- a/test/Conversion/XeGPUToSPIRV/atomic_basic.vc.mlir +++ b/test/Conversion/XeGPUToSPIRV/atomic_basic.vc.mlir @@ -14,8 +14,8 @@ module @gemm attributes {gpu.container_module} { %mask = arith.constant dense<true> : vector<16xi1> %offsets = arith.constant dense<[0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60]> : vector<16xindex> %1 = arith.constant dense<0.5> : vector<16xf32> - %2 = xegpu.create_tdesc %arg0, %offsets {chunk_size_per_lane = 1} : memref<8x16xf32>, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> - %3 = xegpu.atomic_rmw "addf" %2, %mask, %1 : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>, vector<16xf32> -> vector<16xf32> + %2 = xegpu.create_tdesc %arg0, %offsets {mode = vc, chunk_size_per_lane = 1} : memref<8x16xf32>, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered> + %3 = xegpu.atomic_rmw "addf" %2, %mask, %1 {mode = vc} : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>, vector<16xf32> -> vector<16xf32> gpu.return } } diff --git a/test/Conversion/XeTileToXeGPU/sg_level_gemm_1k_1k_1k_f16_f32.mlir b/test/Conversion/XeTileToXeGPU/sg_level_gemm_1k_1k_1k_f16_f32.mlir index 47400e5cd..cd6384096 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_gemm_1k_1k_1k_f16_f32.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_gemm_1k_1k_1k_f16_f32.mlir @@ -15,162 +15,162 @@ func.func @test_gemm(%A: memref<1024x1024xf16>, %B: memref<1024x1024xf16>, %C: m // intialize C tile and load it //CHECK: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi 
{{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi %2, %c16_14 : index //CHECK-NEXT: arith.addi %3, %c16_15 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: 
arith.constant 16 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + 
//CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - 
//CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> %c_init_tile = xetile.init_tile %C[%m, %n] : memref<1024x1024xf32> -> !xetile.tile<64x64xf32> //CHECK: xegpu.load_nd {{.*}} {mode = vc, {{.*}}} : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32> //CHECK-NEXT: xegpu.load_nd {{.*}} {mode = vc, {{.*}}} : !xegpu.tensor_desc<8x16xf32> -> vector<8x16xf32> @@ -209,196 +209,196 @@ func.func @test_gemm(%A: memref<1024x1024xf16>, %B: memref<1024x1024xf16>, %C: m // CHECK: arith.constant 0 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: 
xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : 
memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 40 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 40 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 40 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // 
CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 40 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 56 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 56 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 56 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> // CHECK-NEXT: arith.constant 56 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} 
: memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %a_init_tile = xetile.init_tile %A[%m, %c0] : memref<1024x1024xf16> -> !xetile.tile<64x64xf16> // CHECK: arith.constant 0 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 
32 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.addi {{.*}} : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> %b_init_tile = xetile.init_tile %B[%c0, %n] : memref<1024x1024xf16> -> !xetile.tile<64x64xf16> // compute the value of C tile by iterating over tiles in k-dimension and doing dpas // CHECK: scf.for diff --git a/test/Conversion/XeTileToXeGPU/sg_level_load_tile.mlir b/test/Conversion/XeTileToXeGPU/sg_level_load_tile.mlir index c333b564e..40b929854 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_load_tile.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_load_tile.mlir @@ -5,10 +5,10 @@ func.func @sglevel_tiled_load_tile(%a: memref<1024x1024xf16>, %b: memref<1024x10 //CHECK: arith.constant 0 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: 
arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xetile.init_tile %a[%c0, %c64] : memref<1024x1024xf16> -> !xetile.tile<16x16xf16> //CHECK: xegpu.load_nd {{.*}} {mode = vc, {{.*}}} : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_scf_for.mlir b/test/Conversion/XeTileToXeGPU/sg_level_scf_for.mlir index 4a09b538f..34e1364ce 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_scf_for.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_scf_for.mlir @@ -4,10 +4,10 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 %c0 = arith.constant 0 : index %c64 = arith.constant 64 : index %c1024 = arith.constant 1024 : index - //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xetile.init_tile %a[%c0, %c64] : memref<1024x1024xf16> -> !xetile.tile<16x16xf16> %2 = arith.constant dense<0.0> : vector<16x16xf16> //CHECK: !xegpu.tensor_desc<8x16xf16>, !xegpu.tensor_desc<8x16xf16>, vector<8x16xf16>, vector<8x16xf16> @@ -22,8 +22,8 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 scf.yield %5, %3: !xetile.tile<16x16xf16>, vector<16x16xf16> } - //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> - //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %5 = xetile.init_tile %b[%c0, %c64] : memref<1024x1024xf16> -> !xetile.tile<16x16xf16> //CHECK: xegpu.store_nd {{.*}} {mode = vc, {{.*}}} : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16> //CHECK: xegpu.store_nd {{.*}} {mode = vc, {{.*}}} : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_store.mlir b/test/Conversion/XeTileToXeGPU/sg_level_store.mlir index 833140e3e..6e0d04be6 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_store.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_store.mlir @@ -12,28 +12,28 @@ func.func @sglevel_tiled_store(%a: memref<1024x1024xf32>) { // CHECK: arith.constant 0 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> 
+ // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> %1 = xetile.init_tile %a[0, 32] : memref<1024x1024xf32> -> !xetile.tile<32x32xf32> // CHECK: xegpu.store_nd {{.*}} {mode = vc, {{.*}} : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_tile_mma.mlir b/test/Conversion/XeTileToXeGPU/sg_level_tile_mma.mlir index e654e7b4f..76d42e9c6 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_tile_mma.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_tile_mma.mlir @@ -5,28 +5,28 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 //CHECK: arith.constant 0 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 80 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: 
xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 80 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 80 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 80 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xetile.init_tile %a[%c0, %c64] : memref<1024x1024xf16> -> !xetile.tile<4x2x8x16xf16> //CHECK: xegpu.load_nd {{.*}} {mode = vc, vnni_axis = 1, {{.*}}} : !xegpu.tensor_desc<8x16xf16> -> vector<8x8x2xf16> @@ -41,28 +41,28 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 //CHECK: arith.constant 0 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 80 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> 
!xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 80 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 80 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 80 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> %3 = xetile.init_tile %b[%c64, %c0] : memref<1024x1024xf16> -> !xetile.tile<2x4x16x16xf16> //CHECK: xegpu.load_nd {{.*}} {mode = vc, vnni_axis = 0, {{.*}}} : !xegpu.tensor_desc<16x16xf16> -> vector<8x16x2xf16> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_tiled_gemm.mlir b/test/Conversion/XeTileToXeGPU/sg_level_tiled_gemm.mlir index 56b2334d3..e7f348e2d 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_tiled_gemm.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_tiled_gemm.mlir @@ -32,162 +32,162 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> 
!xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - 
//CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: 
arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : 
memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xetile.init_tile %a[%offset_0_dim_0, %offset_0_dim_1] : memref<1024x1024xf16> -> !xetile.tile<8x4x8x16xf16> %tile_1_dim_0 = arith.constant 64 : index @@ -207,82 +207,82 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 16 : index 
//CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}}: index //CHECK-NEXT: arith.addi {{.*}}: index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: 
xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> %2 = xetile.init_tile %b[%offset_1_dim_0, %offset_1_dim_1] : memref<1024x1024xf16> -> !xetile.tile<4x4x16x16xf16> //CHECK: arith.constant dense<0.000000e+00> : vector<8x16xf32> @@ -598,162 +598,162 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> 
!xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : 
memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : 
index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc %arg2[%261, %262] {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc %arg2[%261, %262] {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 56 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> %9 = xetile.init_tile %c[%offset_3_dim_0, %offset_3_dim_1] : memref<1024x1024xf32> -> !xetile.tile<8x4x8x16xf32> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_tiled_load_tile.mlir b/test/Conversion/XeTileToXeGPU/sg_level_tiled_load_tile.mlir index 8a7a9fbd0..1fa6b5f4c 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_tiled_load_tile.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_tiled_load_tile.mlir @@ -4,10 +4,10 @@ func.func @sglevel_tiled_load_tile(%a: memref<1024x1024xf16>, %b: memref<1024x10 %c64 = arith.constant 64 : index //CHECK: arith.constant 0 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: 
xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xetile.init_tile %a[%c0, %c64] : memref<1024x1024xf16> -> !xetile.tile<2x1x8x16xf16> //CHECK: xegpu.load_nd {{.*}} {mode = vc, {{.*}}} : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16> //CHECK-NEXT: xegpu.load_nd {{.*}} {mode = vc, {{.*}}} : !xegpu.tensor_desc<8x16xf16> -> vector<8x16xf16> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_tiled_scf_for.mlir b/test/Conversion/XeTileToXeGPU/sg_level_tiled_scf_for.mlir index 5a4cb1d86..79508f499 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_tiled_scf_for.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_tiled_scf_for.mlir @@ -8,10 +8,10 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 %c1024 = arith.constant 1024 : index //CHECK: arith.constant 0 : index //CHECK: arith.constant 64 : index - //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK: arith.constant 8 : index //CHECK: arith.constant 64 : index - //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xetile.init_tile %a[%c0, %c64] : memref<1024x1024xf16> -> !xetile.tile<2x1x8x16xf16> //CHECK: arith.constant dense<0.000000e+00> : vector<8x16xf16> //CHECK: arith.constant dense<0.000000e+00> : vector<8x16xf16> @@ -30,10 +30,10 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 } //CHECK: arith.constant 0 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 64 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %5 = xetile.init_tile %b[%c0, %c64] : memref<1024x1024xf16> -> !xetile.tile<2x1x8x16xf16> //CHECK: xegpu.store_nd {{.*}} {mode = vc, {{.*}}} : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: xegpu.store_nd {{.*}} {mode = vc, {{.*}}} : vector<8x16xf16>, !xegpu.tensor_desc<8x16xf16> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_tiled_simple.mlir b/test/Conversion/XeTileToXeGPU/sg_level_tiled_simple.mlir index cb8d84377..be68ad3a6 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_tiled_simple.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_tiled_simple.mlir @@ -22,24 +22,24 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi %6, %c0_1 : index //CHECK-NEXT: arith.addi %7, %c0_2 : index - 
//CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi %6, %c8 : index //CHECK-NEXT: arith.addi %7, %c0_3 : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> %1 = xetile.init_tile %a[%offset_0_dim_0, %offset_0_dim_1] : memref<1024x1024xf16> -> !xetile.tile<2x1x8x16xf16> //CHECK: arith.constant 0 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16> %2 = xetile.init_tile %b[%offset_0_dim_0, %offset_0_dim_1] : memref<1024x1024xf16> -> !xetile.tile<1x2x16x16xf16> //CHECK: arith.constant dense<0.000000e+00> : vector<8x16xf32> @@ -80,22 +80,22 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> %5 = xetile.init_tile %c[%offset_0_dim_0, %offset_0_dim_1] : memref<1024x1024xf32> -> 
!xetile.tile<2x2x8x16xf32> //CHECK: xegpu.store_nd {{.*}} {mode = vc, {{.*}}} : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32> //CHECK-NEXT: xegpu.store_nd {{.*}} {mode = vc, {{.*}}} : vector<8x16xf32>, !xegpu.tensor_desc<8x16xf32> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_tiled_store.mlir b/test/Conversion/XeTileToXeGPU/sg_level_tiled_store.mlir index 7a47e7216..9b33b89e9 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_tiled_store.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_tiled_store.mlir @@ -2,100 +2,100 @@ func.func @sglevel_tiled_store(%a: memref<1024x1024xf32>) { // CHECK: arith.constant 0 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 64 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 0 : index // CHECK-NEXT: arith.constant 80 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 64 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 8 : index // CHECK-NEXT: arith.constant 80 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> 
!xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 64 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 16 : index // CHECK-NEXT: arith.constant 80 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 64 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 24 : index // CHECK-NEXT: arith.constant 80 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 64 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 32 : index // CHECK-NEXT: arith.constant 80 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> 
!xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 40 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 40 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 40 : index // CHECK-NEXT: arith.constant 64 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 40 : index // CHECK-NEXT: arith.constant 80 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 64 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 48 : index // CHECK-NEXT: arith.constant 80 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 56 : index // CHECK-NEXT: arith.constant 32 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 56 : index // CHECK-NEXT: arith.constant 48 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 56 : index // CHECK-NEXT: arith.constant 64 : index 
- // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> // CHECK-NEXT: arith.constant 56 : index // CHECK-NEXT: arith.constant 80 : index - // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> + // CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf32> -> !xegpu.tensor_desc<8x16xf32> %1 = xetile.init_tile %a[0, 32] : memref<1024x1024xf32> -> !xetile.tile<8x4x8x16xf32> //CHECK: arith.constant dense<0.000000e+00> : vector<8x16xf32> diff --git a/test/Conversion/XeTileToXeGPU/sg_level_tiled_tile_mma.mlir b/test/Conversion/XeTileToXeGPU/sg_level_tiled_tile_mma.mlir index 9a0e4cc76..43b553971 100644 --- a/test/Conversion/XeTileToXeGPU/sg_level_tiled_tile_mma.mlir +++ b/test/Conversion/XeTileToXeGPU/sg_level_tiled_tile_mma.mlir @@ -20,162 +20,162 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1 //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: 
arith.constant 8 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 8 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + 
//CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 24 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 0 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 16 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 32 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> + //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16> //CHECK-NEXT: arith.constant 40 : index //CHECK-NEXT: arith.constant 48 : index //CHECK-NEXT: arith.addi {{.*}} : index //CHECK-NEXT: arith.addi {{.*}} : index - 
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   //CHECK-NEXT: arith.constant 56 : index
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   //CHECK-NEXT: arith.constant 56 : index
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   //CHECK-NEXT: arith.constant 56 : index
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   //CHECK-NEXT: arith.constant 56 : index
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<8x16xf16>
   %1 = xetile.init_tile %a[%offset_0_dim_0, %offset_0_dim_1] : memref<1024x1024xf16> -> !xetile.tile<8x4x8x16xf16>
   //CHECK: xegpu.load_nd {{.*}} {mode = vc, vnni_axis = 1, {{.*}}} : !xegpu.tensor_desc<8x16xf16> -> vector<8x8x2xf16>
@@ -229,82 +229,82 @@ func.func @sglevel_tiled_gemm(%a: memref<1024x1024xf16>, %b: memref<1024x1024xf1
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.constant 0 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.constant 16 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.constant 32 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.constant 48 : index
   //CHECK-NEXT: arith.addi {{.*}} : index
   //CHECK-NEXT: arith.addi {{.*}} : index
-  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc, boundary_check = true} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
+  //CHECK-NEXT: xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<1024x1024xf16> -> !xegpu.tensor_desc<16x16xf16>
   %3 = xetile.init_tile %b[%offset_1_dim_0, %offset_1_dim_1] : memref<1024x1024xf16> -> !xetile.tile<4x4x16x16xf16>
   //CHECK: xegpu.load_nd {{.*}} {mode = vc, vnni_axis = 0, {{.*}}} : !xegpu.tensor_desc<16x16xf16> -> vector<8x16x2xf16>
diff --git a/test/Dialect/XeGPU/IR/XeGPUOps.mlir b/test/Dialect/XeGPU/IR/XeGPUOps.mlir
index cf1f34f60..1b106ab0a 100644
--- a/test/Dialect/XeGPU/IR/XeGPUOps.mlir
+++ b/test/Dialect/XeGPU/IR/XeGPUOps.mlir
@@ -53,13 +53,13 @@ func.func @test_store_nd_vc(%src: memref<24x32xf16>, %dst: memref<24x32xf16>) {
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
   %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
   %2 = xegpu.create_nd_tdesc %dst[%c0, %c1] {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -92,7 +92,7 @@ func.func @test_update_nd_offset_vc(%src: memref<24x32xf32>) {
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
   %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
@@ -112,7 +112,7 @@ func.func @test_update_nd_offset_vc(%src: memref<24x32xf32>) {
 // CHECK-LABEL: func @test_prefetch_nd_vc({{.*}}) {
 func.func @test_prefetch_nd_vc(%src: memref<24x32xf16>, %x : index, %y : index) {
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
   %1 = xegpu.create_nd_tdesc %src[%x, %y] {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
   // CHECK: xegpu.prefetch_nd
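Across these tests the printed form of create_nd_tdesc no longer includes boundary_check = true: the attribute keeps its default value of true, and the printer now appears to elide attributes that sit at their default. A minimal sketch of the round-trip (operand names are illustrative, not taken from any one test):

  // Written form; boundary_check is left at its default (true):
  %tdesc = xegpu.create_nd_tdesc %src[%x, %y] {mode = vc}
      : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
  // Printed form, which is what the updated CHECK-SAME lines match:
  //   xegpu.create_nd_tdesc {{.*}} {mode = vc} : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>

Ops that spell boundary_check = true explicitly (see load_nd_vc.mlir below) print the same way, which is consistent with default elision rather than removal of the attribute.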
diff --git a/test/Dialect/XeGPU/IR/atomic_rmw.mlir b/test/Dialect/XeGPU/IR/atomic_rmw.mlir
index dc5bdc70a..001b8cffa 100644
--- a/test/Dialect/XeGPU/IR/atomic_rmw.mlir
+++ b/test/Dialect/XeGPU/IR/atomic_rmw.mlir
@@ -4,36 +4,40 @@
 // Verify the generic form can be parsed.
 // RUN: imex-opt -mlir-print-op-generic %s | imex-opt | FileCheck %s

-#sg_map_fp32 = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>

 // CHECK-LABEL: func @test_atomic_rmw({{.*}}) {
-func.func @test_atomic_rmw(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x1xf32>, %mask : vector<16xi1>) {
-  %1 = xegpu.create_tdesc %src, %offsets: ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #sg_map_fp32>
+func.func @test_atomic_rmw(%src: ui64, %offsets : vector<16 x index>, %value : vector<16xf32>, %mask : vector<16xi1>) {
+  %1 = xegpu.create_tdesc %src, %offsets {mode=vc}: ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   // CHECK: xegpu.atomic_rmw
-  // CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>, vector<16xi1>, vector<16x1xf32>
-  xegpu.atomic_rmw "addf" %1, %mask, %value: !xegpu.tensor_desc<16xf32, #sg_map_fp32>, vector<16xi1>, vector<16x1xf32> -> vector<16x1xf32>
+  // CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>, vector<16xf32>
+  xegpu.atomic_rmw "addf" %1, %mask, %value {mode=vc}
+    : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>, vector<16xf32> -> vector<16xf32>
   return
 }

 // CHECK-LABEL: func @test_atomic_rmw_0({{.*}}) {
 func.func @test_atomic_rmw_0(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x2xf32>, %mask : vector<16xi1>) {
-  %1 = xegpu.create_tdesc %src, %offsets {chunk_size_per_lane = 2}: ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, #sg_map_fp32>
+  %1 = xegpu.create_tdesc %src, %offsets {chunk_size_per_lane = 2, mode=vc}
+    : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, #xegpu.scattered>
   // CHECK: xegpu.atomic_rmw
-  // CHECK-SAME: tensor_desc<16x2xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>, vector<16xi1>, vector<16x2xf32> -> vector<16x2xf32>
-  xegpu.atomic_rmw "mulf" %1, %mask, %value : !xegpu.tensor_desc<16x2xf32, #sg_map_fp32>, vector<16xi1>, vector<16x2xf32> -> vector<16x2xf32>
+  // CHECK-SAME: tensor_desc<16x2xf32, #xegpu.scattered>, vector<16xi1>, vector<16x2xf32> -> vector<16x2xf32>
+  xegpu.atomic_rmw "mulf" %1, %mask, %value {mode=vc}
+    : !xegpu.tensor_desc<16x2xf32, #xegpu.scattered>, vector<16xi1>, vector<16x2xf32> -> vector<16x2xf32>
   return
 }

 // CHECK-LABEL: func @test_atomic_rmw_1({{.*}}) {
 func.func @test_atomic_rmw_1(%src: ui64, %offsets : vector<16 x index>, %value : vector<16x2xi32>, %mask : vector<16xi1>) {
-  %1 = xegpu.create_tdesc %src, %offsets {chunk_size_per_lane = 2}: ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xi32, #sg_map_fp32>
+  %1 = xegpu.create_tdesc %src, %offsets {chunk_size_per_lane = 2, mode=vc}
+    : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>
   // CHECK: xegpu.atomic_rmw
-  // CHECK-SAME: !xegpu.tensor_desc<16x2xi32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>, vector<16xi1>, vector<16x2xi32> -> vector<16x2xf32>
-  xegpu.atomic_rmw "andi" %1, %mask, %value: !xegpu.tensor_desc<16x2xi32, #sg_map_fp32>, vector<16xi1>, vector<16x2xi32> -> vector<16x2xf32>
+  // CHECK-SAME: !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>, vector<16xi1>, vector<16x2xi32> -> vector<16x2xf32>
+  xegpu.atomic_rmw "andi" %1, %mask, %value {mode=vc}
+    : !xegpu.tensor_desc<16x2xi32, #xegpu.scattered>, vector<16xi1>, vector<16x2xi32> -> vector<16x2xf32>
   return
 }
diff --git a/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir b/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir
index d5aa32cb8..ce10c2471 100644
--- a/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir
+++ b/test/Dialect/XeGPU/IR/create_nd_tdesc.mlir
@@ -4,21 +4,19 @@
 // Verify the generic form can be parsed.
 // RUN: imex-opt -mlir-print-op-generic %s | imex-opt | FileCheck %s

-#sg_map_fp16 = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>
+#sg_map_fp16 = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>
 func.func @test_create_nd_tdesc_0(%src: memref<24x32xf16>) {
   %c0 = arith.constant 2 : index
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[%c0, %c1] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %2 = xegpu.create_nd_tdesc %src[2, 4] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16>
@@ -28,8 +26,7 @@
 // CHECK-LABEL: func @test_create_nd_tdesc_1({{.*}}) {
 func.func @test_create_nd_tdesc_1(%src: memref<24x32xf16>, %x : index, %y : index) {
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[%x, %y] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16>
   return
@@ -39,8 +36,7 @@ func.func @test_create_nd_tdesc_2(%src: ui64, %w : index, %h : index, %x : index, %y : index) {
   %c1 = arith.constant 1 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: ui64 -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: ui64 -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] : ui64 -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16>
   return
 }
@@ -49,8 +45,7 @@ func.func @test_create_nd_tdesc_3(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {
   %c1 = arith.constant 1 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16>
   return
 }
@@ -60,8 +55,7 @@ func.func @test_create_nd_tdesc_3(%src: memref<?x?xf16>, %w : index, %h : index,
 func.func @test_create_nd_tdesc_4(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {
   %c1 = arith.constant 1 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] {boundary_check = true}
     : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16>
   return
@@ -71,8 +65,7 @@ func.func @test_create_nd_tdesc_4(%src: memref<?x?xf16>, %w : index, %h : index,
 func.func @test_create_nd_tdesc_5(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {
   %c1 = arith.constant 1 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1]
     : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #sg_map_fp16>
   return
@@ -82,8 +75,7 @@ func.func @test_create_nd_tdesc_5(%src: memref<?x?xf16>, %w : index, %h : index,
 func.func @test_create_nd_tdesc_6(%src: memref<?x?xf16>, %w : index, %h : index, %x : index, %y : index) {
   %c1 = arith.constant 1 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] {boundary_check = true}
     : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #sg_map_fp16>
   return
@@ -92,8 +84,7 @@
 // CHECK-LABEL: func @test_create_nd_tdesc_7({{.*}}) {
 func.func @test_create_nd_tdesc_7(%src: memref<1024xf16>, %offset : index) {
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<1024xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<1024xf16> -> !xegpu.tensor_desc<16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[%offset] : memref<1024xf16> -> !xegpu.tensor_desc<16xf16, #sg_map_fp16>
   return
 }
@@ -103,8 +94,7 @@ func.func @test_create_nd_tdesc_7(%src: memref<1024xf16>, %offset : index) {
 func.func @test_create_nd_tdesc_8(%src: memref<?x?xf16>, %w : index, %h : index, %x : index) {
   %c1 = arith.constant 1 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[8, %x], [%h, %w], [%w, %c1] {boundary_check = true}
     : memref<?x?xf16> -> !xegpu.tensor_desc<8x16xf16, memory_scope = slm, #sg_map_fp16>
   return
@@ -114,8 +104,7 @@ func.func @test_create_nd_tdesc_9(%src: memref<?x?xf16>, %w : index, %h : index, %x : index) {
   %c1 = arith.constant 1 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
-  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<64x128xf16, memory_scope = slm, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: memref<?x?xf16> -> !xegpu.tensor_desc<64x128xf16, memory_scope = slm, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %src[8, %x], [%h, %w], [%w, %c1] {boundary_check = true}
     : memref<?x?xf16> -> !xegpu.tensor_desc<64x128xf16, memory_scope = slm, #sg_map_fp16>
   return
diff --git a/test/Dialect/XeGPU/IR/create_tdesc_vc.mlir b/test/Dialect/XeGPU/IR/create_tdesc_vc.mlir
index 1bf1098a8..073aede88 100644
--- a/test/Dialect/XeGPU/IR/create_tdesc_vc.mlir
+++ b/test/Dialect/XeGPU/IR/create_tdesc_vc.mlir
@@ -1,15 +1,15 @@
-// RUN: imex-opt %s | FileCheck %s
+// RUN: IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt %s | FileCheck %s
 // Verify the printed output can be parsed.
-// RUN: imex-opt %s | imex-opt | FileCheck %s
+// RUN: IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt %s | IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt | FileCheck %s
 // Verify the generic form can be parsed.
-// RUN: imex-opt -mlir-print-op-generic %s | imex-opt | FileCheck %s
+// RUN: IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt -mlir-print-op-generic %s | IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt | FileCheck %s

 // CHECK-LABEL: func @test_create_tdesc_vc({{.*}}) {
 func.func @test_create_tdesc_vc(%src: ui64, %offsets : vector<16 x index>) {
   // CHECK: xegpu.create_tdesc %arg0, %arg1
   // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1}
-  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
+  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, memory_scope = global, #xegpu.scattered>
   %1 = xegpu.create_tdesc %src, %offsets {mode = vc}: ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   return
 }
@@ -28,7 +28,7 @@ func.func @test_create_tdesc_vc_3(%src: ui64, %offsets : vector<16 x index>) {
   // CHECK: xegpu.create_tdesc %arg0, %arg1
   // CHECK-SAME: {mode = vc, chunk_size_per_lane = 8}
-  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered>
+  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, memory_scope = global, #xegpu.scattered>
   %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 8}
     : ui64, vector<16 x index> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered>
   return
@@ -54,24 +54,3 @@ func.func @test_create_tdesc_vc_5(%src: memref<?xf32>, %offsets : vector<16 x in
     : memref<?xf32>, vector<16 x index> -> !xegpu.tensor_desc<16x2xf32, memory_scope = slm, #xegpu.scattered>
   return
 }
-
-
-// CHECK-LABEL: func @test_create_tdesc_vc_6({{.*}}) {
-func.func @test_create_tdesc_vc_6(%src: memref<?xf32>, %offset : index) {
-  // CHECK: xegpu.create_tdesc
-  // CHECK-SAME: {mode = vc, chunk_size_per_lane = 2}
-  // CHECK-SAME: memref<?xf32>, index -> !xegpu.tensor_desc<2xf32, memory_scope = slm, #xegpu.scattered>
-  %1 = xegpu.create_tdesc %src, %offset {mode = vc, chunk_size_per_lane = 2}
-    : memref<?xf32>, index -> !xegpu.tensor_desc<2xf32, memory_scope = slm, #xegpu.scattered>
-  return
-}
-
-// CHECK-LABEL: func @test_create_tdesc_vc_7({{.*}}) {
-func.func @test_create_tdesc_vc_7(%src: memref<?xf32>, %offset : index) {
-  // CHECK: xegpu.create_tdesc
-  // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1}
-  // CHECK-SAME: memref<?xf32>, index -> !xegpu.tensor_desc<1xf32, memory_scope = slm, #xegpu.scattered>
-  %1 = xegpu.create_tdesc %src, %offset {mode = vc, chunk_size_per_lane = 1}
-    : memref<?xf32>, index -> !xegpu.tensor_desc<1xf32, memory_scope = slm, #xegpu.scattered>
-  return
-}
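The RUN lines here and in load_gather_vc.mlir below add IMEX_XEGPU_PRINT_DEFAULTS=true. With that variable set, defaulted pieces of the descriptor type, such as memory_scope = global, are printed explicitly, which is what the updated CHECK-SAME lines match. A sketch of the two printed forms of the same descriptor (values as in the tests):

  %1 = xegpu.create_tdesc %src, %offsets {mode = vc}
     : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
  // Default printing:
  //   !xegpu.tensor_desc<16xf32, #xegpu.scattered>
  // With IMEX_XEGPU_PRINT_DEFAULTS=true:
  //   !xegpu.tensor_desc<16xf32, memory_scope = global, #xegpu.scattered>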
diff --git a/test/Dialect/XeGPU/IR/load_gather_vc.mlir b/test/Dialect/XeGPU/IR/load_gather_vc.mlir
index 9201aa18c..daa1ea56a 100644
--- a/test/Dialect/XeGPU/IR/load_gather_vc.mlir
+++ b/test/Dialect/XeGPU/IR/load_gather_vc.mlir
@@ -1,8 +1,8 @@
-// RUN: imex-opt %s | FileCheck %s
+// RUN: IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt %s | FileCheck %s
 // Verify the printed output can be parsed.
-// RUN: imex-opt %s | imex-opt | FileCheck %s
+// RUN: IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt %s | IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt | FileCheck %s
 // Verify the generic form can be parsed.
-// RUN: imex-opt -mlir-print-op-generic %s | imex-opt | FileCheck %s
+// RUN: IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt -mlir-print-op-generic %s | IMEX_XEGPU_PRINT_DEFAULTS=true imex-opt | FileCheck %s

 // CHECK-LABEL: func @test_load_gather_vc({{.*}}) {
@@ -10,12 +10,12 @@ func.func @test_load_gather_vc(%src: ui64, %offsets : vector<16xindex>) {
   %0 = arith.constant dense<1>: vector<16xi1>
   // CHECK: xegpu.create_tdesc
   // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1}
-  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
+  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, memory_scope = global, #xegpu.scattered>
   %1 = xegpu.create_tdesc %src, %offsets {mode = vc}: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   // CHECK: xegpu.load
   // CHECK-SAME: {mode = vc, l1_hint = cached, l2_hint = uncached}
-  // CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1> -> vector<16xf32>
+  // CHECK-SAME: !xegpu.tensor_desc<16xf32, memory_scope = global, #xegpu.scattered>, vector<16xi1> -> vector<16xf32>
   %2 = xegpu.load %1, %0 {mode = vc, l1_hint = cached, l2_hint = uncached}
     : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1> -> vector<16xf32>
   return
@@ -26,49 +26,30 @@ func.func @test_load_gather_vc_2(%src: ui64, %offsets : vector<16xindex>) {
   %0 = arith.constant dense<1>: vector<16x8xi1>
   // CHECK: xegpu.create_tdesc
   // CHECK-SAME: {mode = vc, chunk_size_per_lane = 8}
-  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered>
+  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, memory_scope = global, #xegpu.scattered>
   %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 8}
     : ui64, vector<16xindex> -> !xegpu.tensor_desc<16x8xf32, #xegpu.scattered>
   // CHECK: xegpu.load
   // CHECK-SAME: {mode = vc, transpose = [1, 0], l1_hint = cached, l2_hint = uncached}
-  // CHECK-SAME: !xegpu.tensor_desc<16x8xf32, #xegpu.scattered>, vector<16x8xi1> -> vector<8x16xf32>
+  // CHECK-SAME: !xegpu.tensor_desc<16x8xf32, memory_scope = global, #xegpu.scattered>, vector<16x8xi1> -> vector<8x16xf32>
   %2 = xegpu.load %1, %0 {mode = vc, transpose = [1, 0], l1_hint = cached, l2_hint = uncached}
     : !xegpu.tensor_desc<16x8xf32, #xegpu.scattered>, vector<16x8xi1> -> vector<8x16xf32>
   return
 }
-
-// CHECK-LABEL: func @test_load_gather_vc_3({{.*}}) {
-func.func @test_load_gather_vc_3(%src: ui64, %offset : index) {
-  %0 = arith.constant dense<1>: vector<8xi1>
-  // CHECK: xegpu.create_tdesc
-  // CHECK-SAME: {mode = vc, chunk_size_per_lane = 8}
-  // CHECK-SAME: ui64, index -> !xegpu.tensor_desc<8xf32, #xegpu.scattered>
-  %1 = xegpu.create_tdesc %src, %offset {mode = vc, chunk_size_per_lane = 8}
-    : ui64, index -> !xegpu.tensor_desc<8xf32, #xegpu.scattered>
-
-  // CHECK: xegpu.load
-  // CHECK-SAME: {mode = vc, l1_hint = cached, l2_hint = uncached}
-  // CHECK-SAME: !xegpu.tensor_desc<8xf32, #xegpu.scattered>, vector<8xi1> -> vector<8xf32>
-  %2 = xegpu.load %1, %0 {mode = vc, l1_hint = cached, l2_hint = uncached}
-    : !xegpu.tensor_desc<8xf32, #xegpu.scattered>, vector<8xi1> -> vector<8xf32>
-  return
-}
-
-
 // CHECK-LABEL: func @test_load_gather_vc_4({{.*}}) {
 func.func @test_load_gather_vc_4(%src: ui64, %offsets : vector<16xindex>) {
   %0 = arith.constant dense<1>: vector<16xi1>
   // CHECK: xegpu.create_tdesc
   // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1}
-  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
+  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, memory_scope = global, #xegpu.scattered>
   %1 = xegpu.create_tdesc %src, %offsets {mode = vc, chunk_size_per_lane = 1}
     : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   // CHECK: xegpu.load
   // CHECK-SAME: {mode = vc, l1_hint = cached, l2_hint = uncached}
-  // CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1> -> vector<16xf32>
+  // CHECK-SAME: !xegpu.tensor_desc<16xf32, memory_scope = global, #xegpu.scattered>, vector<16xi1> -> vector<16xf32>
   %2 = xegpu.load %1, %0 {mode = vc, l1_hint = cached, l2_hint = uncached}
     : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1> -> vector<16xf32>
   return
diff --git a/test/Dialect/XeGPU/IR/load_nd.mlir b/test/Dialect/XeGPU/IR/load_nd.mlir
index 3616c05bd..922f9970a 100644
--- a/test/Dialect/XeGPU/IR/load_nd.mlir
+++ b/test/Dialect/XeGPU/IR/load_nd.mlir
@@ -4,213 +4,194 @@
 // Verify the generic form can be parsed.
 // RUN: imex-opt -mlir-print-op-generic %s | imex-opt | FileCheck %s

-#sg_map_fp16_a = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>
-#sg_map_fp16_b = #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>
-#sg_map_fp16_c = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>
-#sg_map_fp16_d = #xegpu.sg_map<{wi_layout = [2, 8], wi_data = [1, 2]}>
+#sg_map_fp16_a = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>
+#sg_map_fp16_b = #xegpu.sg_map<mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>
+#sg_map_fp16_c = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>
+#sg_map_fp16_d = #xegpu.sg_map<wi_layout = [2, 8], wi_data = [1, 2]>
 // CHECK-LABEL: func @test_load_nd_fp16({{.*}}) {
 func.func @test_load_nd_fp16(%A: memref<24x32xf16>, %B : memref<24x32xf16>, %C : memref<24x32xf16>) {
   %c0 = arith.constant 2 : index
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<24x32xf16>
-  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %A[%c0, %c1] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16_a>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt, vnni_axis = 1}
-  // CHECK-SAME: !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: {vnni_axis = 1}
+  // CHECK-SAME: !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   // CHECK-SAME: -> vector<4x1x2xf16>
   %2 = xegpu.load_nd %1 {vnni_axis = 1} : !xegpu.tensor_desc<8x16xf16, #sg_map_fp16_a> -> vector<4x1x2xf16>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<24x32xf16>
-  // CHECK-SAME: -> !xegpu.tensor_desc<16x16xf16, #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<16x16xf16, #xegpu.sg_map<mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   %3 = xegpu.create_nd_tdesc %B[%c0, %c1] : memref<24x32xf16>
     -> !xegpu.tensor_desc<16x16xf16, #sg_map_fp16_b>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt, vnni_axis = 0}
-  // CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: {vnni_axis = 0}
+  // CHECK-SAME: !xegpu.tensor_desc<16x16xf16, #xegpu.sg_map<mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<8x1x2xf16>
   %4 = xegpu.load_nd %3 {vnni_axis = 0} : !xegpu.tensor_desc<16x16xf16, #sg_map_fp16_b> -> vector<8x1x2xf16>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<24x32xf16>
-  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   %5 = xegpu.create_nd_tdesc %C[%c0, %c1] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf32, #sg_map_fp16_c>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt}
-  // CHECK-SAME: !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<8x1xf32>
   %6 = xegpu.load_nd %5 : !xegpu.tensor_desc<8x16xf32, #sg_map_fp16_c> -> vector<8x1xf32>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<24x32xf16>
-  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<wi_layout = [2, 8], wi_data = [1, 2]>>
   %7 = xegpu.create_nd_tdesc %A[%c0, %c1] : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16, #sg_map_fp16_d>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt, vnni_axis = 1}
-  // CHECK-SAME: !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<{wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: {vnni_axis = 1}
+  // CHECK-SAME: !xegpu.tensor_desc<8x16xf16, #xegpu.sg_map<wi_layout = [2, 8], wi_data = [1, 2]>>
   // CHECK-SAME: -> vector<4x1x2xf16>
   %8 = xegpu.load_nd %7 {vnni_axis = 1} : !xegpu.tensor_desc<8x16xf16, #sg_map_fp16_d> -> vector<4x1x2xf16>
   return
 }

-#sg_map_bf16_a = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>
-#sg_map_bf16_b = #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>
-#sg_map_bf16_c = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>
+#sg_map_bf16_a = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>
+#sg_map_bf16_b = #xegpu.sg_map<mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>
+#sg_map_bf16_c = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>
 // CHECK-LABEL: func @test_load_nd_bf16({{.*}}) {
 func.func @test_load_nd_bf16(%A: memref<24x32xbf16>, %B : memref<24x32xbf16>, %C : memref<24x32xbf16>) {
   %c0 = arith.constant 2 : index
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<24x32xbf16>
-  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xbf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xbf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   %1 = xegpu.create_nd_tdesc %A[%c0, %c1] : memref<24x32xbf16> -> !xegpu.tensor_desc<8x16xbf16, #sg_map_bf16_a>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt, vnni_axis = 1}
-  // CHECK-SAME: !xegpu.tensor_desc<8x16xbf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+  // CHECK-SAME: {vnni_axis = 1}
+  // CHECK-SAME: !xegpu.tensor_desc<8x16xbf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
   // CHECK-SAME: -> vector<4x1x2xbf16>
   %2 = xegpu.load_nd %1 {vnni_axis = 1} : !xegpu.tensor_desc<8x16xbf16, #sg_map_bf16_a>
     -> vector<4x1x2xbf16>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<24x32xbf16>
-  // CHECK-SAME: -> !xegpu.tensor_desc<16x16xbf16, #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<16x16xbf16, #xegpu.sg_map<mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   %3 = xegpu.create_nd_tdesc %B[%c0, %c1] : memref<24x32xbf16> -> !xegpu.tensor_desc<16x16xbf16, #sg_map_bf16_b>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt, vnni_axis = 0}
-  // CHECK-SAME: !xegpu.tensor_desc<16x16xbf16, #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: {vnni_axis = 0}
+  // CHECK-SAME: !xegpu.tensor_desc<16x16xbf16, #xegpu.sg_map<mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<8x1x2xbf16>
   %4 = xegpu.load_nd %3 {vnni_axis = 0} : !xegpu.tensor_desc<16x16xbf16, #sg_map_bf16_b> -> vector<8x1x2xbf16>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<24x32xbf16>
-  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   %5 = xegpu.create_nd_tdesc %C[%c0, %c1] : memref<24x32xbf16> -> !xegpu.tensor_desc<8x16xf32, #sg_map_fp16_c>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt}
-  // CHECK-SAME: !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: !xegpu.tensor_desc<8x16xf32, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<8x1xf32>
   %6 = xegpu.load_nd %5 : !xegpu.tensor_desc<8x16xf32, #sg_map_bf16_c> -> vector<8x1xf32>
   return
 }

-#sg_map_i8_a = #xegpu.sg_map<{mma_block_size = [8, 32], wi_layout = [2, 8], wi_data = [1, 4]}>
-#sg_map_i8_b = #xegpu.sg_map<{mma_block_size = [32, 16], wi_layout = [1, 16], wi_data = [1, 1]}>
-#sg_map_i8_c = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>
+#sg_map_i8_a = #xegpu.sg_map<mma_block_size = [8, 32], wi_layout = [2, 8], wi_data = [1, 4]>
+#sg_map_i8_b = #xegpu.sg_map<mma_block_size = [32, 16], wi_layout = [1, 16], wi_data = [1, 1]>
+#sg_map_i8_c = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>
 // CHECK-LABEL: func @test_load_nd_i8({{.*}}) {
 func.func @test_load_nd_i8(%A: memref<64x64xi8>, %B : memref<64x64xi8>, %C : memref<64x64xi8>) {
   %c0 = arith.constant 2 : index
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<64x64xi8>
-  // CHECK-SAME: -> !xegpu.tensor_desc<8x32xi8, #xegpu.sg_map<{mma_block_size = [8, 32], wi_layout = [2, 8], wi_data = [1, 4]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<8x32xi8, #xegpu.sg_map<mma_block_size = [8, 32], wi_layout = [2, 8], wi_data = [1, 4]>>
   %1 = xegpu.create_nd_tdesc %A[%c0, %c1] : memref<64x64xi8> -> !xegpu.tensor_desc<8x32xi8, #sg_map_i8_a>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt, vnni_axis = 1}
-  // CHECK-SAME: !xegpu.tensor_desc<8x32xi8, #xegpu.sg_map<{mma_block_size = [8, 32], wi_layout = [2, 8], wi_data = [1, 4]}>>
+  // CHECK-SAME: {vnni_axis = 1}
+  // CHECK-SAME: !xegpu.tensor_desc<8x32xi8, #xegpu.sg_map<mma_block_size = [8, 32], wi_layout = [2, 8], wi_data = [1, 4]>>
   // CHECK-SAME: -> vector<4x1x4xi8>
   %2 = xegpu.load_nd %1 {vnni_axis = 1} : !xegpu.tensor_desc<8x32xi8, #sg_map_i8_a> -> vector<4x1x4xi8>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<64x64xi8>
-  // CHECK-SAME: -> !xegpu.tensor_desc<32x16xi8, #xegpu.sg_map<{mma_block_size = [32, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<32x16xi8, #xegpu.sg_map<mma_block_size = [32, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   %3 = xegpu.create_nd_tdesc %B[%c0, %c1] : memref<64x64xi8> -> !xegpu.tensor_desc<32x16xi8, #sg_map_i8_b>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt, vnni_axis = 0}
-  // CHECK-SAME: !xegpu.tensor_desc<32x16xi8, #xegpu.sg_map<{mma_block_size = [32, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: {vnni_axis = 0}
+  // CHECK-SAME: !xegpu.tensor_desc<32x16xi8, #xegpu.sg_map<mma_block_size = [32, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<8x1x4xi8>
   %4 = xegpu.load_nd %3 {vnni_axis = 0} : !xegpu.tensor_desc<32x16xi8, #sg_map_i8_b> -> vector<8x1x4xi8>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<64x64xi8>
-  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xi32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<8x16xi32, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   %5 = xegpu.create_nd_tdesc %C[%c0, %c1] : memref<64x64xi8> -> !xegpu.tensor_desc<8x16xi32, #sg_map_i8_c>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt}
-  // CHECK-SAME: !xegpu.tensor_desc<8x16xi32, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+  // CHECK-SAME: !xegpu.tensor_desc<8x16xi32, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<8x1xi32>
   %6 = xegpu.load_nd %5 : !xegpu.tensor_desc<8x16xi32, #sg_map_i8_c> -> vector<8x1xi32>
   return
 }

-#sg_map_f64_a = #xegpu.sg_map<{mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]}>
-#sg_map_f64_b = #xegpu.sg_map<{mma_block_size = [8, 8], wi_layout = [2, 8], wi_data = [1, 1]}>
-#sg_map_f64_c = #xegpu.sg_map<{mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]}>
+#sg_map_f64_a = #xegpu.sg_map<mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]>
+#sg_map_f64_b = #xegpu.sg_map<mma_block_size = [8, 8], wi_layout = [2, 8], wi_data = [1, 1]>
+#sg_map_f64_c = #xegpu.sg_map<mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]>
 // CHECK-LABEL: func @test_load_nd_f64({{.*}}) {
 func.func @test_load_nd_f64(%A: memref<64x64xf64>, %B : memref<64x64xf64>, %C : memref<64x64xf64>) {
   %c0 = arith.constant 2 : index
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<64x64xf64>
-  // CHECK-SAME: -> !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<{mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]>>
   %1 = xegpu.create_nd_tdesc %A[%c0, %c1] : memref<64x64xf64> -> !xegpu.tensor_desc<4x8xf64, #sg_map_f64_a>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt}
-  // CHECK-SAME: !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<{mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]}>>
+  // CHECK-SAME: !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<2x1xf64>
   %2 = xegpu.load_nd %1 : !xegpu.tensor_desc<4x8xf64, #sg_map_f64_a> -> vector<2x1xf64>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<64x64xf64>
-  // CHECK-SAME: -> !xegpu.tensor_desc<8x8xf64, #xegpu.sg_map<{mma_block_size = [8, 8], wi_layout = [2, 8], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<8x8xf64, #xegpu.sg_map<mma_block_size = [8, 8], wi_layout = [2, 8], wi_data = [1, 1]>>
   %3 = xegpu.create_nd_tdesc %B[%c0, %c1] : memref<64x64xf64> -> !xegpu.tensor_desc<8x8xf64, #sg_map_f64_b>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt}
-  // CHECK-SAME: !xegpu.tensor_desc<8x8xf64, #xegpu.sg_map<{mma_block_size = [8, 8], wi_layout = [2, 8], wi_data = [1, 1]}>>
+  // CHECK-SAME: !xegpu.tensor_desc<8x8xf64, #xegpu.sg_map<mma_block_size = [8, 8], wi_layout = [2, 8], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<4x1xf64>
   %4 = xegpu.load_nd %3 : !xegpu.tensor_desc<8x8xf64, #sg_map_f64_b> -> vector<4x1xf64>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = simt, boundary_check = true}
   // CHECK-SAME: memref<64x64xf64>
-  // CHECK-SAME: -> !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<{mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]}>>
+  // CHECK-SAME: -> !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]>>
   %5 = xegpu.create_nd_tdesc %C[%c0, %c1] : memref<64x64xf64> -> !xegpu.tensor_desc<4x8xf64, #sg_map_f64_c>
   // CHECK: xegpu.load_nd
-  // CHECK-SAME: {mode = simt}
-  // CHECK-SAME: !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<{mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]}>>
+  // CHECK-SAME: !xegpu.tensor_desc<4x8xf64, #xegpu.sg_map<mma_block_size = [4, 8], wi_layout = [2, 8], wi_data = [1, 1]>>
   // CHECK-SAME: -> vector<2x1xf64>
   %6 = xegpu.load_nd %5 : !xegpu.tensor_desc<4x8xf64, #sg_map_f64_c> -> vector<2x1xf64>
diff --git a/test/Dialect/XeGPU/IR/load_nd_vc.mlir b/test/Dialect/XeGPU/IR/load_nd_vc.mlir
index dd794285b..89f76e146 100644
--- a/test/Dialect/XeGPU/IR/load_nd_vc.mlir
+++ b/test/Dialect/XeGPU/IR/load_nd_vc.mlir
@@ -11,7 +11,7 @@ func.func @test_load_nd_simd_f32(%src: memref<24x32xf32>) {
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
   %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc, boundary_check = true}
     : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
@@ -32,7 +32,7 @@ func.func @test_load_nd_simd_f16(%src: memref<24x32xf16>, %x : index, %y : index) {
   // CHECK: xegpu.create_nd_tdesc
   // CHECK-SAME: %arg0[%arg1, %arg2]
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
   %1 = xegpu.create_nd_tdesc %src[%x, %y] {mode = vc, boundary_check = true}
     : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -49,7 +49,7 @@ func.func @test_load_nd_simd_bf16(%src: ui64, %w : index, %h : index, %x : index
   %c1 = arith.constant 1 : index
   // CHECK: xegpu.create_nd_tdesc
   // CHECK-SAME: %arg0[%arg3, %arg4], [%arg2, %arg1], [%arg1, %c1]
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: ui64 -> !xegpu.tensor_desc<8x16xbf16>
   %1 = xegpu.create_nd_tdesc %src[%x, %y], [%h, %w], [%w, %c1] {mode = vc, boundary_check = true} : ui64 -> !xegpu.tensor_desc<8x16xbf16>
   // CHECK: xegpu.load_nd
diff --git a/test/Dialect/XeGPU/IR/prefetch_nd_vc.mlir b/test/Dialect/XeGPU/IR/prefetch_nd_vc.mlir
index 5d8a2fd0c..98dc8ccc0 100644
--- a/test/Dialect/XeGPU/IR/prefetch_nd_vc.mlir
+++ b/test/Dialect/XeGPU/IR/prefetch_nd_vc.mlir
@@ -19,7 +19,7 @@ func.func @test_prefetch_nd_tdesc_vc_0(%src: memref<24x32xf32>) {
 // CHECK-LABEL: func @test_prefetch_nd_tdesc_vc_1({{.*}}) {
 func.func @test_prefetch_nd_tdesc_vc_1(%src: memref<24x32xf16>, %x : index, %y : index) {
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
   %1 = xegpu.create_nd_tdesc %src[%x, %y] {mode = vc}
     : memref<24x32xf16> -> !xegpu.tensor_desc<8x16xf16>
@@ -45,7 +45,7 @@ func.func @test_prefetch_nd_tdesc_vc_i8(%src: memref<24x32xi8>) {
 // CHECK-LABEL: func @test_prefetch_nd_tdesc_vc_bf16({{.*}}) {
 func.func @test_prefetch_nd_tdesc_vc_bf16(%src: memref<24x32xbf16>, %x : index, %y : index) {
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xbf16> -> !xegpu.tensor_desc<8x16xbf16>
   %1 = xegpu.create_nd_tdesc %src[%x, %y] {mode = vc}
     : memref<24x32xbf16> -> !xegpu.tensor_desc<8x16xbf16>
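The sg_map rewrites in simple_gemm.mlir below follow the new struct-style attribute syntax: the braces of the old custom form are dropped and the fields print as named parameters, with mma_block_size remaining optional (the old #sg_map_fp16_d in load_nd.mlir above already omitted it). For comparison, a sketch of the two spellings using the fp16 values from these tests:

  // Old custom syntax:
  //   #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>
  // New struct-style syntax:
  #map_a = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>
  // mma_block_size may be omitted entirely:
  #map_d = #xegpu.sg_map<wi_layout = [2, 8], wi_data = [1, 2]>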
diff --git a/test/Dialect/XeGPU/IR/simple_gemm.mlir b/test/Dialect/XeGPU/IR/simple_gemm.mlir
index 7c0d59827..53c659b38 100644
--- a/test/Dialect/XeGPU/IR/simple_gemm.mlir
+++ b/test/Dialect/XeGPU/IR/simple_gemm.mlir
@@ -6,9 +6,9 @@

 // ---- BF16 ------

-#sg_map_fp16_a = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>
-#sg_map_fp16_b = #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>
-#sg_map_fp16_c = #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]}>
+#sg_map_fp16_a = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>
+#sg_map_fp16_b = #xegpu.sg_map<mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>
+#sg_map_fp16_c = #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [1, 16], wi_data = [1, 1]>
 // CHECK-LABEL: func @test_gemm_bf16({{.*}}) {
 func.func @test_gemm_bf16(%a : memref<1024x1024xbf16>, %b: memref<1024x1024xbf16>, %c: memref<1024x1024xf32>) {
   %c0 = arith.constant 0 : index
@@ -25,12 +25,12 @@ func.func @test_gemm_bf16(%a : memref<1024x1024xbf16>, %b: memref<1024x1024xbf16
   scf.for %j= %c0 to %c1024 step %c16 {
     // CHECK: xegpu.create_nd_tdesc
     // CHECK-SAME: memref<1024x1024xbf16>
-    // CHECK-SAME: -> !xegpu.tensor_desc<8x16xbf16, #xegpu.sg_map<{mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]}>>
+    // CHECK-SAME: -> !xegpu.tensor_desc<8x16xbf16, #xegpu.sg_map<mma_block_size = [8, 16], wi_layout = [2, 8], wi_data = [1, 2]>>
     %1 = xegpu.create_nd_tdesc %a[%i, %c0] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<8x16xbf16, #sg_map_fp16_a>
     // CHECK: xegpu.create_nd_tdesc
     // CHECK-SAME: memref<1024x1024xbf16>
-    // CHECK-SAME: -> !xegpu.tensor_desc<16x16xbf16, #xegpu.sg_map<{mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]}>>
+    // CHECK-SAME: -> !xegpu.tensor_desc<16x16xbf16, #xegpu.sg_map<mma_block_size = [16, 16], wi_layout = [1, 16], wi_data = [1, 1]>>
     %2 = xegpu.create_nd_tdesc %b[%c0, %j] : memref<1024x1024xbf16> -> !xegpu.tensor_desc<16x16xbf16, #sg_map_fp16_b>
     %3 = arith.constant dense<0.0> : vector<8x1xf32>
diff --git a/test/Dialect/XeGPU/IR/store_nd_vc.mlir b/test/Dialect/XeGPU/IR/store_nd_vc.mlir
index 16a2824f1..e15b276a2 100644
--- a/test/Dialect/XeGPU/IR/store_nd_vc.mlir
+++ b/test/Dialect/XeGPU/IR/store_nd_vc.mlir
@@ -10,13 +10,13 @@ func.func @test_store_nd_vc_bf16(%src: memref<24x32xbf16>, %dst: memref<24x32xbf
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xbf16> -> !xegpu.tensor_desc<8x16xbf16>
   %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xbf16> -> !xegpu.tensor_desc<8x16xbf16>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xbf16> -> !xegpu.tensor_desc<8x16xbf16>
   %2 = xegpu.create_nd_tdesc %dst[%c0, %c1] {mode = vc} : memref<24x32xbf16> -> !xegpu.tensor_desc<8x16xbf16>
@@ -39,13 +39,13 @@ func.func @test_store_nd_vc_f64(%src: memref<24x32xf64>, %dst: memref<24x32xf64>
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf64> -> !xegpu.tensor_desc<8x16xf64>
   %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xf64> -> !xegpu.tensor_desc<8x16xf64>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf64> -> !xegpu.tensor_desc<8x16xf64>
   %2 = xegpu.create_nd_tdesc %dst[%c0, %c1] {mode = vc} : memref<24x32xf64> -> !xegpu.tensor_desc<8x16xf64>
@@ -68,13 +68,13 @@ func.func @test_store_nd_vc_i8(%src: memref<24x32xi8>, %dst: memref<24x32xi8>) {
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xi8> -> !xegpu.tensor_desc<8x16xi8>
   %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xi8> -> !xegpu.tensor_desc<8x16xi8>
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xi8> -> !xegpu.tensor_desc<8x16xi8>
   %2 = xegpu.create_nd_tdesc %dst[%c0, %c1] {mode = vc} : memref<24x32xi8> -> !xegpu.tensor_desc<8x16xi8>
diff --git a/test/Dialect/XeGPU/IR/store_scatter.mlir b/test/Dialect/XeGPU/IR/store_scatter.mlir
index 8924aefb8..8c1bb1c38 100644
--- a/test/Dialect/XeGPU/IR/store_scatter.mlir
+++ b/test/Dialect/XeGPU/IR/store_scatter.mlir
@@ -5,29 +5,29 @@
 // RUN: imex-opt -mlir-print-op-generic %s | imex-opt | FileCheck %s

 // CHECK-LABEL: func @test_store_scatter({{.*}}) {
-func.func @test_store_scatter(%src: ui64, %offsets : index, %dst: ui64) {
-  %0 = arith.constant 1: i1
+func.func @test_store_scatter(%src: ui64, %offsets : vector<16xindex>, %dst: ui64) {
+  %0 = arith.constant dense<true>: vector<16xi1>
   // CHECK: xegpu.create_tdesc
-  // CHECK-SAME: {mode = simt, chunk_size_per_lane = 1}
-  // CHECK-SAME: ui64, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered>
-  %1 = xegpu.create_tdesc %src, %offsets
-    : ui64, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered>
+  // CHECK-SAME: {mode = vc}
+  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
+  %1 = xegpu.create_tdesc %src, %offsets {mode = vc}
+    : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   // CHECK: xegpu.create_tdesc
-  // CHECK-SAME: {mode = simt, chunk_size_per_lane = 1}
-  // CHECK-SAME: ui64, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered>
-  %2 = xegpu.create_tdesc %dst, %offsets
-    : ui64, index -> !xegpu.tensor_desc<1xf32, #xegpu.scattered>
+  // CHECK-SAME: {mode = vc}
+  // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
+  %2 = xegpu.create_tdesc %dst, %offsets {mode = vc}
+    : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   // CHECK: xegpu.load
-  // CHECK-SAME: {mode = simt, l1_hint = cached, l2_hint = uncached}
-  // CHECK-SAME: !xegpu.tensor_desc<1xf32, #xegpu.scattered>, i1 -> f32
-  %3 = xegpu.load %1, %0 {l1_hint = cached, l2_hint = uncached}
-    : !xegpu.tensor_desc<1xf32, #xegpu.scattered>, i1 -> f32
+  // CHECK-SAME: {mode = vc, l1_hint = cached, l2_hint = uncached}
+  // CHECK-SAME: !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1> -> vector<16xf32>
+  %3 = xegpu.load %1, %0 {mode = vc, l1_hint = cached, l2_hint = uncached}
+    : !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1> -> vector<16xf32>
   // CHECK: xegpu.store
-  // CHECK-SAME: {mode = simt, l1_hint = write_back, l2_hint = uncached}
-  // CHECK-SAME: f32, !xegpu.tensor_desc<1xf32, #xegpu.scattered>, i1
-  xegpu.store %3, %2, %0 {l1_hint = write_back, l2_hint = uncached}
-    : f32, !xegpu.tensor_desc<1xf32, #xegpu.scattered>, i1
+  // CHECK-SAME: {mode = vc, l1_hint = write_back, l2_hint = uncached}
+  // CHECK-SAME: vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>
+  xegpu.store %3, %2, %0 {mode = vc, l1_hint = write_back, l2_hint = uncached}
+    : vector<16xf32>, !xegpu.tensor_desc<16xf32, #xegpu.scattered>, vector<16xi1>
   return
 }
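store_scatter_vc.mlir below keeps its plain RUN lines (no IMEX_XEGPU_PRINT_DEFAULTS), so its checks go the other way: the defaulted chunk_size_per_lane = 1 disappears from the printed attribute dict, just as boundary_check did earlier. A sketch of the expected round-trip (names as in the test):

  %1 = xegpu.create_tdesc %src, %offsets {mode = vc}
     : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
  // Prints without the defaulted chunk size:
  //   xegpu.create_tdesc %arg0, %arg1 {mode = vc}
  //     : ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>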
diff --git a/test/Dialect/XeGPU/IR/store_scatter_vc.mlir b/test/Dialect/XeGPU/IR/store_scatter_vc.mlir
index e8650efb0..83f95487d 100644
--- a/test/Dialect/XeGPU/IR/store_scatter_vc.mlir
+++ b/test/Dialect/XeGPU/IR/store_scatter_vc.mlir
@@ -8,13 +8,13 @@ func.func @test_store_scatter_vc(%src: ui64, %offsets : vector<16 x index>, %dst: ui64) {
   %0 = arith.constant dense<1>: vector<16xi1>
   // CHECK: xegpu.create_tdesc
-  // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   %1 = xegpu.create_tdesc %src, %offsets {mode = vc}
     : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   // CHECK: xegpu.create_tdesc
-  // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   %2 = xegpu.create_tdesc %dst, %offsets {mode = vc}
     : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
diff --git a/test/Dialect/XeGPU/IR/update_nd_offset.mlir b/test/Dialect/XeGPU/IR/update_nd_offset.mlir
index 4de5560e0..342cb6ae0 100644
--- a/test/Dialect/XeGPU/IR/update_nd_offset.mlir
+++ b/test/Dialect/XeGPU/IR/update_nd_offset.mlir
@@ -9,7 +9,7 @@ func.func @test_update_nd_offset_vc_0(%src: memref<24x32xf32>) {
   %c1 = arith.constant 4 : index
   // CHECK: xegpu.create_nd_tdesc
-  // CHECK-SAME: {mode = vc, boundary_check = true}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
   %1 = xegpu.create_nd_tdesc %src[%c0, %c1] {mode = vc} : memref<24x32xf32> -> !xegpu.tensor_desc<8x16xf32>
diff --git a/test/Dialect/XeGPU/IR/update_offset_vc.mlir b/test/Dialect/XeGPU/IR/update_offset_vc.mlir
index 812bbace2..e131d243a 100644
--- a/test/Dialect/XeGPU/IR/update_offset_vc.mlir
+++ b/test/Dialect/XeGPU/IR/update_offset_vc.mlir
@@ -8,7 +8,7 @@ func.func @test_update_offset_VC(%src: ui64, %offsets : vector<16 x index>) {
   %0 = arith.constant dense<1>: vector<16xi1>
   // CHECK: xegpu.create_tdesc
-  // CHECK-SAME: {mode = vc, chunk_size_per_lane = 1}
+  // CHECK-SAME: {mode = vc}
   // CHECK-SAME: ui64, vector<16xindex> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>
   %1 = xegpu.create_tdesc %src, %offsets {mode = vc}
     : ui64, vector<16 x index> -> !xegpu.tensor_desc<16xf32, #xegpu.scattered>