Skip to content

Commit 73bef6e

Browse files
committed
mma spirv api
1 parent a6c8e53 commit 73bef6e

File tree

11 files changed

+792
-409
lines changed

11 files changed

+792
-409
lines changed

include/cute/arch/mma_xe.hpp

+184-167
Large diffs are not rendered by default.

include/cute/arch/xe_config.hpp

+258-55
Large diffs are not rendered by default.

include/cute/arch/xe_copy_1B.hpp

+103-98
Large diffs are not rendered by default.

include/cute/arch/xe_copy_2B.hpp

+37-38
Large diffs are not rendered by default.

include/cute/arch/xe_copy_4B.hpp

+29-30
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ SYCL_DEVICE_OCL(void intel_sub_group_2d_block_prefetch_32b_16r8x1c(
146146
cute::intel::coord_t coord));
147147

148148
namespace cute::detail {
149-
#if defined(CUTE_ARCH_COPY_XE_BUILTIN_ENABLED)
149+
#if defined(CUTE_ARCH_XE_BUILTIN_ENABLED)
150150
template<>
151151
struct XeSubgroup2DBlockLoad<4, 16, 1, 1> {
152152
template<typename T>
@@ -432,7 +432,6 @@ struct XeSubgroup2DBlockStore<4, 16, 8, 1> {
432432
reinterpret_cast<long>(dstBasePointer), memoryWidth - 1, memoryHeight - 1, memoryPitch - 1, coordinate, *(intel::uint8 *)(srcPointer));
433433
}
434434
};
435-
#endif
436435

437436
template<>
438437
struct XeSubgroup2DBlockPrefetch<4, 8, 16, 1> {
@@ -443,7 +442,7 @@ struct XeSubgroup2DBlockPrefetch<4, 8, 16, 1> {
443442
(__global void*)(srcBasePointer), memoryWidth - 1, memoryHeight - 1, memoryPitch - 1, coordinate);
444443
}
445444
};
446-
445+
#endif
447446
} // namespace cute::detail end
448447

449448
namespace cute
@@ -455,7 +454,7 @@ struct XE_2D_U32x1x16_LD_N {
455454
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
456455
int height, int pitch, intel::coord_t coord,
457456
T *dst) {
458-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
457+
#if defined(CUTE_ARCH_XE_ENABLED)
459458
static_assert(sizeof(T) == 4, "Expected T to have size 4");
460459
detail::XeSubgroup2DBlockLoad<4, 16, 1, 1>{}(baseoffset, width, height, pitch, coord, dst);
461460
#else
@@ -471,7 +470,7 @@ struct XE_2D_U32x2x16_LD_N {
471470
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
472471
int height, int pitch, intel::coord_t coord,
473472
T *dst) {
474-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
473+
#if defined(CUTE_ARCH_XE_ENABLED)
475474
static_assert(sizeof(T) == 4, "Expected T to have size 4");
476475
detail::XeSubgroup2DBlockLoad<4, 16, 2, 1>{}(baseoffset, width, height, pitch, coord, dst);
477476
#else
@@ -487,7 +486,7 @@ struct XE_2D_U32x4x16_LD_N {
487486
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
488487
int height, int pitch, intel::coord_t coord,
489488
T *dst) {
490-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
489+
#if defined(CUTE_ARCH_XE_ENABLED)
491490
static_assert(sizeof(T) == 4, "Expected T to have size 4");
492491
detail::XeSubgroup2DBlockLoad<4, 16, 4, 1>{}(baseoffset, width, height, pitch, coord, dst);
493492
#else
@@ -503,7 +502,7 @@ struct XE_2D_U32x8x16_LD_N {
503502
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
504503
int height, int pitch, intel::coord_t coord,
505504
T *dst) {
506-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
505+
#if defined(CUTE_ARCH_XE_ENABLED)
507506
static_assert(sizeof(T) == 4, "Expected T to have size 4");
508507
detail::XeSubgroup2DBlockLoad<4, 16, 8, 1>{}(baseoffset, width, height, pitch, coord, dst);
509508
#else
@@ -519,7 +518,7 @@ struct XE_2D_U32x16x16_LD_N {
519518
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
520519
int height, int pitch, intel::coord_t coord,
521520
T *dst) {
522-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
521+
#if defined(CUTE_ARCH_XE_ENABLED)
523522
static_assert(sizeof(T) == 4, "Expected T to have size 4");
524523
detail::XeSubgroup2DBlockLoad<4, 16, 16, 1>{}(baseoffset, width, height, pitch, coord, dst);
525524
#else
@@ -535,7 +534,7 @@ struct XE_2D_U32x32x16_LD_N {
535534
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
536535
int height, int pitch, intel::coord_t coord,
537536
T *dst) {
538-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
537+
#if defined(CUTE_ARCH_XE_ENABLED)
539538
static_assert(sizeof(T) == 4, "Expected T to have size 4");
540539
detail::XeSubgroup2DBlockLoad<4, 16, 32, 1>{}(baseoffset, width, height, pitch, coord, dst);
541540
#else
@@ -551,7 +550,7 @@ struct XE_2D_TF32x1x8_LD_N {
551550
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
552551
int height, int pitch, intel::coord_t coord,
553552
T *dst) {
554-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
553+
#if defined(CUTE_ARCH_XE_ENABLED)
555554
static_assert(sizeof(T) == 4, "Expected T to have size 4");
556555
detail::XeSubgroup2DBlockLoad<4, 8, 1, 1>{}(baseoffset, width, height, pitch, coord, dst);
557556
#else
@@ -568,7 +567,7 @@ struct XE_2D_TF32x2x8_LD_N {
568567
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
569568
int height, int pitch, intel::coord_t coord,
570569
T *dst) {
571-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
570+
#if defined(CUTE_ARCH_XE_ENABLED)
572571
static_assert(sizeof(T) == 4, "Expected T to have size 4");
573572
detail::XeSubgroup2DBlockLoad<4, 8, 2, 1>{}(baseoffset, width, height, pitch, coord, dst);
574573
#else
@@ -585,7 +584,7 @@ struct XE_2D_TF32x4x8_LD_N {
585584
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
586585
int height, int pitch, intel::coord_t coord,
587586
T *dst) {
588-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
587+
#if defined(CUTE_ARCH_XE_ENABLED)
589588
static_assert(sizeof(T) == 4, "Expected T to have size 4");
590589
detail::XeSubgroup2DBlockLoad<4, 8, 4, 1>{}(baseoffset, width, height, pitch, coord, dst);
591590
#else
@@ -602,7 +601,7 @@ struct XE_2D_TF32x8x8_LD_N {
602601
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
603602
int height, int pitch, intel::coord_t coord,
604603
T *dst) {
605-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
604+
#if defined(CUTE_ARCH_XE_ENABLED)
606605
static_assert(sizeof(T) == 4, "Expected T to have size 4");
607606
detail::XeSubgroup2DBlockLoad<4, 8, 8, 1>{}(baseoffset, width, height, pitch, coord, dst);
608607
#else
@@ -619,7 +618,7 @@ struct XE_2D_TF32x16x8_LD_N {
619618
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
620619
int height, int pitch, intel::coord_t coord,
621620
T *dst) {
622-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
621+
#if defined(CUTE_ARCH_XE_ENABLED)
623622
static_assert(sizeof(T) == 4, "Expected T to have size 4");
624623
detail::XeSubgroup2DBlockLoad<4, 8, 16, 1>{}(baseoffset, width, height, pitch, coord, dst);
625624
#else
@@ -636,7 +635,7 @@ struct XE_2D_TF32x32x8_LD_N {
636635
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
637636
int height, int pitch, intel::coord_t coord,
638637
T *dst) {
639-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
638+
#if defined(CUTE_ARCH_XE_ENABLED)
640639
static_assert(sizeof(T) == 4, "Expected T to have size 4");
641640
detail::XeSubgroup2DBlockLoad<4, 8, 32, 1>{}(baseoffset, width, height, pitch, coord, dst);
642641
#else
@@ -652,7 +651,7 @@ struct XE_2D_TF32x1x16_LD_N {
652651
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
653652
int height, int pitch, intel::coord_t coord,
654653
T *dst) {
655-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
654+
#if defined(CUTE_ARCH_XE_ENABLED)
656655
static_assert(sizeof(T) == 4, "Expected T to have size 4");
657656
detail::XeSubgroup2DBlockLoad<4, 8, 1, 2>{}(baseoffset, width, height, pitch, coord, dst);
658657
#else
@@ -669,7 +668,7 @@ struct XE_2D_TF32x2x16_LD_N {
669668
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
670669
int height, int pitch, intel::coord_t coord,
671670
T *dst) {
672-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
671+
#if defined(CUTE_ARCH_XE_ENABLED)
673672
static_assert(sizeof(T) == 4, "Expected T to have size 4");
674673
detail::XeSubgroup2DBlockLoad<4, 8, 2, 2>{}(baseoffset, width, height, pitch, coord, dst);
675674
#else
@@ -686,7 +685,7 @@ struct XE_2D_TF32x4x16_LD_N {
686685
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
687686
int height, int pitch, intel::coord_t coord,
688687
T *dst) {
689-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
688+
#if defined(CUTE_ARCH_XE_ENABLED)
690689
static_assert(sizeof(T) == 4, "Expected T to have size 4");
691690
detail::XeSubgroup2DBlockLoad<4, 8, 4, 2>{}(baseoffset, width, height, pitch, coord, dst);
692691
#else
@@ -703,7 +702,7 @@ struct XE_2D_TF32x8x16_LD_N {
703702
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
704703
int height, int pitch, intel::coord_t coord,
705704
T *dst) {
706-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
705+
#if defined(CUTE_ARCH_XE_ENABLED)
707706
static_assert(sizeof(T) == 4, "Expected T to have size 4");
708707
detail::XeSubgroup2DBlockLoad<4, 8, 8, 2>{}(baseoffset, width, height, pitch, coord, dst);
709708
#else
@@ -720,7 +719,7 @@ struct XE_2D_TF32x16x16_LD_N {
720719
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
721720
int height, int pitch, intel::coord_t coord,
722721
T *dst) {
723-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
722+
#if defined(CUTE_ARCH_XE_ENABLED)
724723
static_assert(sizeof(T) == 4, "Expected T to have size 4");
725724
detail::XeSubgroup2DBlockLoad<4, 8, 16, 2>{}(baseoffset, width, height, pitch, coord, dst);
726725
#else
@@ -737,7 +736,7 @@ struct XE_2D_TF32x32x16_LD_N {
737736
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
738737
int height, int pitch, intel::coord_t coord,
739738
T *dst) {
740-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
739+
#if defined(CUTE_ARCH_XE_ENABLED)
741740
static_assert(sizeof(T) == 4, "Expected T to have size 4");
742741
detail::XeSubgroup2DBlockLoad<4, 8, 32, 2>{}(baseoffset, width, height, pitch, coord, dst);
743742
#else
@@ -754,7 +753,7 @@ struct XE_2D_U32x16x1_LD_T {
754753
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
755754
int height, int pitch, intel::coord_t coord,
756755
T *dst) {
757-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
756+
#if defined(CUTE_ARCH_XE_ENABLED)
758757
static_assert(sizeof(T) == 4, "Expected T to have size 4");
759758
detail::XeSubgroup2DBlockTranspose<4, 1, 16, 1>{}(baseoffset, width, height, pitch, coord, dst);
760759
#else
@@ -772,7 +771,7 @@ struct XE_2D_U32x16x2_LD_T {
772771
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
773772
int height, int pitch, intel::coord_t coord,
774773
T *dst) {
775-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
774+
#if defined(CUTE_ARCH_XE_ENABLED)
776775
static_assert(sizeof(T) == 4, "Expected T to have size 4");
777776
detail::XeSubgroup2DBlockTranspose<4, 2, 16, 1>{}(baseoffset, width, height, pitch, coord, dst);
778777
#else
@@ -790,7 +789,7 @@ struct XE_2D_U32x16x4_LD_T {
790789
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
791790
int height, int pitch, intel::coord_t coord,
792791
T *dst) {
793-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
792+
#if defined(CUTE_ARCH_XE_ENABLED)
794793
static_assert(sizeof(T) == 4, "Expected T to have size 4");
795794
detail::XeSubgroup2DBlockTranspose<4, 4, 16, 1>{}(baseoffset, width, height, pitch, coord, dst);
796795
#else
@@ -808,7 +807,7 @@ struct XE_2D_U32x16x8_LD_T {
808807
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
809808
int height, int pitch, intel::coord_t coord,
810809
T *dst) {
811-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
810+
#if defined(CUTE_ARCH_XE_ENABLED)
812811
static_assert(sizeof(T) == 4, "Expected T to have size 4");
813812
detail::XeSubgroup2DBlockTranspose<4, 8, 16, 1>{}(baseoffset, width, height, pitch, coord, dst);
814813
#else
@@ -820,7 +819,7 @@ struct XE_2D_U32x16x8_LD_T {
820819
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
821820
int height, int pitch,
822821
intel::coord_t coord) {
823-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
822+
#if defined(CUTE_ARCH_XE_ENABLED)
824823
detail::XeSubgroup2DBlockPrefetch<4, 8, 16, 1>{}(baseoffset, width, height, pitch, coord);
825824
#else
826825
CUTE_INVALID_CONTROL_PATH(
@@ -837,7 +836,7 @@ struct XE_2D_U32x1x16_ST_N {
837836
CUTE_HOST_DEVICE static void copy(void *baseoffset, int width, int height,
838837
int pitch, intel::coord_t coord,
839838
const T *src) {
840-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
839+
#if defined(CUTE_ARCH_XE_ENABLED)
841840
// static_assert(sizeof(T) == 4, "Expected T to have size 4");
842841
detail::XeSubgroup2DBlockStore<4, 16, 1, 1>{}(baseoffset, width, height, pitch, coord, src);
843842
#else
@@ -853,7 +852,7 @@ struct XE_2D_U32x2x16_ST_N {
853852
CUTE_HOST_DEVICE static void copy(void *baseoffset, int width, int height,
854853
int pitch, intel::coord_t coord,
855854
const T *src) {
856-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
855+
#if defined(CUTE_ARCH_XE_ENABLED)
857856
static_assert(sizeof(T) == 4, "Expected T to have size 4");
858857
detail::XeSubgroup2DBlockStore<4, 16, 2, 1>{}(baseoffset, width, height, pitch, coord, src);
859858
#else
@@ -869,7 +868,7 @@ struct XE_2D_U32x4x16_ST_N {
869868
CUTE_HOST_DEVICE static void copy(void *baseoffset, int width, int height,
870869
int pitch, intel::coord_t coord,
871870
const T *src) {
872-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
871+
#if defined(CUTE_ARCH_XE_ENABLED)
873872
static_assert(sizeof(T) == 4, "Expected T to have size 4");
874873
detail::XeSubgroup2DBlockStore<4, 16, 4, 1>{}(baseoffset, width, height, pitch, coord, src);
875874
#else
@@ -885,7 +884,7 @@ struct XE_2D_U32x8x16_ST_N {
885884
CUTE_HOST_DEVICE static void copy(void *baseoffset, int width, int height,
886885
int pitch, intel::coord_t coord,
887886
const T *src) {
888-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
887+
#if defined(CUTE_ARCH_XE_ENABLED)
889888
// static_assert(sizeof(T) == 4, "Expected T to have size 4");
890889
detail::XeSubgroup2DBlockStore<4, 16, 8, 1>{}(baseoffset, width, height, pitch, coord, src);
891890
#else

include/cute/arch/xe_copy_8B.hpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ SYCL_DEVICE_BUILTIN(
4949
int pitch_minus_one, cute::intel::coord_t coord));
5050

5151

52-
#if defined(CUTE_ARCH_COPY_XE_BUILTIN_ENABLED)
52+
#if defined(CUTE_ARCH_XE_BUILTIN_ENABLED)
5353
namespace cute::detail
5454
{
5555
template<>
@@ -95,7 +95,7 @@ struct XE_2D_U64x8x1_LD_T {
9595
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
9696
int height, int pitch, intel::coord_t coord,
9797
T *dst) {
98-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
98+
#if defined(CUTE_ARCH_XE_ENABLED)
9999
static_assert(sizeof(T) == 8, "Expected T to have size 8");
100100
detail::XeSubgroup2DBlockTranspose<8, 1, 8, 1>{}(baseoffset, width, height, pitch, coord, dst);
101101
#else
@@ -111,7 +111,7 @@ struct XE_2D_U64x8x2_LD_T {
111111
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
112112
int height, int pitch, intel::coord_t coord,
113113
T *dst) {
114-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
114+
#if defined(CUTE_ARCH_XE_ENABLED)
115115
static_assert(sizeof(T) == 8, "Expected T to have size 8");
116116
detail::XeSubgroup2DBlockTranspose<8, 2, 8, 1>{}(baseoffset, width, height, pitch, coord, dst);
117117
#else
@@ -127,7 +127,7 @@ struct XE_2D_U64x8x4_LD_T {
127127
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
128128
int height, int pitch, intel::coord_t coord,
129129
T *dst) {
130-
#if defined(CUTE_ARCH_COPY_XE_ENABLED)
130+
#if defined(CUTE_ARCH_XE_ENABLED)
131131
static_assert(sizeof(T) == 8, "Expected T to have size 8");
132132
detail::XeSubgroup2DBlockTranspose<8, 4, 8, 1>{}(baseoffset, width, height, pitch, coord, dst);
133133
#else

0 commit comments

Comments
 (0)