@@ -146,7 +146,7 @@ SYCL_DEVICE_OCL(void intel_sub_group_2d_block_prefetch_32b_16r8x1c(
146
146
cute::intel::coord_t coord));
147
147
148
148
namespace cute ::detail {
149
- #if defined(CUTE_ARCH_COPY_XE_BUILTIN_ENABLED )
149
+ #if defined(CUTE_ARCH_XE_BUILTIN_ENABLED )
150
150
template <>
151
151
struct XeSubgroup2DBlockLoad <4 , 16 , 1 , 1 > {
152
152
template <typename T>
@@ -432,7 +432,6 @@ struct XeSubgroup2DBlockStore<4, 16, 8, 1> {
432
432
reinterpret_cast <long >(dstBasePointer), memoryWidth - 1 , memoryHeight - 1 , memoryPitch - 1 , coordinate, *(intel::uint8 *)(srcPointer));
433
433
}
434
434
};
435
- #endif
436
435
437
436
template <>
438
437
struct XeSubgroup2DBlockPrefetch <4 , 8 , 16 , 1 > {
@@ -443,7 +442,7 @@ struct XeSubgroup2DBlockPrefetch<4, 8, 16, 1> {
443
442
(__global void *)(srcBasePointer), memoryWidth - 1 , memoryHeight - 1 , memoryPitch - 1 , coordinate);
444
443
}
445
444
};
446
-
445
+ # endif
447
446
} // namespace cute::detail end
448
447
449
448
namespace cute
@@ -455,7 +454,7 @@ struct XE_2D_U32x1x16_LD_N {
455
454
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
456
455
int height, int pitch, intel::coord_t coord,
457
456
T *dst) {
458
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
457
+ #if defined(CUTE_ARCH_XE_ENABLED )
459
458
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
460
459
detail::XeSubgroup2DBlockLoad<4 , 16 , 1 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
461
460
#else
@@ -471,7 +470,7 @@ struct XE_2D_U32x2x16_LD_N {
471
470
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
472
471
int height, int pitch, intel::coord_t coord,
473
472
T *dst) {
474
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
473
+ #if defined(CUTE_ARCH_XE_ENABLED )
475
474
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
476
475
detail::XeSubgroup2DBlockLoad<4 , 16 , 2 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
477
476
#else
@@ -487,7 +486,7 @@ struct XE_2D_U32x4x16_LD_N {
487
486
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
488
487
int height, int pitch, intel::coord_t coord,
489
488
T *dst) {
490
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
489
+ #if defined(CUTE_ARCH_XE_ENABLED )
491
490
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
492
491
detail::XeSubgroup2DBlockLoad<4 , 16 , 4 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
493
492
#else
@@ -503,7 +502,7 @@ struct XE_2D_U32x8x16_LD_N {
503
502
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
504
503
int height, int pitch, intel::coord_t coord,
505
504
T *dst) {
506
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
505
+ #if defined(CUTE_ARCH_XE_ENABLED )
507
506
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
508
507
detail::XeSubgroup2DBlockLoad<4 , 16 , 8 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
509
508
#else
@@ -519,7 +518,7 @@ struct XE_2D_U32x16x16_LD_N {
519
518
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
520
519
int height, int pitch, intel::coord_t coord,
521
520
T *dst) {
522
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
521
+ #if defined(CUTE_ARCH_XE_ENABLED )
523
522
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
524
523
detail::XeSubgroup2DBlockLoad<4 , 16 , 16 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
525
524
#else
@@ -535,7 +534,7 @@ struct XE_2D_U32x32x16_LD_N {
535
534
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
536
535
int height, int pitch, intel::coord_t coord,
537
536
T *dst) {
538
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
537
+ #if defined(CUTE_ARCH_XE_ENABLED )
539
538
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
540
539
detail::XeSubgroup2DBlockLoad<4 , 16 , 32 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
541
540
#else
@@ -551,7 +550,7 @@ struct XE_2D_TF32x1x8_LD_N {
551
550
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
552
551
int height, int pitch, intel::coord_t coord,
553
552
T *dst) {
554
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
553
+ #if defined(CUTE_ARCH_XE_ENABLED )
555
554
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
556
555
detail::XeSubgroup2DBlockLoad<4 , 8 , 1 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
557
556
#else
@@ -568,7 +567,7 @@ struct XE_2D_TF32x2x8_LD_N {
568
567
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
569
568
int height, int pitch, intel::coord_t coord,
570
569
T *dst) {
571
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
570
+ #if defined(CUTE_ARCH_XE_ENABLED )
572
571
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
573
572
detail::XeSubgroup2DBlockLoad<4 , 8 , 2 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
574
573
#else
@@ -585,7 +584,7 @@ struct XE_2D_TF32x4x8_LD_N {
585
584
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
586
585
int height, int pitch, intel::coord_t coord,
587
586
T *dst) {
588
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
587
+ #if defined(CUTE_ARCH_XE_ENABLED )
589
588
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
590
589
detail::XeSubgroup2DBlockLoad<4 , 8 , 4 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
591
590
#else
@@ -602,7 +601,7 @@ struct XE_2D_TF32x8x8_LD_N {
602
601
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
603
602
int height, int pitch, intel::coord_t coord,
604
603
T *dst) {
605
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
604
+ #if defined(CUTE_ARCH_XE_ENABLED )
606
605
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
607
606
detail::XeSubgroup2DBlockLoad<4 , 8 , 8 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
608
607
#else
@@ -619,7 +618,7 @@ struct XE_2D_TF32x16x8_LD_N {
619
618
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
620
619
int height, int pitch, intel::coord_t coord,
621
620
T *dst) {
622
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
621
+ #if defined(CUTE_ARCH_XE_ENABLED )
623
622
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
624
623
detail::XeSubgroup2DBlockLoad<4 , 8 , 16 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
625
624
#else
@@ -636,7 +635,7 @@ struct XE_2D_TF32x32x8_LD_N {
636
635
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
637
636
int height, int pitch, intel::coord_t coord,
638
637
T *dst) {
639
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
638
+ #if defined(CUTE_ARCH_XE_ENABLED )
640
639
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
641
640
detail::XeSubgroup2DBlockLoad<4 , 8 , 32 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
642
641
#else
@@ -652,7 +651,7 @@ struct XE_2D_TF32x1x16_LD_N {
652
651
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
653
652
int height, int pitch, intel::coord_t coord,
654
653
T *dst) {
655
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
654
+ #if defined(CUTE_ARCH_XE_ENABLED )
656
655
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
657
656
detail::XeSubgroup2DBlockLoad<4 , 8 , 1 , 2 >{}(baseoffset, width, height, pitch, coord, dst);
658
657
#else
@@ -669,7 +668,7 @@ struct XE_2D_TF32x2x16_LD_N {
669
668
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
670
669
int height, int pitch, intel::coord_t coord,
671
670
T *dst) {
672
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
671
+ #if defined(CUTE_ARCH_XE_ENABLED )
673
672
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
674
673
detail::XeSubgroup2DBlockLoad<4 , 8 , 2 , 2 >{}(baseoffset, width, height, pitch, coord, dst);
675
674
#else
@@ -686,7 +685,7 @@ struct XE_2D_TF32x4x16_LD_N {
686
685
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
687
686
int height, int pitch, intel::coord_t coord,
688
687
T *dst) {
689
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
688
+ #if defined(CUTE_ARCH_XE_ENABLED )
690
689
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
691
690
detail::XeSubgroup2DBlockLoad<4 , 8 , 4 , 2 >{}(baseoffset, width, height, pitch, coord, dst);
692
691
#else
@@ -703,7 +702,7 @@ struct XE_2D_TF32x8x16_LD_N {
703
702
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
704
703
int height, int pitch, intel::coord_t coord,
705
704
T *dst) {
706
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
705
+ #if defined(CUTE_ARCH_XE_ENABLED )
707
706
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
708
707
detail::XeSubgroup2DBlockLoad<4 , 8 , 8 , 2 >{}(baseoffset, width, height, pitch, coord, dst);
709
708
#else
@@ -720,7 +719,7 @@ struct XE_2D_TF32x16x16_LD_N {
720
719
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
721
720
int height, int pitch, intel::coord_t coord,
722
721
T *dst) {
723
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
722
+ #if defined(CUTE_ARCH_XE_ENABLED )
724
723
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
725
724
detail::XeSubgroup2DBlockLoad<4 , 8 , 16 , 2 >{}(baseoffset, width, height, pitch, coord, dst);
726
725
#else
@@ -737,7 +736,7 @@ struct XE_2D_TF32x32x16_LD_N {
737
736
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
738
737
int height, int pitch, intel::coord_t coord,
739
738
T *dst) {
740
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
739
+ #if defined(CUTE_ARCH_XE_ENABLED )
741
740
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
742
741
detail::XeSubgroup2DBlockLoad<4 , 8 , 32 , 2 >{}(baseoffset, width, height, pitch, coord, dst);
743
742
#else
@@ -754,7 +753,7 @@ struct XE_2D_U32x16x1_LD_T {
754
753
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
755
754
int height, int pitch, intel::coord_t coord,
756
755
T *dst) {
757
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
756
+ #if defined(CUTE_ARCH_XE_ENABLED )
758
757
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
759
758
detail::XeSubgroup2DBlockTranspose<4 , 1 , 16 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
760
759
#else
@@ -772,7 +771,7 @@ struct XE_2D_U32x16x2_LD_T {
772
771
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
773
772
int height, int pitch, intel::coord_t coord,
774
773
T *dst) {
775
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
774
+ #if defined(CUTE_ARCH_XE_ENABLED )
776
775
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
777
776
detail::XeSubgroup2DBlockTranspose<4 , 2 , 16 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
778
777
#else
@@ -790,7 +789,7 @@ struct XE_2D_U32x16x4_LD_T {
790
789
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
791
790
int height, int pitch, intel::coord_t coord,
792
791
T *dst) {
793
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
792
+ #if defined(CUTE_ARCH_XE_ENABLED )
794
793
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
795
794
detail::XeSubgroup2DBlockTranspose<4 , 4 , 16 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
796
795
#else
@@ -808,7 +807,7 @@ struct XE_2D_U32x16x8_LD_T {
808
807
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
809
808
int height, int pitch, intel::coord_t coord,
810
809
T *dst) {
811
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
810
+ #if defined(CUTE_ARCH_XE_ENABLED )
812
811
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
813
812
detail::XeSubgroup2DBlockTranspose<4 , 8 , 16 , 1 >{}(baseoffset, width, height, pitch, coord, dst);
814
813
#else
@@ -820,7 +819,7 @@ struct XE_2D_U32x16x8_LD_T {
820
819
CUTE_HOST_DEVICE static void copy (const void *baseoffset, int width,
821
820
int height, int pitch,
822
821
intel::coord_t coord) {
823
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
822
+ #if defined(CUTE_ARCH_XE_ENABLED )
824
823
detail::XeSubgroup2DBlockPrefetch<4 , 8 , 16 , 1 >{}(baseoffset, width, height, pitch, coord);
825
824
#else
826
825
CUTE_INVALID_CONTROL_PATH (
@@ -837,7 +836,7 @@ struct XE_2D_U32x1x16_ST_N {
837
836
CUTE_HOST_DEVICE static void copy (void *baseoffset, int width, int height,
838
837
int pitch, intel::coord_t coord,
839
838
const T *src) {
840
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
839
+ #if defined(CUTE_ARCH_XE_ENABLED )
841
840
// static_assert(sizeof(T) == 4, "Expected T to have size 4");
842
841
detail::XeSubgroup2DBlockStore<4 , 16 , 1 , 1 >{}(baseoffset, width, height, pitch, coord, src);
843
842
#else
@@ -853,7 +852,7 @@ struct XE_2D_U32x2x16_ST_N {
853
852
CUTE_HOST_DEVICE static void copy (void *baseoffset, int width, int height,
854
853
int pitch, intel::coord_t coord,
855
854
const T *src) {
856
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
855
+ #if defined(CUTE_ARCH_XE_ENABLED )
857
856
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
858
857
detail::XeSubgroup2DBlockStore<4 , 16 , 2 , 1 >{}(baseoffset, width, height, pitch, coord, src);
859
858
#else
@@ -869,7 +868,7 @@ struct XE_2D_U32x4x16_ST_N {
869
868
CUTE_HOST_DEVICE static void copy (void *baseoffset, int width, int height,
870
869
int pitch, intel::coord_t coord,
871
870
const T *src) {
872
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
871
+ #if defined(CUTE_ARCH_XE_ENABLED )
873
872
static_assert (sizeof (T) == 4 , " Expected T to have size 4" );
874
873
detail::XeSubgroup2DBlockStore<4 , 16 , 4 , 1 >{}(baseoffset, width, height, pitch, coord, src);
875
874
#else
@@ -885,7 +884,7 @@ struct XE_2D_U32x8x16_ST_N {
885
884
CUTE_HOST_DEVICE static void copy (void *baseoffset, int width, int height,
886
885
int pitch, intel::coord_t coord,
887
886
const T *src) {
888
- #if defined(CUTE_ARCH_COPY_XE_ENABLED )
887
+ #if defined(CUTE_ARCH_XE_ENABLED )
889
888
// static_assert(sizeof(T) == 4, "Expected T to have size 4");
890
889
detail::XeSubgroup2DBlockStore<4 , 16 , 8 , 1 >{}(baseoffset, width, height, pitch, coord, src);
891
890
#else
0 commit comments