@@ -828,6 +828,60 @@ struct Copy_Traits_<XE_2D_U8x32x64_LD_N::PREFETCH, args_t...>
828
828
using RefLayout = DstLayout;
829
829
};
830
830
831
+ template <class ... args_t >
832
+ struct Copy_Traits_ <XE_2D_U8x16x32_LD_T, args_t ...>
833
+ : XE_2D_LD_Unpack<XE_2D_U8x16x32_LD_T, args_t ...> {
834
+ using ThrID = Layout<_16>;
835
+ // Map from (src-thr,src-val) to bit
836
+ using SrcLayout = Layout<Shape <_16,Shape <_8,_32>>,
837
+ Stride< _0, Stride<_1, _8>>>;
838
+ // Map from (dst-thr,dst-val) to bit
839
+ using DstLayout = Layout<Shape < _16,Shape <_8,_32>>,
840
+ Stride<_256,Stride< _1,_8>>>;
841
+ // Reference map from (thr,val) to bit
842
+ using RefLayout = DstLayout;
843
+
844
+ template <class ... ArgT>
845
+ Copy_Traits_ (ArgT... args)
846
+ : XE_2D_LD_Unpack<XE_2D_U8x16x32_LD_T, args_t ...>(args...) {}
847
+ };
848
+
849
+ template <class ... args_t >
850
+ struct Copy_Traits_ <XE_2D_U8x32x8_LD_T, args_t ...>
851
+ : XE_2D_LD_Unpack<XE_2D_U8x32x8_LD_T, args_t ...> {
852
+ using ThrID = Layout<_16>;
853
+ // Map from (src-thr,src-val) to bit
854
+ using SrcLayout = Layout<Shape <_16,Shape <_8, _2, _8>>,
855
+ Stride<_0, Stride<_1, _8, _16>>>;
856
+ // Map from (dst-thr,dst-val) to bit
857
+ using DstLayout = Layout<Shape < _16,Shape <_8, _2, _8>>,
858
+ Stride<_256,Stride<_1, _8, _16>>>;
859
+ // Reference map from (thr,val) to bit
860
+ using RefLayout = DstLayout;
861
+
862
+ template <class ... ArgT>
863
+ Copy_Traits_ (ArgT... args)
864
+ : XE_2D_LD_Unpack<XE_2D_U8x32x8_LD_T, args_t ...>(args...) {}
865
+ };
866
+
867
+ template <class ... args_t >
868
+ struct Copy_Traits_ <XE_2D_U8x32x4_LD_T, args_t ...>
869
+ : XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t ...> {
870
+ using ThrID = Layout<_16>;
871
+ // Map from (src-thr,src-val) to bit
872
+ using SrcLayout = Layout<Shape <_16,Shape <_8, _2, _4>>,
873
+ Stride<_0, Stride<_1, _8, _16>>>;
874
+ // Map from (dst-thr,dst-val) to bit
875
+ using DstLayout = Layout<Shape < _16,Shape <_8, _2, _4>>,
876
+ Stride<_256,Stride<_1, _8, _16>>>;
877
+ // Reference map from (thr,val) to bit
878
+ using RefLayout = DstLayout;
879
+
880
+ template <class ... ArgT>
881
+ Copy_Traits_ (ArgT... args)
882
+ : XE_2D_LD_Unpack<XE_2D_U8x32x4_LD_T, args_t ...>(args...) {}
883
+ };
884
+
831
885
template <class ... args_t >
832
886
struct Copy_Traits_ <XE_2D_U16x1x16_LD_N, args_t ...>
833
887
: XE_2D_LD_Unpack<XE_2D_U16x1x16_LD_N, args_t ...> {
@@ -1403,6 +1457,24 @@ struct Copy_Traits_<XE_2D_TF32x32x16_LD_N, args_t...>
1403
1457
: XE_2D_LD_Unpack<XE_2D_TF32x32x16_LD_N, args_t ...>(args...) {}
1404
1458
};
1405
1459
1460
+ template <class ... args_t >
1461
+ struct Copy_Traits_ <XE_2D_TF32x8x8_LD_T, args_t ...>
1462
+ : XE_2D_LD_Unpack<XE_2D_TF32x8x8_LD_T, args_t ...> {
1463
+ using ThrID = Layout<_16>;
1464
+ // Map from (src-thr,src-val) to bit
1465
+ using SrcLayout = Layout<Shape <_16, Shape <_4, _32>>,
1466
+ Stride< _0, Stride<_32, _1>>>;
1467
+ // Map from (dst-thr,dst-val) to bit
1468
+ using DstLayout = Layout<Shape <_16, Shape <_4, _32>>,
1469
+ Stride< _32, Stride<_32, _1>>>;
1470
+ // Reference map from (thr,val) to bit
1471
+ using RefLayout = DstLayout;
1472
+
1473
+ template <class ... ArgTs>
1474
+ Copy_Traits_ (ArgTs... args)
1475
+ : XE_2D_LD_Unpack<XE_2D_TF32x8x8_LD_T, args_t ...>(args...) {}
1476
+ };
1477
+
1406
1478
template <class ... args_t >
1407
1479
struct Copy_Traits_ <XE_2D_U32x1x16_LD_N, args_t ...>
1408
1480
: XE_2D_LD_Unpack<XE_2D_U32x1x16_LD_N, args_t ...> {
@@ -2213,6 +2285,9 @@ COPY_TRAIT_LD_DEF(XE_2D_U8x1x64_LD_N)
2213
2285
// One COPY_TRAIT_LD_DEF expansion per load op (the macro is defined
// elsewhere in this file). The three U8 *_LD_T ops sit between the
// U8 *_LD_N entries and the U64 *_LD_T entries.
COPY_TRAIT_LD_DEF(XE_2D_U8x2x64_LD_N)
COPY_TRAIT_LD_DEF(XE_2D_U8x4x64_LD_N)
COPY_TRAIT_LD_DEF(XE_2D_U8x8x64_LD_N)
COPY_TRAIT_LD_DEF(XE_2D_U8x32x8_LD_T)
COPY_TRAIT_LD_DEF(XE_2D_U8x32x4_LD_T)
COPY_TRAIT_LD_DEF(XE_2D_U8x16x32_LD_T)
COPY_TRAIT_LD_DEF(XE_2D_U64x8x1_LD_T)
COPY_TRAIT_LD_DEF(XE_2D_U64x8x2_LD_T)
COPY_TRAIT_LD_DEF(XE_2D_U64x8x4_LD_T)
@@ -2233,6 +2308,7 @@ COPY_TRAIT_LD_DEF(XE_2D_TF32x1x8_LD_N)
2233
2308
// One COPY_TRAIT_LD_DEF expansion per load op (the macro is defined
// elsewhere in this file). XE_2D_TF32x8x8_LD_T follows the TF32 *_LD_N
// entries and precedes the U32 *_LD_N entries.
COPY_TRAIT_LD_DEF(XE_2D_TF32x2x8_LD_N)
COPY_TRAIT_LD_DEF(XE_2D_TF32x4x8_LD_N)
COPY_TRAIT_LD_DEF(XE_2D_TF32x8x8_LD_N)
COPY_TRAIT_LD_DEF(XE_2D_TF32x8x8_LD_T)
COPY_TRAIT_LD_DEF(XE_2D_U32x1x16_LD_N)
COPY_TRAIT_LD_DEF(XE_2D_U32x2x16_LD_N)
COPY_TRAIT_LD_DEF(XE_2D_U32x4x16_LD_N)
0 commit comments