Skip to content

Commit

Permalink
update
Browse files: browse the repository at this point in the history
  • Loading branch information
taozha2 committed Jan 17, 2025
1 parent 85480f9 commit f628bef
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 3 deletions.
9 changes: 6 additions & 3 deletions benchmarks/pvc/benchmarks.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -94,7 +94,8 @@ using PvcGemmBF16BF16FP32_RCR_6 = cutlass::gemm::device::GemmConfiguration<
float, cutlass::layout::RowMajor,
float, Shape<_8, _128, _32>,
TiledMMA<MMAAtom, Layout<Shape<_1,_4,_1>>>,
XE_2D_U16x8x32_LD_N, XE_2D_U16x16x16_LD_T>;
XE_2D_U16x8x32_LD_N, XE_2D_U16x16x16_LD_T,
Scheduler::Gemm>;

using PvcGemmBF16BF16FP32_CRR_7 = cutlass::gemm::device::GemmConfiguration<
cutlass::arch::IntelPVC,
Expand All @@ -103,7 +104,8 @@ using PvcGemmBF16BF16FP32_CRR_7 = cutlass::gemm::device::GemmConfiguration<
float, cutlass::layout::RowMajor,
float, Shape<_8, _128, _32>,
TiledMMA<MMAAtom, Layout<Shape<_1,_4,_1>>>,
XE_2D_U16x16x16_LD_T, XE_2D_U16x32x32_LD_V>;
XE_2D_U16x16x16_LD_T, XE_2D_U16x32x32_LD_V,
Scheduler::Gemm>;

using PvcGemmBF16BF16FP32_CCR_8 = cutlass::gemm::device::GemmConfiguration<
cutlass::arch::IntelPVC,
Expand All @@ -112,7 +114,8 @@ using PvcGemmBF16BF16FP32_CCR_8 = cutlass::gemm::device::GemmConfiguration<
float, cutlass::layout::RowMajor,
float, Shape<_8, _128, _32>,
TiledMMA<MMAAtom, Layout<Shape<_1,_4,_1>>>,
XE_2D_U16x16x16_LD_T, XE_2D_U16x16x16_LD_T>;
XE_2D_U16x16x16_LD_T, XE_2D_U16x16x16_LD_T,
Scheduler::Gemm>;

CUTLASS_CREATE_GEMM_BENCHMARK(PvcGemmBF16BF16FP32_RRR_1);
CUTLASS_CREATE_GEMM_BENCHMARK(PvcGemmBF16BF16FP32_RRR_2);
Expand Down
14 changes: 14 additions & 0 deletions include/cute/arch/xe_copy_1B.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -394,6 +394,8 @@ struct XE_2D_U8x1x64_LD_N {
};

struct XE_2D_U8x2x64_LD_N {
using BlockShape = Shape<_2, _64>;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand Down Expand Up @@ -521,6 +523,8 @@ struct XE_2D_U8x16x64_LD_N {
};

struct XE_2D_U8x32x64_LD_N {
using BlockShape = Shape<_32, _64>;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand Down Expand Up @@ -554,6 +558,8 @@ struct XE_2D_U8x32x64_LD_N {


struct XE_2D_U8x32x16_LD_V {
using BlockShape = Shape<_32, _16>;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand Down Expand Up @@ -602,6 +608,8 @@ struct XE_2D_U8x32x32_LD_V {
};

struct XE_2D_U8x32x64_LD_V {
using BlockShape = Shape<_32, _64>;

template <class T>
CUTE_HOST_DEVICE static void copy(const void *baseoffset, int width,
int height, int pitch, intel::coord_t coord,
Expand All @@ -618,6 +626,8 @@ struct XE_2D_U8x32x64_LD_V {
};

struct XE_2D_U8x1x16_ST_N {
using BlockShape = Shape<_1, _16>;

template <class T>
CUTE_HOST_DEVICE static void copy(void *baseoffset, int width, int height,
int pitch, intel::coord_t coord,
Expand All @@ -634,6 +644,8 @@ struct XE_2D_U8x1x16_ST_N {
};

struct XE_2D_U8x2x16_ST_N {
using BlockShape = Shape<_2, _16>;

template <class T>
CUTE_HOST_DEVICE static void copy(void *baseoffset, int width, int height,
int pitch, intel::coord_t coord,
Expand All @@ -650,6 +662,8 @@ struct XE_2D_U8x2x16_ST_N {
};

struct XE_2D_U8x4x16_ST_N {
using BlockShape = Shape<_4, _16>;

template <class T>
CUTE_HOST_DEVICE static void copy(void *baseoffset, int width, int height,
int pitch, intel::coord_t coord,
Expand Down

0 comments on commit f628bef

Please sign in to comment.