Skip to content

Commit

Permalink
Support qs8 5x8c8 wasmdot microkernels
Browse files Browse the repository at this point in the history
  • Loading branch information
yolanda15 committed Oct 9, 2024
1 parent 14ce5c4 commit 3b45ce1
Show file tree
Hide file tree
Showing 21 changed files with 2,889 additions and 53 deletions.
44 changes: 44 additions & 0 deletions bench/qs8-qc8w-gemm-fp32.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4093,6 +4093,28 @@

BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_4x8c8__wasmusdot_u2)

// Runs GEMMBenchmark on xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmusdot
// with mr=5, nr=8, kr=8, sr=1. `state` is forwarded unchanged; `net` is not
// referenced in this body.
static void qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmusdot(benchmark::State& state, const char* net) {
  // Tile parameters handed to GEMMBenchmark (the "5x8c8" in the kernel name).
  constexpr int kMr = 5;
  constexpr int kNr = 8;
  constexpr int kKr = 8;
  constexpr int kSr = 1;

  GEMMBenchmark(
    state,
    xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmusdot,
    xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
    xnn_pack_qs8_to_qu8_gemm_goi_w,
    kMr, kNr, kKr, kSr,
    // NOTE(review): presumably gates the run on USDOT availability -- confirm.
    benchmark::utils::CheckWAsmUSDOT);
}

BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmusdot)

// Runs GEMMBenchmark on xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmusdot_u2
// with mr=5, nr=8, kr=8, sr=1. `state` is forwarded unchanged; `net` is not
// referenced in this body.
static void qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmusdot_u2(benchmark::State& state, const char* net) {
  // Tile parameters handed to GEMMBenchmark (the "5x8c8" in the kernel name).
  constexpr int kMr = 5;
  constexpr int kNr = 8;
  constexpr int kKr = 8;
  constexpr int kSr = 1;

  GEMMBenchmark(
    state,
    xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmusdot_u2,
    xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
    xnn_pack_qs8_to_qu8_gemm_goi_w,
    kMr, kNr, kKr, kSr,
    // NOTE(review): presumably gates the run on USDOT availability -- confirm.
    benchmark::utils::CheckWAsmUSDOT);
}

BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmusdot_u2)

static void qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__wasmsdot(benchmark::State& state, const char* net) {
GEMMBenchmark(state,
xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_1x8c8__wasmsdot,
Expand Down Expand Up @@ -4180,6 +4202,28 @@
}

BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_4x8c8__wasmsdot_u2)

// Runs GEMMBenchmark on xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmsdot
// with mr=5, nr=8, kr=8, sr=1. `state` is forwarded unchanged; `net` is not
// referenced in this body.
static void qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmsdot(benchmark::State& state, const char* net) {
  // Tile parameters handed to GEMMBenchmark (the "5x8c8" in the kernel name).
  constexpr int kMr = 5;
  constexpr int kNr = 8;
  constexpr int kKr = 8;
  constexpr int kSr = 1;

  GEMMBenchmark(
    state,
    xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmsdot,
    xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
    xnn_pack_qs8_gemm_goi_w,
    kMr, kNr, kKr, kSr,
    // NOTE(review): presumably gates the run on SDOT availability -- confirm.
    benchmark::utils::CheckWAsmSDOT);
}

BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmsdot)

// Runs GEMMBenchmark on xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmsdot_u2
// with mr=5, nr=8, kr=8, sr=1. `state` is forwarded unchanged; `net` is not
// referenced in this body.
static void qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmsdot_u2(benchmark::State& state, const char* net) {
  // Tile parameters handed to GEMMBenchmark (the "5x8c8" in the kernel name).
  constexpr int kMr = 5;
  constexpr int kNr = 8;
  constexpr int kKr = 8;
  constexpr int kSr = 1;

  GEMMBenchmark(
    state,
    xnn_qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmsdot_u2,
    xnn_init_qs8_qc8w_conv_minmax_fp32_scalar_params,
    xnn_pack_qs8_gemm_goi_w,
    kMr, kNr, kKr, kSr,
    // NOTE(review): presumably gates the run on SDOT availability -- confirm.
    benchmark::utils::CheckWAsmSDOT);
}

BENCHMARK_GEMM(qs8_qc8w_gemm_minmax_fp32_ukernel_5x8c8__wasmsdot_u2)
#endif // XNN_ARCH_WASMRELAXEDSIMD


Expand Down
8 changes: 8 additions & 0 deletions cmake/gen/wasmrelaxedsimd_microkernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -530,6 +530,10 @@ SET(NON_PROD_WASMRELAXEDSIMD_MICROKERNEL_SRCS
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmusdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmsdot-u2.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmsdot.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmusdot-u2.c
src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmusdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x8c8-minmax-fp32-wasmusdot.c
Expand All @@ -556,6 +560,10 @@ SET(NON_PROD_WASMRELAXEDSIMD_MICROKERNEL_SRCS
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x8c8-minmax-fp32-wasmusdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x8c16-minmax-fp32-wasmsdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x8c16-minmax-fp32-wasmusdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-5x8c8-minmax-fp32-wasmsdot-u2.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-5x8c8-minmax-fp32-wasmsdot.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-5x8c8-minmax-fp32-wasmusdot-u2.c
src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-5x8c8-minmax-fp32-wasmusdot.c
src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u8.c
src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u16.c
src/qs8-vlrelu/gen/qs8-vlrelu-wasmrelaxedsimd-arm-u16.c
Expand Down
8 changes: 8 additions & 0 deletions gen/wasmrelaxedsimd_microkernels.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,10 @@ NON_PROD_WASMRELAXEDSIMD_MICROKERNEL_SRCS = [
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmsdot-u2.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmusdot-u2.c",
"src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x4c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-1x8c8-minmax-fp32-wasmusdot.c",
Expand All @@ -553,6 +557,10 @@ NON_PROD_WASMRELAXEDSIMD_MICROKERNEL_SRCS = [
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x8c8-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x8c16-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-4x8c16-minmax-fp32-wasmusdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-5x8c8-minmax-fp32-wasmsdot-u2.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-5x8c8-minmax-fp32-wasmsdot.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-5x8c8-minmax-fp32-wasmusdot-u2.c",
"src/qs8-qc8w-igemm/gen/qs8-qc8w-igemm-5x8c8-minmax-fp32-wasmusdot.c",
"src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u8.c",
"src/qs8-vcvt/gen/qs8-vcvt-wasmrelaxedsimd-u16.c",
"src/qs8-vlrelu/gen/qs8-vlrelu-wasmrelaxedsimd-arm-u16.c",
Expand Down
25 changes: 15 additions & 10 deletions scripts/generate-qs8-gemm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -304,35 +304,40 @@ tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=2 NR=8 -D REQUANTIZATION=FP3
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=3 NR=8 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x8c16-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=4 NR=8 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c16-minmax-fp32-wasmusdot.c &

# QD8 (DATATYPE=QD8) wasmusdot kernels (SDOT=0) from the MRx4c16 template: NR=4, MR=1..4.
# REQUANTIZATION is passed empty; each command runs as a background job.
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=1 NR=4 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x4c16-minmax-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=2 NR=4 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x4c16-minmax-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=3 NR=4 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-3x4c16-minmax-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=4 NR=4 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x4c16-minmax-wasmusdot.c &

# Same template and flags with NR=8, MR=1..4.
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=1 NR=8 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x8c16-minmax-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=2 NR=8 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x8c16-minmax-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=3 NR=8 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-3x8c16-minmax-wasmusdot.c &
tools/xngen src/qs8-gemm/MRx4c16-wasmdot.c.in -D MR=4 NR=8 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-4x8c16-minmax-wasmusdot.c &

### C8 micro-kernels
# QC8 with FP32 requantization from the c8 template, NR=8, MR=1..5.
# SDOT=0 with UNROLL=0: outputs are named *-wasmusdot.c.
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=1 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x8c8-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=2 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x8c8-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=3 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x8c8-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=4 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmusdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=5 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmusdot.c &

# SDOT=0 with UNROLL=1: outputs are named *-wasmusdot-u2.c.
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=1 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x8c8-minmax-fp32-wasmusdot-u2.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=2 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x8c8-minmax-fp32-wasmusdot-u2.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=3 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x8c8-minmax-fp32-wasmusdot-u2.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=4 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmusdot-u2.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=5 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=0 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmusdot-u2.c &

# SDOT=1 with UNROLL=0: outputs are named *-wasmsdot.c.
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=1 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x8c8-minmax-fp32-wasmsdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=2 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x8c8-minmax-fp32-wasmsdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=3 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x8c8-minmax-fp32-wasmsdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=4 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmsdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=5 NR=8 -D UNROLL=0 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmsdot.c &

# QC8 with FP32 requantization from the c8 template, NR=8, MR=1..5.
# SDOT=1 with UNROLL=1: outputs are named *-wasmsdot-u2.c.
# The MRx4c16 QD8 wasmusdot kernels are generated once, ahead of the
# "### C8 micro-kernels" section; they are not repeated here, so no two
# background jobs write the same output file.
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=1 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-1x8c8-minmax-fp32-wasmsdot-u2.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=2 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-2x8c8-minmax-fp32-wasmsdot-u2.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=3 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-3x8c8-minmax-fp32-wasmsdot-u2.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=4 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-4x8c8-minmax-fp32-wasmsdot-u2.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=5 NR=8 -D UNROLL=1 -D REQUANTIZATION=FP32 -D DATATYPE=QC8 -D SDOT=1 -o src/qs8-qc8w-gemm/gen/qs8-qc8w-gemm-5x8c8-minmax-fp32-wasmsdot-u2.c &

tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=1 NR=8 -D UNROLL=0 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-1x8c8-minmax-wasmusdot.c &
tools/xngen src/qs8-gemm/c8-wasmdot.c.in -D MR=2 NR=8 -D UNROLL=0 -D REQUANTIZATION= -D DATATYPE=QD8 -D SDOT=0 -o src/qd8-f32-qc8w-gemm/gen/qd8-f32-qc8w-gemm-2x8c8-minmax-wasmusdot.c &
Expand Down
Loading

0 comments on commit 3b45ce1

Please sign in to comment.