Skip to content

Commit

Permalink
Merge pull request #315 from opencompl/sasha/matmult-unroll-4
Browse files: browse the repository at this point in the history
matmul_transb kernel now has an unroll factor of 4 (reduced from 8)
  • Loading branch information
superlopuh authored Oct 3, 2024
2 parents b19748e + 7e64504 commit d5c6f83
Show file tree
Hide file tree
Showing 11 changed files with 29 additions and 67 deletions.
60 changes: 11 additions & 49 deletions kernels/matmul_transb/snitch_stream.xdsl.mlir.template
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ riscv.assembly_section ".text" {

snitch_stream.streaming_region {
stride_patterns = [
#snitch_stream.stride_pattern<ub = [{{M}}, {{N // 8}}, {{K // 2}}], strides = [{{K * 4}}, 0, 8], repeat = 8>,
#snitch_stream.stride_pattern<ub = [{{M}}, {{N // 8}}, {{K // 2}}, {{8}}], strides = [0, {{4 * 8 * K}}, {{4 * 2}}, {{4 * K}}]>
#snitch_stream.stride_pattern<ub = [{{M}}, {{N // 4}}, {{K // 2}}], strides = [{{K * 4}}, 0, 8], repeat = 4>,
#snitch_stream.stride_pattern<ub = [{{M}}, {{N // 4}}, {{K // 2}}, {{4}}], strides = [0, {{4 * 4 * K}}, {{4 * 2}}, {{4 * K}}]>
]
} ins(%X_moved, %Y_moved : !riscv.reg, !riscv.reg) {
^bb0(%X_stream : !stream.readable<!riscv.freg<ft0>>, %Y_stream : !stream.readable<!riscv.freg<ft1>>):
%res_size = riscv.li {{M * N * 4}} : !riscv.reg
%row_size = riscv.li {{N * 4}} : !riscv.reg
%row_tile_stride = riscv.li {{4 * 8}} : !riscv.reg
%row_tile_stride = riscv.li {{4 * 4}} : !riscv.reg
%frep_count = riscv.li {{K // 2 - 1}} : !riscv.reg
riscv_scf.for %row_offset : !riscv.reg = %zero to %res_size step %row_size {
%row_start = riscv.add %G_moved, %row_offset : (!riscv.reg, !riscv.reg) -> !riscv.reg
Expand All @@ -37,10 +37,6 @@ riscv.assembly_section ".text" {
%g01 = riscv_snitch.vfcpka.s.s %zero_float, %zero_float : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g02 = riscv_snitch.vfcpka.s.s %zero_float, %zero_float : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g03 = riscv_snitch.vfcpka.s.s %zero_float, %zero_float : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g04 = riscv_snitch.vfcpka.s.s %zero_float, %zero_float : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g05 = riscv_snitch.vfcpka.s.s %zero_float, %zero_float : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g06 = riscv_snitch.vfcpka.s.s %zero_float, %zero_float : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g07 = riscv_snitch.vfcpka.s.s %zero_float, %zero_float : (!riscv.freg, !riscv.freg) -> !riscv.freg

%x00 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y00 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
Expand All @@ -54,25 +50,12 @@ riscv.assembly_section ".text" {
%x03 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y03 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%init3 = riscv.vfmul.s %x03, %y03 : (!riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
%x04 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y04 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%init4 = riscv.vfmul.s %x04, %y04 : (!riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
%x05 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y05 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%init5 = riscv.vfmul.s %x05, %y05 : (!riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
%x06 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y06 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%init6 = riscv.vfmul.s %x06, %y06 : (!riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
%x07 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y07 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%init7 = riscv.vfmul.s %x07, %y07 : (!riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg

%frep_count_minus_one = riscv.addi %frep_count, -1 : (!riscv.reg) -> !riscv.reg

%g10, %g11, %g12, %g13, %g14, %g15, %g16, %g17 = riscv_snitch.frep_outer %frep_count_minus_one iter_args(
%acc0 = %init0, %acc1 = %init1, %acc2 = %init2, %acc3 = %init3,
%acc4 = %init4, %acc5 = %init5, %acc6 = %init6, %acc7 = %init7
) -> (!riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg) {
%g10, %g11, %g12, %g13 = riscv_snitch.frep_outer %frep_count_minus_one iter_args(
%acc0 = %init0, %acc1 = %init1, %acc2 = %init2, %acc3 = %init3
) -> (!riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg) {
%x10 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y10 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%res0 = riscv_snitch.vfmac.s %acc0, %x10, %y10 : (!riscv.freg, !riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
Expand All @@ -85,41 +68,20 @@ riscv.assembly_section ".text" {
%x13 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y13 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%res3 = riscv_snitch.vfmac.s %acc3, %x13, %y13 : (!riscv.freg, !riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
%x14 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y14 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%res4 = riscv_snitch.vfmac.s %acc4, %x14, %y14 : (!riscv.freg, !riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
%x15 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y15 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%res5 = riscv_snitch.vfmac.s %acc5, %x15, %y15 : (!riscv.freg, !riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
%x16 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y16 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%res6 = riscv_snitch.vfmac.s %acc6, %x16, %y16 : (!riscv.freg, !riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg
%x17 = riscv_snitch.read from %X_stream : !riscv.freg<ft0>
%y17 = riscv_snitch.read from %Y_stream : !riscv.freg<ft1>
%res7 = riscv_snitch.vfmac.s %acc7, %x17, %y17 : (!riscv.freg, !riscv.freg<ft0>, !riscv.freg<ft1>) -> !riscv.freg

riscv_snitch.frep_yield %res0, %res1, %res2, %res3, %res4, %res5, %res6, %res7 : !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg
riscv_snitch.frep_yield %res0, %res1, %res2, %res3 : !riscv.freg, !riscv.freg, !riscv.freg, !riscv.freg
}


%g20 = riscv_snitch.vfsum.s %g00, %g10 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g21 = riscv_snitch.vfsum.s %g01, %g11 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g22 = riscv_snitch.vfsum.s %g02, %g12 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g23 = riscv_snitch.vfsum.s %g03, %g13 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g24 = riscv_snitch.vfsum.s %g04, %g14 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g25 = riscv_snitch.vfsum.s %g05, %g15 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g26 = riscv_snitch.vfsum.s %g06, %g16 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%g27 = riscv_snitch.vfsum.s %g07, %g17 : (!riscv.freg, !riscv.freg) -> !riscv.freg

%r0 = riscv_snitch.vfcpka.s.s %g20, %g21 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%r1 = riscv_snitch.vfcpka.s.s %g22, %g23 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%r2 = riscv_snitch.vfcpka.s.s %g24, %g25 : (!riscv.freg, !riscv.freg) -> !riscv.freg
%r3 = riscv_snitch.vfcpka.s.s %g26, %g27 : (!riscv.freg, !riscv.freg) -> !riscv.freg

riscv.fsd %tile_start, %r0, 0 : (!riscv.reg, !riscv.freg) -> ()
riscv.fsd %tile_start, %r1, 8 : (!riscv.reg, !riscv.freg) -> ()
riscv.fsd %tile_start, %r2, 16 : (!riscv.reg, !riscv.freg) -> ()
riscv.fsd %tile_start, %r3, 24 : (!riscv.reg, !riscv.freg) -> ()
riscv.fsw %tile_start, %g20, 0 : (!riscv.reg, !riscv.freg) -> ()
riscv.fsw %tile_start, %g21, 4 : (!riscv.reg, !riscv.freg) -> ()
riscv.fsw %tile_start, %g22, 8 : (!riscv.reg, !riscv.freg) -> ()
riscv.fsw %tile_start, %g23, 12 : (!riscv.reg, !riscv.freg) -> ()

riscv_scf.yield
}
Expand Down
2 changes: 1 addition & 1 deletion results/kernels.csv
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ matmul,4x16x8xf64,baseline,2495,3293,3290,2.9941520467836256,1.4991334488734835,
matmul,4x16x8xf64,linalg,2694,3483,3480,2.9941520467836256,1.4745484400656814,512,513,1536,0.19042316258351893,0.44415584415584414,1155,898,609,0.4287305122494432,0,33,1.0,1.0,1,3.4,1155,0.73520050922979,416,17,5,0.1544172234595397,69,790,0.0,0.5831477357089829,0.0
matmul,4x16x8xf64,linalg_xdsl,708,1493,1490,2.811418685121107,0.0,512,578,1625,0.8163841807909604,0.996551724137931,580,0,0,0.8192090395480226,0,0,5.37037037037037,5.37037037037037,1,0.0,108,0.5869565217391305,76,0,0,0.10734463276836158,0,786,0.0,0.9265536723163842,0.0
matmul_transb,4x16x16xf32,baseline,3386,4184,4181,2.539660056657224,1.4921875,0,706,1793,0.20850561134081513,0.3935340022296544,1794,1528,1024,0.5298287064382753,0,64,1.0,1.0,1,0.0,1794,0.5561066336019839,1432,0,0,0.42291789722386297,0,799,0.0,0.9527466036621383,0.0
matmul_transb,4x16x16xf32,snitch_stream,871,1660,1657,2.648367952522255,0.0,0,674,1785,0.7738231917336394,0.9519774011299436,708,0,0,0.8128587830080367,0,32,2.1325301204819276,2.1325301204819276,1,0.0,332,0.7793427230046949,94,0,0,0.1079219288174512,0,790,0.0,0.9207807118254879,0.0
matmul_transb,4x16x16xf32,snitch_stream,845,1636,1633,2.7429906542056073,0.0,0,642,1761,0.7597633136094675,0.9067796610169492,708,0,0,0.8378698224852071,0,64,2.0823529411764703,2.0823529411764707,1,0.0,340,0.7296137339055794,126,0,0,0.14911242603550295,0,792,0.0,0.98698224852071,0.0
matmul_transb,4x16x16xf32,snrt,849,1612,1609,2.648367952522255,0.0,0,674,1785,0.7938751472320377,0.9519774011299436,708,0,0,0.833922261484099,0,32,2.1325301204819276,2.1325301204819276,1,0.0,332,0.8924731182795699,40,0,0,0.04711425206124853,0,764,0.0,0.8810365135453475,0.0
pooling_nchw_max_d1_s2_3x3,4x4xf64,baseline,584,1328,1325,0.995575221238938,1.1226415094339623,0,226,225,0.386986301369863,0.6330532212885154,357,119,106,0.6113013698630136,0,25,1.0,1.0,1,0.0,357,0.9153846153846154,33,0,0,0.05650684931506849,0,745,0.0,0.6678082191780821,0.0
pooling_nchw_max_d1_s2_3x3,4x4xf64,linalg,484,1242,1239,0.993103448275862,1.0909090909090908,0,145,144,0.29958677685950413,0.5823293172690763,249,96,88,0.5144628099173554,0,16,1.0,1.0,1,0.0,249,0.8498293515358362,44,0,0,0.09090909090909091,32,759,0.0,0.6053719008264463,0.0
Expand Down
2 changes: 1 addition & 1 deletion results/kernels.fast.csv
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ matmul,4x16x8xf64,baseline,2495,3293,3290,2.9941520467836256,1.4991334488734835,
matmul,4x16x8xf64,linalg,2694,3483,3480,2.9941520467836256,1.4745484400656814,512,513,1536,0.19042316258351893,0.44415584415584414,1155,898,609,0.4287305122494432,0,33,1.0,1.0,1,3.4,1155,0.73520050922979,416,17,5,0.1544172234595397,69,790,0.0,0.5831477357089829,0.0
matmul,4x16x8xf64,linalg_xdsl,708,1493,1490,2.811418685121107,0.0,512,578,1625,0.8163841807909604,0.996551724137931,580,0,0,0.8192090395480226,0,0,5.37037037037037,5.37037037037037,1,0.0,108,0.5869565217391305,76,0,0,0.10734463276836158,0,786,0.0,0.9265536723163842,0.0
matmul_transb,4x16x16xf32,baseline,3386,4184,4181,2.539660056657224,1.4921875,0,706,1793,0.20850561134081513,0.3935340022296544,1794,1528,1024,0.5298287064382753,0,64,1.0,1.0,1,0.0,1794,0.5561066336019839,1432,0,0,0.42291789722386297,0,799,0.0,0.9527466036621383,0.0
matmul_transb,4x16x16xf32,snitch_stream,871,1660,1657,2.648367952522255,0.0,0,674,1785,0.7738231917336394,0.9519774011299436,708,0,0,0.8128587830080367,0,32,2.1325301204819276,2.1325301204819276,1,0.0,332,0.7793427230046949,94,0,0,0.1079219288174512,0,790,0.0,0.9207807118254879,0.0
matmul_transb,4x16x16xf32,snitch_stream,845,1636,1633,2.7429906542056073,0.0,0,642,1761,0.7597633136094675,0.9067796610169492,708,0,0,0.8378698224852071,0,64,2.0823529411764703,2.0823529411764707,1,0.0,340,0.7296137339055794,126,0,0,0.14911242603550295,0,792,0.0,0.98698224852071,0.0
matmul_transb,4x16x16xf32,snrt,849,1612,1609,2.648367952522255,0.0,0,674,1785,0.7938751472320377,0.9519774011299436,708,0,0,0.833922261484099,0,32,2.1325301204819276,2.1325301204819276,1,0.0,332,0.8924731182795699,40,0,0,0.04711425206124853,0,764,0.0,0.8810365135453475,0.0
pooling_nchw_max_d1_s2_3x3,4x4xf64,baseline,584,1328,1325,0.995575221238938,1.1226415094339623,0,226,225,0.386986301369863,0.6330532212885154,357,119,106,0.6113013698630136,0,25,1.0,1.0,1,0.0,357,0.9153846153846154,33,0,0,0.05650684931506849,0,745,0.0,0.6678082191780821,0.0
pooling_nchw_max_d1_s2_3x3,4x4xf64,linalg,484,1242,1239,0.993103448275862,1.0909090909090908,0,145,144,0.29958677685950413,0.5823293172690763,249,96,88,0.5144628099173554,0,16,1.0,1.0,1,0.0,249,0.8498293515358362,44,0,0,0.09090909090909091,32,759,0.0,0.6053719008264463,0.0
Expand Down
18 changes: 9 additions & 9 deletions results/kernels.low_level_representation.csv
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
test,params,impl,cycles,end,end_fpss,fpss_avg_fpu_latency,fpss_avg_load_latency,fpss_fpu_fmadd_issues,fpss_fpu_issues,fpss_fpu_latency,fpss_fpu_occupancy,fpss_fpu_rel_occupancy,fpss_issues,fpss_load_latency,fpss_loads,fpss_occupancy,fpss_section_latency,fpss_stores,fseq_fpu_yield,fseq_yield,section,snitch_avg_load_latency,snitch_fseq_offloads,snitch_fseq_rel_offloads,snitch_issues,snitch_load_latency,snitch_loads,snitch_occupancy,snitch_stores,start,tend,total_ipc,tstart
matmul_transb,1x12x40xf32,snitch_stream,501,1307,1304,2.56140350877193,0,0,342,876,0.6826347305389222,0.9395604395604396,364,0,0,0.7265469061876247,0,20,1.7416267942583734,1.7416267942583732,1,0,209,0.7545126353790613,68,0,0,0.13572854291417166,0,807,0.0,0.8622754491017964,0.0
matmul_transb,1x16x40xf32,snitch_stream,580,1369,1366,2.6445497630331753,0,0,422,1116,0.7275862068965517,0.9504504504504504,444,0,0,0.7655172413793103,0,20,2.1244019138755985,2.124401913875598,1,0,209,0.7517985611510791,69,0,0,0.11896551724137931,0,790,0.0,0.8844827586206896,0.0
matmul_transb,1x20x16xf32,snitch_stream,344,1131,1128,2.6881188118811883,0,0,202,543,0.5872093023255814,0.9528301886792453,212,0,0,0.6162790697674418,0,8,2.4651162790697674,2.4651162790697674,1,0,86,0.6013986013986014,57,0,0,0.16569767441860464,0,788,0.0,0.7819767441860465,0.0
matmul_transb,1x20x24xf32,snitch_stream,460,1265,1262,2.6953642384105962,0,0,302,814,0.6565217391304348,0.9556962025316456,316,0,0,0.6869565217391305,0,12,2.488188976377953,2.4881889763779528,1,0,127,0.675531914893617,61,0,0,0.13260869565217392,0,806,0.0,0.8195652173913044,0.0
matmul_transb,1x20x32xf32,snitch_stream,554,1343,1340,2.699004975124378,0,0,402,1085,0.7256317689530686,0.9571428571428572,420,0,0,0.7581227436823105,0,16,2.5,2.5,1,0,168,0.721030042918455,65,0,0,0.11732851985559567,0,790,0.0,0.8754512635379061,0.0
matmul_transb,1x20x40xf32,snitch_stream,660,1459,1456,2.701195219123506,0,0,502,1356,0.7606060606060606,0.9580152671755725,524,0,0,0.793939393939394,0,20,2.5071770334928227,2.507177033492823,1,0,209,0.7517985611510791,69,0,0,0.10454545454545454,0,800,0.0,0.8984848484848484,0.0
matmul_transb,1x20x8xf32,snitch_stream,219,1001,998,2.6666666666666665,0,0,102,272,0.4657534246575342,0.9444444444444444,108,0,0,0.4931506849315068,0,4,2.4,2.4,1,0,45,0.46875,51,0,0,0.2328767123287671,0,783,0.0,0.726027397260274,0.0
matmul_transb,1x4x40xf32,snitch_stream,359,1138,1135,2.159340659340659,0,0,182,393,0.5069637883008357,0.8921568627450981,204,0,0,0.5682451253481894,0,20,0.9760765550239234,0.9760765550239234,1,0,209,0.7683823529411765,63,0,0,0.17548746518105848,0,780,0.0,0.7437325905292479,0.0
matmul_transb,1x8x40xf32,snitch_stream,421,1212,1209,2.427480916030534,0,0,262,636,0.6223277909738717,0.9225352112676056,284,0,0,0.6745843230403801,0,20,1.3588516746411485,1.3588516746411483,1,0,209,0.7545126353790613,68,0,0,0.16152019002375298,0,792,0.0,0.836104513064133,0.0
matmul_transb,1x12x40xf32,snitch_stream,484,1267,1264,2.6739130434782608,0,0,322,861,0.6652892561983471,0.8846153846153846,364,0,0,0.7520661157024794,0,40,1.7009345794392525,1.7009345794392523,1,0,214,0.7086092715231788,88,0,0,0.18181818181818182,0,784,0.0,0.9338842975206612,0.0
matmul_transb,1x16x40xf32,snitch_stream,573,1386,1383,2.7388059701492535,0,0,402,1101,0.7015706806282722,0.9054054054054054,444,0,0,0.774869109947644,0,40,2.074766355140187,2.074766355140187,1,0,214,0.7062706270627063,89,0,0,0.15532286212914484,0,814,0.0,0.9301919720767888,0.0
matmul_transb,1x20x16xf32,snitch_stream,326,1132,1129,2.768041237113402,0,0,194,537,0.5950920245398773,0.9150943396226415,212,0,0,0.6503067484662577,0,16,2.409090909090909,2.409090909090909,1,0,88,0.5751633986928104,65,0,0,0.19938650306748465,0,807,0.0,0.8496932515337423,0.0
matmul_transb,1x20x24xf32,snitch_stream,431,1220,1217,2.7758620689655173,0,0,290,805,0.6728538283062645,0.9177215189873418,316,0,0,0.7331786542923434,0,24,2.430769230769231,2.4307692307692306,1,0,130,0.6403940886699507,73,0,0,0.16937354988399073,0,790,0.0,0.9025522041763341,0.0
matmul_transb,1x20x32xf32,snitch_stream,547,1360,1357,2.7797927461139897,0,0,386,1073,0.7056672760511883,0.919047619047619,420,0,0,0.7678244972577697,0,32,2.441860465116279,2.441860465116279,1,0,172,0.6798418972332015,81,0,0,0.1480804387568556,0,814,0.0,0.9159049360146253,0.0
matmul_transb,1x20x40xf32,snitch_stream,653,1476,1473,2.7821576763485476,0,0,482,1341,0.7381316998468607,0.9198473282442748,524,0,0,0.8024502297090352,0,40,2.448598130841121,2.4485981308411215,1,0,214,0.7062706270627063,89,0,0,0.1362940275650842,0,824,0.0,0.9387442572741195,0.0
matmul_transb,1x20x8xf32,snitch_stream,215,983,980,2.7448979591836733,0,0,98,269,0.4558139534883721,0.9074074074074074,108,0,0,0.5023255813953489,0,8,2.347826086956522,2.347826086956522,1,0,46,0.44660194174757284,57,0,0,0.2651162790697674,0,769,0.0,0.7674418604651163,0.0
matmul_transb,1x4x40xf32,snitch_stream,380,1133,1130,2.3518518518518516,0,0,162,381,0.4263157894736842,0.7941176470588235,204,0,0,0.5368421052631579,0,40,0.9532710280373832,0.9532710280373832,1,0,214,0.7328767123287672,78,0,0,0.20526315789473684,0,754,0.0,0.7421052631578947,0.0
matmul_transb,1x8x40xf32,snitch_stream,412,1180,1177,2.5661157024793386,0,0,242,621,0.587378640776699,0.852112676056338,284,0,0,0.6893203883495146,0,40,1.327102803738318,1.3271028037383177,1,0,214,0.7086092715231788,88,0,0,0.21359223300970873,0,769,0.0,0.9029126213592233,0.0
relu,16x40xf32,snitch_stream,371,1151,1148,0.9969040247678018,0,0,323,322,0.8706199460916442,0.9938461538461538,325,0,0,0.876010781671159,0,0,46.42857142857143,46.42857142857143,1,0,7,0.28,18,0,0,0.04851752021563342,0,781,0.0,0.9245283018867924,0.0
relu,24x40xf32,snitch_stream,531,1331,1328,0.9979296066252588,0,0,483,482,0.9096045197740112,0.9958762886597938,485,0,0,0.9133709981167608,0,0,69.28571428571428,69.28571428571429,1,0,7,0.28,18,0,0,0.03389830508474576,0,801,0.0,0.9472693032015066,0.0
relu,32x40xf32,snitch_stream,688,1502,1499,0.9984447900466563,0,0,643,642,0.934593023255814,0.9968992248062015,645,0,0,0.9375,0,0,92.14285714285715,92.14285714285714,1,0,7,0.2916666666666667,17,0,0,0.024709302325581394,0,815,0.0,0.9622093023255814,0.0
Expand Down
2 changes: 1 addition & 1 deletion results/pivoted.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ddot 128xf64,956,965,,213,577
dense 8x8xf64,3206,3530,,2741,2723
fill 4x4xf64,50,50,64,,
matmul 4x16x8xf64,2495,2694,708,,
matmul_transb 4x16x16xf32,3386,,,871,849
matmul_transb 4x16x16xf32,3386,,,845,849
pooling_nchw_max_d1_s2_3x3 4x4xf64,584,484,275,,
pooling_nchw_sum_d1_s2_3x3 4x4xf64,902,832,271,,
relu 4x4xf64,142,125,72,,
Expand Down
2 changes: 1 addition & 1 deletion results/pivoted.fast.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ddot 128xf64,956,965,,213,577
dense 8x8xf64,3206,3530,,2741,2723
fill 4x4xf64,50,50,64,,
matmul 4x16x8xf64,2495,2694,708,,
matmul_transb 4x16x16xf32,3386,,,871,849
matmul_transb 4x16x16xf32,3386,,,845,849
pooling_nchw_max_d1_s2_3x3 4x4xf64,584,484,275,,
pooling_nchw_sum_d1_s2_3x3 4x4xf64,902,832,271,,
relu 4x4xf64,142,125,72,,
Expand Down
2 changes: 1 addition & 1 deletion results/pivoted_fpu.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ddot 128xf64,0.13,0.13,,0.64,0.22
dense 8x8xf64,0.20,0.18,,0.26,0.26
fill 4x4xf64,0.02,0.02,0.28,,
matmul 4x16x8xf64,0.21,0.19,0.82,,
matmul_transb 4x16x16xf32,0.21,,,0.77,0.79
matmul_transb 4x16x16xf32,0.21,,,0.76,0.79
pooling_nchw_max_d1_s2_3x3 4x4xf64,0.39,0.30,0.65,,
pooling_nchw_sum_d1_s2_3x3 4x4xf64,0.22,0.17,0.66,,
relu 4x4xf64,0.13,0.14,0.25,,
Expand Down
2 changes: 1 addition & 1 deletion results/pivoted_fpu.fast.csv
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ ddot 128xf64,0.13,0.13,,0.64,0.22
dense 8x8xf64,0.20,0.18,,0.26,0.26
fill 4x4xf64,0.02,0.02,0.28,,
matmul 4x16x8xf64,0.21,0.19,0.82,,
matmul_transb 4x16x16xf32,0.21,,,0.77,0.79
matmul_transb 4x16x16xf32,0.21,,,0.76,0.79
pooling_nchw_max_d1_s2_3x3 4x4xf64,0.39,0.30,0.65,,
pooling_nchw_sum_d1_s2_3x3 4x4xf64,0.22,0.17,0.66,,
relu 4x4xf64,0.13,0.14,0.25,,
Expand Down
Loading

0 comments on commit d5c6f83

Please sign in to comment.