diff --git a/kernels/matmul/8x8xf64/Makefile b/kernels/matmul/8x8xf64/Makefile
new file mode 100644
index 00000000..0d36d7f6
--- /dev/null
+++ b/kernels/matmul/8x8xf64/Makefile
@@ -0,0 +1,8 @@
+.DEFAULT_GOAL := all
+
+include ../../../snitch/Makefile.rules
+
+TESTS =
+TESTS += baseline.x
+
+include ../../Makefile.kernels
diff --git a/kernels/matmul/8x8xf64/baseline.c b/kernels/matmul/8x8xf64/baseline.c
new file mode 100644
index 00000000..8b245d78
--- /dev/null
+++ b/kernels/matmul/8x8xf64/baseline.c
@@ -0,0 +1,16 @@
+#include "data.h"
+
+#include <snrt.h>
+
+#include <stdint.h>
+
+void matmul(const double* x, const double* y, double* g) {
+    for (uint32_t i = 0; i < M; ++i) {
+        for (uint32_t j = 0; j < N; ++j) {
+            for (uint32_t k = 0; k < K; ++k) {
+                // row-major accesses
+                g[i * N + j] += x[i * K + k] * y[k * N + j];
+            }
+        }
+    }
+}
diff --git a/kernels/matmul/8x8xf64/data.c b/kernels/matmul/8x8xf64/data.c
new file mode 100644
index 00000000..ba859643
--- /dev/null
+++ b/kernels/matmul/8x8xf64/data.c
@@ -0,0 +1,207 @@
+#define M 8
+#define K 8
+#define N 8
+
+const double X[M * K] = {
+97.62700785,
+ 430.37873274,
+ 205.52675214,
+ 89.76636599,
+-152.69040132,
+ 291.78822613,
+-124.82557747,
+ 783.54600156,
+ 927.325521 ,
+-233.11696235,
+ 583.45007617,
+ 57.78983951,
+ 136.08912219,
+ 851.19327659,
+-857.9278836 ,
+ -825.7414006 ,
+-959.56320512,
+ 665.2396911 ,
+ 556.3135019 ,
+ 740.02429649,
+ 957.23668447,
+ 598.31712843,
+ -77.04127549,
+ 561.05835257,
+-763.45114826,
+ 279.84204266,
+-713.29342518,
+ 889.3378341 ,
+ 43.6966435 ,
+-170.67612002,
+ -470.88877579,
+ 548.46737887,
+ -87.69933557,
+ 136.86789774,
+-962.42039913,
+ 235.27099415,
+ 224.19144544,
+ 233.86799375,
+ 887.49615703,
+ 363.64059821,
+ -280.98419885,
+-125.9360924 ,
+ 395.26239185,
+-879.54905674,
+ 333.53343089,
+ 341.27573924,
+-579.23487785,
+-742.14740469,
+-369.14329815,
+-272.57845811,
+ 140.39354084,
+-122.79697308,
+ 976.74767612,
+-795.9103785 ,
+-582.24648781,
+ -677.38096423,
+ 306.21665093,
+-493.41679492,
+ -67.37845429,
+-511.148816 ,
+ -682.06083271,
+-779.24971767,
+ 312.65917893,
+-723.6340973
+};
+
+
+const double Y[K * N] = {
+-606.83527664,
+-262.54965868,
+ 641.9864597 ,
+-805.79744841,
+ 675.889815 ,
+ -807.80318421,
+ 952.91893003,
+ -62.6975967 ,
+ 953.52217638,
+ 209.69103949,
+ 478.5271588 ,
+-921.62441549,
+-434.38607485,
+-759.60687757,
+-407.71960496,
+ -762.54456209,
+-364.03364121,
+-171.47401097,
+-871.7050073 ,
+ 384.94423874,
+ 133.20290841,
+-469.22101812,
+ 46.49610693,
+-812.11897848,
+ 151.89299111,
+ 858.59239515,
+-362.8620951 ,
+ 334.82075993,
+-736.40427519,
+ 432.65440824,
+ -421.18781411,
+-633.61727599,
+ 173.02586962,
+-959.78490763,
+ 657.88005843,
+ -990.60904761,
+ 355.63307359,
+-459.98405362,
+ 470.38804425,
+ 924.37709023,
+ -502.49371296,
+ 152.31466884,
+ 184.08386254,
+ 144.50381158,
+-553.83673472,
+ 905.49802303,
+-105.74924276,
+ 692.81734494,
+ 398.95855064,
+-405.12609829,
+ 627.5956394 ,
+-206.98851831,
+ 762.20639422,
+ 162.54574527,
+ 763.47072371,
+ 385.06318016,
+ 450.50855964,
+ 2.64876385,
+ 912.16726945,
+ 287.98039846,
+ -152.28990288,
+ 212.78642826,
+-961.61360338,
+-396.85036665
+};
+
+
+const double G[M * N] = {
+4.20101194e+05,
+ 3.50083946e+05,
+ 6.46538181e+05,
+ 7.87611951e+04,
+ -5.90066796e+05,
+ 1.75051891e+04,
+-1.06214482e+06,
+-8.56097454e+05,
+ -2.10708718e+06,
+ 1.63457337e+03,
+-1.09121328e+06,
+-3.60471830e+05,
+ -1.87997154e+05,
+-4.27787358e+05,
+ 1.09454317e+06,
+ 3.22031941e+05,
+ 1.21350670e+06,
+ 1.36502415e+05,
+ 1.52156393e+05,
+-6.22301317e+04,
+ -1.54349419e+06,
+ 5.37282592e+05,
+-1.68277900e+06,
+-3.20747360e+05,
+ 1.27742014e+06,
+ 1.26930043e+06,
+ 1.44955858e+05,
+ 5.67935087e+05,
+ -1.71986709e+06,
+ 9.89135329e+05,
+-2.09766800e+06,
+-6.26582774e+05,
+ 1.00898615e+06,
+-1.19381322e+05,
+ 1.84200115e+06,
+-6.14448878e+05,
+ 1.51101302e+05,
+ 8.50535943e+05,
+ 1.25405040e+05,
+ 1.10035520e+06,
+ -9.06271345e+05,
+-8.11028383e+05,
+-1.02428859e+06,
+-1.74769700e+05,
+ 1.66273819e+05,
+-3.39830959e+05,
+ 5.64655615e+05,
+ 9.66178163e+05,
+ -7.41759606e+04,
+-9.14352682e+05,
+-9.32476120e+05,
+-5.95542872e+05,
+ 4.25567604e+05,
+-1.02251944e+06,
+ 5.68088238e+05,
+ 5.90867503e+05,
+ -6.37130208e+05,
+-2.03819589e+05,
+-8.51329047e+05,
+ 3.00857968e+05,
+ 1.32626599e+06,
+-5.57125464e+05,
+ 1.40126649e+06,
+-2.71458462e+04
+};
+
diff --git a/kernels/matmul/8x8xf64/data.h b/kernels/matmul/8x8xf64/data.h
new file mode 100644
index 00000000..f3da7f22
--- /dev/null
+++ b/kernels/matmul/8x8xf64/data.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#define M 8
+#define K 8
+#define N 8
+
+extern const double X[M * K];
+extern const double Y[K * N];
+extern const double G[M * N];
diff --git a/kernels/matmul/8x8xf64/main.c b/kernels/matmul/8x8xf64/main.c
new file mode 100644
index 00000000..945ea11a
--- /dev/null
+++ b/kernels/matmul/8x8xf64/main.c
@@ -0,0 +1,54 @@
+#include "data.h"
+
+#include <snrt.h>
+
+#include <math.h>
+
+// Kernel provided via external definition; the prototype must match the
+// definition in the kernel source (read-only inputs, accumulated output).
+void matmul(const double *x, const double *y, double *g);
+
+int main(void) {
+    // Allocate shared local memory
+    // By bumping a base pointer (snrt_l1_next()) that is the same for all the
+    // cores in the cluster by known offsets instead of using an allocator, we
+    // are essentially providing the same memory regions to all the cores in
+    // this cluster.
+    double *local_x = (double *)snrt_l1_next();  // M x K operand
+    double *local_y = local_x + M * K;           // K x N operand
+    double *local_z = local_y + K * N;           // M x N result
+
+    // Copy data in shared local memory
+    if (snrt_is_dm_core()) {
+        snrt_dma_start_1d(local_x, X, M * K * sizeof(double));
+        snrt_dma_start_1d(local_y, Y, K * N * sizeof(double));
+        // The transfers are asynchronous: wait for them to land before
+        // releasing the other cores through the barrier below.
+        snrt_dma_wait_all();
+    }
+
+    snrt_cluster_hw_barrier();
+
+    // Launch kernel: from this point on only core 0 is required to be alive.
+    int thiscore = snrt_cluster_core_idx();
+    if (thiscore != 0) return 0;
+
+    // The baseline kernel accumulates into its output (g += x * y), so the
+    // result buffer has to start from zero; it is not part of the DMA copy.
+    for (int i = 0; i < M * N; i++) {
+        local_z[i] = 0.0;
+    }
+
+    (void)snrt_mcycle();
+    matmul(local_x, local_y, local_z);
+    (void)snrt_mcycle();
+
+    // Correctness check
+    int nerr = 0;
+    for (int i = 0; i < M * N; i++) {
+        double d = fabs(local_z[i] - G[i]);
+        nerr += !(d <= 1E-2);  // Negated so that NaNs count as errors too
+                               // (any comparison with NaN is false)
+    }
+    return nerr;
+}
diff --git a/scripts/run.sh b/scripts/run.sh
index 2107d296..5114056e 100755
--- a/scripts/run.sh
+++ b/scripts/run.sh
@@ -84,6 +84,7 @@ KERNEL_DIRS=(
     # "ssum/8x16xf32/"
     # "ssum/14x26xf32/"
     "dsum/8x16xf32/"
+    "matmul/8x8xf64/"
     #"matmul/16x16xf64/"
     "relu/16x16xf64/"
 )