Skip to content

Commit

Permalink
Merge pull request #48 from opencompl/christos/add-dgemm-8by8
Browse files Browse the repository at this point in the history
Add 8x8 double precision matmul baseline
  • Loading branch information
compor authored Nov 1, 2023
2 parents 30f27b9 + d9363c4 commit 2fc0391
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 0 deletions.
8 changes: 8 additions & 0 deletions kernels/matmul/8x8xf64/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Build description for the 8x8 double-precision matmul kernel tests.
# Running `make` with no target builds `all`.
.DEFAULT_GOAL := all

# Shared rules from the snitch/ directory three levels up.
# NOTE(review): presumably toolchain and flag definitions; confirm there.
include ../../../snitch/Makefile.rules

# Test executables for this kernel. TESTS is reset first so that nothing set
# by the include above leaks in; only the baseline is listed.
TESTS =
TESTS += baseline.x

# Common kernel build logic shared by the directories under kernels/.
# NOTE(review): presumably consumes TESTS; confirm in Makefile.kernels.
include ../../Makefile.kernels
16 changes: 16 additions & 0 deletions kernels/matmul/8x8xf64/baseline.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#include "data.h"

#include <snrt.h>

#include <stdint.h>

/*
 * Baseline dense matrix multiply: g = x * y.
 *
 * x holds an M x K matrix, y a K x N matrix and g receives the M x N product.
 * All three are flat, row-major arrays; M, N and K come from data.h.
 *
 * Every element of g is overwritten, so the caller does not have to clear the
 * output buffer beforehand.
 */
void matmul(const double* x, const double* y, double* g) {
    for (uint32_t i = 0; i < M; ++i) {
        for (uint32_t j = 0; j < N; ++j) {
            // Accumulate in a local that starts at zero so the result never
            // depends on whatever g contained on entry.
            double acc = 0.0;
            for (uint32_t k = 0; k < K; ++k) {
                // row-major accesses
                acc += x[i * K + k] * y[k * N + j];
            }
            g[i * N + j] = acc;
        }
    }
}
207 changes: 207 additions & 0 deletions kernels/matmul/8x8xf64/data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,207 @@
// Matrix dimensions: X is M x K, Y is K x N and G is M x N.
// This file does not include data.h, so these repeat the values defined there
// and have to be kept in sync with it by hand.
#define M 8
#define K 8
#define N 8

// Left-hand input matrix of the matmul test: M * K doubles.
// The kernel in baseline.c reads it as x[i * K + k], i.e. row-major with M rows
// of K values each.
const double X[M * K] = {
97.62700785,
430.37873274,
205.52675214,
89.76636599,
-152.69040132,
291.78822613,
-124.82557747,
783.54600156,
927.325521 ,
-233.11696235,
583.45007617,
57.78983951,
136.08912219,
851.19327659,
-857.9278836 ,
-825.7414006 ,
-959.56320512,
665.2396911 ,
556.3135019 ,
740.02429649,
957.23668447,
598.31712843,
-77.04127549,
561.05835257,
-763.45114826,
279.84204266,
-713.29342518,
889.3378341 ,
43.6966435 ,
-170.67612002,
-470.88877579,
548.46737887,
-87.69933557,
136.86789774,
-962.42039913,
235.27099415,
224.19144544,
233.86799375,
887.49615703,
363.64059821,
-280.98419885,
-125.9360924 ,
395.26239185,
-879.54905674,
333.53343089,
341.27573924,
-579.23487785,
-742.14740469,
-369.14329815,
-272.57845811,
140.39354084,
-122.79697308,
976.74767612,
-795.9103785 ,
-582.24648781,
-677.38096423,
306.21665093,
-493.41679492,
-67.37845429,
-511.148816 ,
-682.06083271,
-779.24971767,
312.65917893,
-723.6340973
};


// Right-hand input matrix of the matmul test: K * N doubles.
// The kernel in baseline.c reads it as y[k * N + j], i.e. row-major with K rows
// of N values each.
const double Y[K * N] = {
-606.83527664,
-262.54965868,
641.9864597 ,
-805.79744841,
675.889815 ,
-807.80318421,
952.91893003,
-62.6975967 ,
953.52217638,
209.69103949,
478.5271588 ,
-921.62441549,
-434.38607485,
-759.60687757,
-407.71960496,
-762.54456209,
-364.03364121,
-171.47401097,
-871.7050073 ,
384.94423874,
133.20290841,
-469.22101812,
46.49610693,
-812.11897848,
151.89299111,
858.59239515,
-362.8620951 ,
334.82075993,
-736.40427519,
432.65440824,
-421.18781411,
-633.61727599,
173.02586962,
-959.78490763,
657.88005843,
-990.60904761,
355.63307359,
-459.98405362,
470.38804425,
924.37709023,
-502.49371296,
152.31466884,
184.08386254,
144.50381158,
-553.83673472,
905.49802303,
-105.74924276,
692.81734494,
398.95855064,
-405.12609829,
627.5956394 ,
-206.98851831,
762.20639422,
162.54574527,
763.47072371,
385.06318016,
450.50855964,
2.64876385,
912.16726945,
287.98039846,
-152.28990288,
212.78642826,
-961.61360338,
-396.85036665
};


// Reference output of the matmul test: M * N doubles, row-major.
// main.c compares every element the kernel produces against the matching entry
// here, using an absolute tolerance of 1E-2. The values are stored with nine
// significant digits.
// NOTE(review): presumably the precomputed product X * Y; confirm against the
// script that generated this file.
const double G[M * N] = {
4.20101194e+05,
3.50083946e+05,
6.46538181e+05,
7.87611951e+04,
-5.90066796e+05,
1.75051891e+04,
-1.06214482e+06,
-8.56097454e+05,
-2.10708718e+06,
1.63457337e+03,
-1.09121328e+06,
-3.60471830e+05,
-1.87997154e+05,
-4.27787358e+05,
1.09454317e+06,
3.22031941e+05,
1.21350670e+06,
1.36502415e+05,
1.52156393e+05,
-6.22301317e+04,
-1.54349419e+06,
5.37282592e+05,
-1.68277900e+06,
-3.20747360e+05,
1.27742014e+06,
1.26930043e+06,
1.44955858e+05,
5.67935087e+05,
-1.71986709e+06,
9.89135329e+05,
-2.09766800e+06,
-6.26582774e+05,
1.00898615e+06,
-1.19381322e+05,
1.84200115e+06,
-6.14448878e+05,
1.51101302e+05,
8.50535943e+05,
1.25405040e+05,
1.10035520e+06,
-9.06271345e+05,
-8.11028383e+05,
-1.02428859e+06,
-1.74769700e+05,
1.66273819e+05,
-3.39830959e+05,
5.64655615e+05,
9.66178163e+05,
-7.41759606e+04,
-9.14352682e+05,
-9.32476120e+05,
-5.95542872e+05,
4.25567604e+05,
-1.02251944e+06,
5.68088238e+05,
5.90867503e+05,
-6.37130208e+05,
-2.03819589e+05,
-8.51329047e+05,
3.00857968e+05,
1.32626599e+06,
-5.57125464e+05,
1.40126649e+06,
-2.71458462e+04
};

9 changes: 9 additions & 0 deletions kernels/matmul/8x8xf64/data.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Shared declarations for the 8x8 double-precision matmul test data.
#pragma once

// Matrix dimensions: X is M x K, Y is K x N and G is M x N.
#define M 8
#define K 8
#define N 8

// Test matrices, defined in data.c as flat arrays of doubles.
// X and Y are the kernel inputs; G is the reference result main.c checks against.
extern const double X[M * K];
extern const double Y[K * N];
extern const double G[M * N];
43 changes: 43 additions & 0 deletions kernels/matmul/8x8xf64/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#include "data.h"

#include <snrt.h>

#include <math.h>

// Kernel provided via external definition (see baseline.c).
// The prototype has to match that definition exactly, including the const
// qualifiers on the two input matrices: declaring the same function with
// incompatible parameter types in two translation units is undefined behaviour.
void matmul(const double *x, const double *y, double *g);

/*
 * Test driver for the matmul kernel.
 *
 * Stages X and Y in cluster-local L1 memory, runs matmul() on core 0 between
 * two snrt_mcycle() markers and compares the result with the reference G.
 *
 * Returns the number of mismatching elements on core 0 (0 means success).
 * Every other core returns 0 as soon as the input data has been staged.
 */
int main(void) {
    // Allocate shared local memory
    // By avoiding allocators and bumping by a known offset a base pointer
    // (snrt_l1_next()) that is the same for all the cores in the cluster, we are
    // essentially providing the same memory regions to all the cores in this cluster.
    // Each offset is the element count of the buffer in front of it:
    // X is M x K, Y is K x N and the result is M x N.
    double *local_x = (double *)snrt_l1_next();
    double *local_y = local_x + M * K;
    double *local_z = local_y + K * N;

    // Copy data in shared local memory
    if (snrt_is_dm_core()) {
        snrt_dma_start_1d(local_x, X, M * K * sizeof(double));
        snrt_dma_start_1d(local_y, Y, K * N * sizeof(double));
        // The calls above only start the transfers. Wait for them to finish
        // before joining the barrier, otherwise the compute core could read
        // the inputs before they have arrived.
        snrt_dma_wait_all();
    }

    snrt_cluster_hw_barrier();

    // Launch kernel: from this point on only core 0 is required to be alive.
    int thiscore = snrt_cluster_core_idx();
    if (thiscore != 0) return 0;

    // Clear the output buffer before timing starts: nothing above initialises
    // it, and a kernel that accumulates into its output needs it to be zero.
    for (int i = 0; i < M * N; i++) {
        local_z[i] = 0.0;
    }

    (void)snrt_mcycle();
    matmul(local_x, local_y, local_z);
    (void)snrt_mcycle();

    // Correctness check
    int nerr = 0;
    for (int i = 0; i < M * N; i++) {
        double d = fabs(local_z[i] - G[i]);
        // Written as !(d <= tol) rather than (d > tol) so that a NaN, for
        // which every comparison is false, is counted as an error too.
        nerr += !(d <= 1E-2f);
    }
    return nerr;
}
1 change: 1 addition & 0 deletions scripts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ KERNEL_DIRS=(
# "ssum/8x16xf32/"
# "ssum/14x26xf32/"
"dsum/8x16xf32/"
"matmul/8x8xf64/"
#"matmul/16x16xf64/"
"relu/16x16xf64/"
)
Expand Down

0 comments on commit 2fc0391

Please sign in to comment.