-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #48 from opencompl/christos/add-dgemm-8by8
Add 8x8 double precision matmul baseline
- Loading branch information
Showing
6 changed files
with
284 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
# Build description for this kernel directory.
# Running make with no explicit target builds `all`.
.DEFAULT_GOAL := all | ||
|
||
# NOTE(review): presumably supplies the toolchain/build rules for Snitch
# targets; the included file is not visible here, confirm before relying on it.
include ../../../snitch/Makefile.rules | ||
|
||
# List of test binaries for this kernel; only the baseline variant is registered.
TESTS = | ||
TESTS += baseline.x | ||
|
||
# NOTE(review): presumably consumes TESTS to define the actual targets (TODO confirm).
include ../../Makefile.kernels |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#include "data.h" | ||
|
||
#include <snrt.h> | ||
|
||
#include <stdint.h> | ||
|
||
void matmul(const double* x, const double* y, double* g) { | ||
for (uint32_t i = 0; i < M; ++i) { | ||
for (uint32_t j = 0; j < N; ++j) { | ||
for (uint32_t k = 0; k < K; ++k) { | ||
// row-major accesses | ||
g[i * N + j] += x[i * K + k] * y[k * N + j]; | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,207 @@ | ||
// Matrix dimensions that size the arrays in this file: X has M * K elements,
// Y has K * N and G has M * N. data.h defines the same three macros, so the
// two sets of values must be kept in sync.
#define M 8 | ||
#define K 8 | ||
#define N 8 | ||
|
||
// Left-hand input operand (M * K elements). main() copies it into local
// memory and passes it to matmul as x, which indexes it row-major
// (x[i * K + k]).
const double X[M * K] = { | ||
97.62700785, | ||
430.37873274, | ||
205.52675214, | ||
89.76636599, | ||
-152.69040132, | ||
291.78822613, | ||
-124.82557747, | ||
783.54600156, | ||
927.325521 , | ||
-233.11696235, | ||
583.45007617, | ||
57.78983951, | ||
136.08912219, | ||
851.19327659, | ||
-857.9278836 , | ||
-825.7414006 , | ||
-959.56320512, | ||
665.2396911 , | ||
556.3135019 , | ||
740.02429649, | ||
957.23668447, | ||
598.31712843, | ||
-77.04127549, | ||
561.05835257, | ||
-763.45114826, | ||
279.84204266, | ||
-713.29342518, | ||
889.3378341 , | ||
43.6966435 , | ||
-170.67612002, | ||
-470.88877579, | ||
548.46737887, | ||
-87.69933557, | ||
136.86789774, | ||
-962.42039913, | ||
235.27099415, | ||
224.19144544, | ||
233.86799375, | ||
887.49615703, | ||
363.64059821, | ||
-280.98419885, | ||
-125.9360924 , | ||
395.26239185, | ||
-879.54905674, | ||
333.53343089, | ||
341.27573924, | ||
-579.23487785, | ||
-742.14740469, | ||
-369.14329815, | ||
-272.57845811, | ||
140.39354084, | ||
-122.79697308, | ||
976.74767612, | ||
-795.9103785 , | ||
-582.24648781, | ||
-677.38096423, | ||
306.21665093, | ||
-493.41679492, | ||
-67.37845429, | ||
-511.148816 , | ||
-682.06083271, | ||
-779.24971767, | ||
312.65917893, | ||
-723.6340973 | ||
}; | ||
|
||
|
||
// Right-hand input operand (K * N elements). main() copies it into local
// memory and passes it to matmul as y, which indexes it row-major
// (y[k * N + j]).
const double Y[K * N] = { | ||
-606.83527664, | ||
-262.54965868, | ||
641.9864597 , | ||
-805.79744841, | ||
675.889815 , | ||
-807.80318421, | ||
952.91893003, | ||
-62.6975967 , | ||
953.52217638, | ||
209.69103949, | ||
478.5271588 , | ||
-921.62441549, | ||
-434.38607485, | ||
-759.60687757, | ||
-407.71960496, | ||
-762.54456209, | ||
-364.03364121, | ||
-171.47401097, | ||
-871.7050073 , | ||
384.94423874, | ||
133.20290841, | ||
-469.22101812, | ||
46.49610693, | ||
-812.11897848, | ||
151.89299111, | ||
858.59239515, | ||
-362.8620951 , | ||
334.82075993, | ||
-736.40427519, | ||
432.65440824, | ||
-421.18781411, | ||
-633.61727599, | ||
173.02586962, | ||
-959.78490763, | ||
657.88005843, | ||
-990.60904761, | ||
355.63307359, | ||
-459.98405362, | ||
470.38804425, | ||
924.37709023, | ||
-502.49371296, | ||
152.31466884, | ||
184.08386254, | ||
144.50381158, | ||
-553.83673472, | ||
905.49802303, | ||
-105.74924276, | ||
692.81734494, | ||
398.95855064, | ||
-405.12609829, | ||
627.5956394 , | ||
-206.98851831, | ||
762.20639422, | ||
162.54574527, | ||
763.47072371, | ||
385.06318016, | ||
450.50855964, | ||
2.64876385, | ||
912.16726945, | ||
287.98039846, | ||
-152.28990288, | ||
212.78642826, | ||
-961.61360338, | ||
-396.85036665 | ||
}; | ||
|
||
|
||
// Reference output (M * N elements). main() compares the kernel result
// against these values element by element.
// NOTE(review): presumably the product X * Y precomputed offline; the
// generator is not part of this file, so confirm before regenerating inputs.
const double G[M * N] = { | ||
4.20101194e+05, | ||
3.50083946e+05, | ||
6.46538181e+05, | ||
7.87611951e+04, | ||
-5.90066796e+05, | ||
1.75051891e+04, | ||
-1.06214482e+06, | ||
-8.56097454e+05, | ||
-2.10708718e+06, | ||
1.63457337e+03, | ||
-1.09121328e+06, | ||
-3.60471830e+05, | ||
-1.87997154e+05, | ||
-4.27787358e+05, | ||
1.09454317e+06, | ||
3.22031941e+05, | ||
1.21350670e+06, | ||
1.36502415e+05, | ||
1.52156393e+05, | ||
-6.22301317e+04, | ||
-1.54349419e+06, | ||
5.37282592e+05, | ||
-1.68277900e+06, | ||
-3.20747360e+05, | ||
1.27742014e+06, | ||
1.26930043e+06, | ||
1.44955858e+05, | ||
5.67935087e+05, | ||
-1.71986709e+06, | ||
9.89135329e+05, | ||
-2.09766800e+06, | ||
-6.26582774e+05, | ||
1.00898615e+06, | ||
-1.19381322e+05, | ||
1.84200115e+06, | ||
-6.14448878e+05, | ||
1.51101302e+05, | ||
8.50535943e+05, | ||
1.25405040e+05, | ||
1.10035520e+06, | ||
-9.06271345e+05, | ||
-8.11028383e+05, | ||
-1.02428859e+06, | ||
-1.74769700e+05, | ||
1.66273819e+05, | ||
-3.39830959e+05, | ||
5.64655615e+05, | ||
9.66178163e+05, | ||
-7.41759606e+04, | ||
-9.14352682e+05, | ||
-9.32476120e+05, | ||
-5.95542872e+05, | ||
4.25567604e+05, | ||
-1.02251944e+06, | ||
5.68088238e+05, | ||
5.90867503e+05, | ||
-6.37130208e+05, | ||
-2.03819589e+05, | ||
-8.51329047e+05, | ||
3.00857968e+05, | ||
1.32626599e+06, | ||
-5.57125464e+05, | ||
1.40126649e+06, | ||
-2.71458462e+04 | ||
}; | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#pragma once | ||
|
||
// Matrix dimensions shared by the kernel and the test driver. The extern
// declarations below size X as M * K, Y as K * N and G as M * N elements.
#define M 8 | ||
#define K 8 | ||
#define N 8 | ||
|
||
// Input operands (X, Y) and reference output (G); the definitions with the
// actual values live in a separate source file.
extern const double X[M * K]; | ||
extern const double Y[K * N]; | ||
extern const double G[M * N]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#include "data.h" | ||
|
||
#include <snrt.h> | ||
|
||
#include <math.h> | ||
|
||
// Kernel provided via external definition. The parameter qualifiers mirror
// that definition (const inputs, mutable output): declaring the same function
// with incompatible parameter types in another translation unit is undefined
// behaviour, and the const also documents that x and y are read-only.
void matmul(const double *x, const double *y, double *g);
|
||
int main() { | ||
// Allocate shared local memory | ||
// By avoiding allocators and bumping by a known offset a base pointer | ||
// (snrt_l1_next()) that is the same for all the cores in the cluster, we are | ||
// essentially providing the same memory regions to all the cores in this cluster. | ||
double *local_x = (double *)snrt_l1_next(); | ||
double *local_y = local_x + K * N; | ||
double *local_z = local_y + M * N; | ||
|
||
// Copy data in shared local memory | ||
if (snrt_is_dm_core()) { | ||
snrt_dma_start_1d(local_x, X, M * N * sizeof(double)); | ||
snrt_dma_start_1d(local_y, Y, M * N * sizeof(double)); | ||
} | ||
|
||
snrt_cluster_hw_barrier(); | ||
|
||
// Launch kernel: from this point on only core 0 is required to be alive. | ||
int thiscore = snrt_cluster_core_idx(); | ||
if (thiscore != 0) return 0; | ||
|
||
(void)snrt_mcycle(); | ||
matmul(local_x, local_y, local_z); | ||
(void)snrt_mcycle(); | ||
|
||
// Correctness check | ||
int nerr = 0; | ||
for (int i = 0; i < M * N; i++) { | ||
double d = fabs(local_z[i] - G[i]); | ||
nerr += !(d <= 1E-2f); // Make sure to take into account NaNs (e.g.: happy path | ||
// on the taken branch) | ||
} | ||
return nerr; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters