-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.c
46 lines (37 loc) · 1.36 KB
/
main.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#include "data.h"
#include <snrt.h>
#include <math.h>
// Kernel provided via external definition (it is not implemented in this file).
// main() calls it with the scalar A and three float buffers, then compares the
// buffer passed as z, element by element, against the reference G.
// NOTE(review): going by the name, this presumably computes
// z[i] = a * x[i] + y[i] over N elements -- confirm against the kernel's
// definition, including whether x/y/z are allowed to alias.
void saxpy(float a, float *x, float *y, float *z);
int main() {
    // Carve three N-element float buffers out of shared local memory.
    // snrt_l1_next() yields the same base pointer on every core of the
    // cluster, so bumping it by known offsets (rather than going through an
    // allocator) makes all cores agree on where x, y and z live.
    float *x_buf = (float *)snrt_l1_next();
    float *y_buf = &x_buf[N];
    float *z_buf = &y_buf[N];

    // Stage the inputs: only the data-mover core issues the two 1D DMA
    // transfers and waits for them to complete.
    if (snrt_is_dm_core()) {
        snrt_dma_start_1d(x_buf, X, N * sizeof(float));
        snrt_dma_start_1d(y_buf, Y, N * sizeof(float));
        snrt_dma_wait_all();
    }

    // Every core of the cluster meets here, so the inputs are in place
    // before anyone proceeds.
    snrt_cluster_hw_barrier();

    // From here on only core 0 is needed; all other cores exit with success.
    if (snrt_cluster_core_idx() != 0) {
        return 0;
    }

    // Run the kernel, bracketed by FPU fences and cycle-counter reads whose
    // values are intentionally discarded.
    snrt_fpu_fence();
    (void)snrt_mcycle();
    saxpy(A, x_buf, y_buf, z_buf);
    snrt_fpu_fence();
    (void)snrt_mcycle();

    // Correctness check: count the elements whose absolute difference from
    // the reference G is not within 1E-2.
    int mismatches = 0;
    for (int idx = 0; idx < N; ++idx) {
        float abs_diff = fabsf(z_buf[idx] - G[idx]);
        // Only a difference that is provably within tolerance is accepted.
        // A NaN difference fails the `<=` comparison (every ordered
        // comparison with NaN is false) and is therefore counted as an error.
        if (abs_diff <= 1E-2f) {
            continue;
        }
        ++mismatches;
    }

    // The process exit code is the number of mismatching elements (0 == pass).
    return mismatches;
}