opencompl · superlopuh · Nov 6, 2023 · Nov 4, 2023 · Nov 4, 2023
diff --git a/kernels/fill/16x16xf64/Makefile b/kernels/fill/16x16xf64/Makefile
@@ -0,0 +1,9 @@
+.DEFAULT_GOAL := all
+# Shared rules from the snitch directory at the repository root.
+include ../../../snitch/Makefile.rules
+# Targets for this kernel: one per implementation of fill.
+TESTS =
+TESTS += baseline.x
+TESTS += linalg.x
+# NOTE(review): presumably Makefile.kernels consumes TESTS and defines `all` - confirm.
+include ../../Makefile.kernels
diff --git a/kernels/fill/16x16xf64/baseline.c b/kernels/fill/16x16xf64/baseline.c
@@ -0,0 +1,14 @@
+#include "data.h"
+
+#include <snrt.h>
+
+#include <stdint.h>
+
+void fill(const double x, double* y) {
+    // Baseline kernel: store x in all M * N elements of the row-major buffer y.
+    for (uint32_t row = 0; row < M; ++row) {
+        for (uint32_t col = 0; col < N; ++col) {
+            y[row * N + col] = x;
+        }
+    }
+}
diff --git a/kernels/fill/16x16xf64/baseline.csv b/kernels/fill/16x16xf64/baseline.csv
@@ -0,0 +1 @@
+370
diff --git a/kernels/fill/16x16xf64/cycles.csv b/kernels/fill/16x16xf64/cycles.csv
@@ -0,0 +1,2 @@
+baseline,370
+linalg,37
diff --git a/kernels/fill/16x16xf64/data.c b/kernels/fill/16x16xf64/data.c
@@ -0,0 +1,264 @@
+// Take M, N and the extern declarations of X and Y from the shared header.
+#include "data.h"
+
+const double X = 4.0;
+
+const double Y[M * N] = {
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.
+};
+
diff --git a/kernels/fill/16x16xf64/data.h b/kernels/fill/16x16xf64/data.h
@@ -0,0 +1,7 @@
+#pragma once
+// Problem size: the kernel operates on an M x N matrix stored as M * N doubles.
+#define M 16
+#define N 16
+// X is the value passed to fill(); Y is the reference the result is checked against.
+extern const double X;
+extern const double Y[M * N];
diff --git a/kernels/fill/16x16xf64/linalg.csv b/kernels/fill/16x16xf64/linalg.csv
@@ -0,0 +1 @@
+37
diff --git a/kernels/fill/16x16xf64/linalg.mlir b/kernels/fill/16x16xf64/linalg.mlir
@@ -0,0 +1,6 @@
+// Fill kernel: linalg.fill with scalar %X as input and the 16x16 f64 tensor %Y as output.
+func.func public @fill(%X: f64,
+                       %Y: tensor<16x16xf64>) -> () {
+  %res = linalg.fill ins(%X : f64) outs(%Y : tensor<16x16xf64>) -> tensor<16x16xf64>
+  return  // NOTE(review): %res is neither used nor returned; presumably the lowering fills %Y in place - confirm
+}
diff --git a/kernels/fill/16x16xf64/main.c b/kernels/fill/16x16xf64/main.c
@@ -0,0 +1,36 @@
+#include "data.h"
+
+#include <snrt.h>
+
+#include <math.h>
+
+// Kernel under test; defined outside this file (baseline.c provides a C version).
+void fill(double x, double *y);
+
+int main() {
+    // No allocator is used: snrt_l1_next() gives a base pointer that is the same
+    // on every core of the cluster, so all cores address the same L1 region.
+    const double fill_value = X;
+    double *const result = (double *)snrt_l1_next();
+
+    snrt_cluster_hw_barrier();
+
+    // Past the barrier only core 0 is needed; every other core exits here.
+    const int core_id = snrt_cluster_core_idx();
+    if (core_id != 0) return 0;
+
+    (void)snrt_mcycle();
+    fill(fill_value, result);
+    (void)snrt_mcycle();
+
+    // Count the elements that differ from the reference Y. The comparison is
+    // negated so that a NaN difference is counted as a mismatch as well.
+    int mismatches = 0;
+    for (int idx = 0; idx < M * N; ++idx) {
+        const double err = fabs(result[idx] - Y[idx]);
+        if (!(err <= 1E-2f)) {
+            ++mismatches;
+        }
+    }
+    return mismatches;
+}