Skip to content

Commit

Permalink
add linalg.fill
Browse files Browse the repository at this point in the history
  • Loading branch information
superlopuh committed Nov 4, 2023
1 parent 2f43517 commit 0a91fd6
Show file tree
Hide file tree
Showing 12 changed files with 463 additions and 1 deletion.
9 changes: 9 additions & 0 deletions kernels/fill/16x16xf64/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Build description for the 16x16xf64 fill kernel variants.

# Running `make` with no target builds `all`.
.DEFAULT_GOAL := all

# Shared rules from the snitch directory at the repository root.
include ../../../snitch/Makefile.rules

# One entry per kernel implementation in this directory
# (baseline.c and linalg.mlir).
# NOTE(review): TESTS is presumably consumed by Makefile.kernels below — confirm.
TESTS =
TESTS += baseline.x
TESTS += linalg.x

# Common kernel build logic shared by all kernel directories.
include ../../Makefile.kernels
14 changes: 14 additions & 0 deletions kernels/fill/16x16xf64/baseline.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#include "data.h"

#include <snrt.h>

#include <stdint.h>

/*
 * Baseline fill kernel: stores the scalar x into every element of the
 * M x N buffer y, which is laid out in row-major order.
 */
void fill(const double x, double* y) {
    for (uint32_t row = 0; row < M; ++row) {
        /* Start of the current row inside the flat buffer. */
        double *dst = y + row * N;
        for (uint32_t col = 0; col < N; ++col) {
            dst[col] = x;
        }
    }
}
1 change: 1 addition & 0 deletions kernels/fill/16x16xf64/baseline.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
370
2 changes: 2 additions & 0 deletions kernels/fill/16x16xf64/cycles.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
baseline,370
linalg,37
264 changes: 264 additions & 0 deletions kernels/fill/16x16xf64/data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,264 @@
/* Problem size: Y holds M * N elements (a 16 x 16 matrix stored flat). */
#define M 16
#define N 16

/* Scalar input handed to the fill kernel by the test driver. */
const double X = 4.0;

/*
 * Reference output the driver compares against: every element equals 4.0.
 * Written as N values per line so each line corresponds to one matrix row.
 */
const double Y[M * N] = {
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
    4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0
};

7 changes: 7 additions & 0 deletions kernels/fill/16x16xf64/data.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#pragma once

// Problem dimensions: the kernel buffer holds M * N doubles, indexed by the
// baseline kernel as y[i * N + j] with i < M and j < N.
#define M 16
#define N 16

// Scalar fill value passed to the kernel (defined in data.c).
extern const double X;
// Expected kernel output, compared element-wise by main.c (defined in data.c).
extern const double Y[M * N];
1 change: 1 addition & 0 deletions kernels/fill/16x16xf64/linalg.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
37
6 changes: 6 additions & 0 deletions kernels/fill/16x16xf64/linalg.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

// Fill kernel expressed with linalg: writes the scalar %X into a 16x16 tensor
// of f64 using %Y as the output operand. The function itself returns nothing.
func.func public @fill(%X: f64,
%Y: tensor<16x16xf64>) -> () {
// NOTE(review): the filled tensor is the SSA result %res, which is neither
// returned nor used. Updating %Y's storage in place presumably depends on how
// the lowering pipeline bufferizes this function — confirm.
%res = linalg.fill ins(%X : f64) outs(%Y : tensor<16x16xf64>) -> tensor<16x16xf64>
return
}
36 changes: 36 additions & 0 deletions kernels/fill/16x16xf64/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#include "data.h"

#include <snrt.h>

#include <math.h>

// Kernel provided via external definition
void fill(double x, double *y);

/*
 * Test driver for the fill kernel.
 *
 * Runs fill() on core 0, bracketed by two snrt_mcycle() reads, and then
 * compares the M * N output elements against the reference array Y.
 *
 * Returns 0 on success (and on every core other than core 0); otherwise the
 * number of mismatching elements, saturated at 255.
 */
int main(void) {
    // Allocate shared local memory.
    // No allocator is used: every core takes the same base pointer
    // (snrt_l1_next()), so all cores in the cluster refer to the same
    // memory region.
    double local_x = X;
    double *local_y = (double *)snrt_l1_next();

    snrt_cluster_hw_barrier();

    // Launch kernel: from this point on only core 0 is required to be alive.
    int thiscore = snrt_cluster_core_idx();
    if (thiscore != 0) return 0;

    // The cycle counter is read immediately before and after the kernel; the
    // returned values are intentionally discarded here.
    (void)snrt_mcycle();
    fill(local_x, local_y);
    (void)snrt_mcycle();

    // Correctness check
    int nerr = 0;
    for (int i = 0; i < M * N; i++) {
        double d = fabs(local_y[i] - Y[i]);
        // Written as a negated <= so that a NaN difference (for which every
        // comparison is false) is counted as an error.
        nerr += !(d <= 1E-2f);
    }

    // With M * N == 256, a kernel that gets every element wrong yields
    // nerr == 256, whose low 8 bits are zero. Saturate so that a total failure
    // cannot be reported as success.
    // NOTE(review): assumes the host or simulator may keep only the low 8 bits
    // of the exit status, as POSIX does — confirm for the Snitch toolchain.
    return nerr > 255 ? 255 : nerr;
}
Loading

0 comments on commit 0a91fd6

Please sign in to comment.