diff --git a/kernels/fill/16x16xf64/Makefile b/kernels/fill/16x16xf64/Makefile
new file mode 100644
index 00000000..26ef0923
--- /dev/null
+++ b/kernels/fill/16x16xf64/Makefile
@@ -0,0 +1,9 @@
+.DEFAULT_GOAL := all
+
+include ../../../snitch/Makefile.rules
+
+TESTS =
+TESTS += baseline.x
+TESTS += linalg.x
+
+include ../../Makefile.kernels
diff --git a/kernels/fill/16x16xf64/baseline.c b/kernels/fill/16x16xf64/baseline.c
new file mode 100644
index 00000000..3be19b6f
--- /dev/null
+++ b/kernels/fill/16x16xf64/baseline.c
@@ -0,0 +1,14 @@
+#include "data.h"
+
+#include <snrt.h>
+
+#include <stdint.h>
+
+void fill(const double x, double* y) {
+    for (uint32_t i = 0; i < M; ++i) {
+        for (uint32_t j = 0; j < N; ++j) {
+            // row-major accesses
+            y[i * N + j] = x;
+        }
+    }
+}
diff --git a/kernels/fill/16x16xf64/baseline.csv b/kernels/fill/16x16xf64/baseline.csv
new file mode 100644
index 00000000..5b0cffbc
--- /dev/null
+++ b/kernels/fill/16x16xf64/baseline.csv
@@ -0,0 +1 @@
+370
diff --git a/kernels/fill/16x16xf64/cycles.csv b/kernels/fill/16x16xf64/cycles.csv
new file mode 100644
index 00000000..9b648834
--- /dev/null
+++ b/kernels/fill/16x16xf64/cycles.csv
@@ -0,0 +1,2 @@
+baseline,370
+linalg,37
diff --git a/kernels/fill/16x16xf64/data.c b/kernels/fill/16x16xf64/data.c
new file mode 100644
index 00000000..7f20063f
--- /dev/null
+++ b/kernels/fill/16x16xf64/data.c
@@ -0,0 +1,264 @@
+#define M 16
+#define N 16
+
+const double X = 4.0;
+
+const double Y[M * N] = {
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+4.,
+ 4.,
+4.,
+4.,
+4.
+};
+
diff --git a/kernels/fill/16x16xf64/data.h b/kernels/fill/16x16xf64/data.h
new file mode 100644
index 00000000..d7fe28a2
--- /dev/null
+++ b/kernels/fill/16x16xf64/data.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#define M 16
+#define N 16
+
+extern const double X;
+extern const double Y[M * N];
diff --git a/kernels/fill/16x16xf64/linalg.csv b/kernels/fill/16x16xf64/linalg.csv
new file mode 100644
index 00000000..5b0cffbc
--- /dev/null
+++ b/kernels/fill/16x16xf64/linalg.csv
@@ -0,0 +1 @@
+370
diff --git a/kernels/fill/16x16xf64/linalg.mlir b/kernels/fill/16x16xf64/linalg.mlir
new file mode 100644
index 00000000..f3586edc
--- /dev/null
+++ b/kernels/fill/16x16xf64/linalg.mlir
@@ -0,0 +1,6 @@
+
+func.func public @fill(%X: f64,
+                       %Y: memref<16x16xf64>) -> () {
+  linalg.fill ins(%X : f64) outs(%Y : memref<16x16xf64>) -> ()
+  return
+}
diff --git a/kernels/fill/16x16xf64/main.c b/kernels/fill/16x16xf64/main.c
new file mode 100644
index 00000000..bf5023c2
--- /dev/null
+++ b/kernels/fill/16x16xf64/main.c
@@ -0,0 +1,36 @@
+#include "data.h"
+
+#include <snrt.h>
+
+#include <math.h>
+
+// Kernel provided via external definition
+void fill(double x, double *y);
+
+int main() {
+    // Allocate shared local memory
+    // By avoiding allocators and taking a base pointer (snrt_l1_next()) that
+    // is the same for all the cores in the cluster, we are essentially
+    // providing the same memory regions to all the cores in this cluster.
+    double local_x = X;
+    double *local_y = (double *)snrt_l1_next();
+
+    snrt_cluster_hw_barrier();
+
+    // Launch kernel: from this point on only core 0 is required to be alive.
+    int thiscore = snrt_cluster_core_idx();
+    if (thiscore != 0) return 0;
+
+    (void)snrt_mcycle();
+    fill(local_x, local_y);
+    (void)snrt_mcycle();
+
+    // Correctness check
+    int nerr = 0;
+    for (int i = 0; i < M * N; i++) {
+        double d = fabs(local_y[i] - Y[i]);
+        nerr += !(d <= 1E-2f);  // Make sure to take into account NaNs (e.g.: happy path
+                                // on the taken branch)
+    }
+    return nerr;
+}
diff --git a/kernels/fill/gendata.py b/kernels/fill/gendata.py
new file mode 100644
index 00000000..2846452f
--- /dev/null
+++ b/kernels/fill/gendata.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+
+import numpy as np
+import argparse
+import sys
+
+
+C_TYPES = {
+    "32": "float",
+    "64": "double",
+}
+
+NUMPY_TYPES = {
+    "32": np.single,
+    "64": np.double,
+}
+
+MLIR_TYPES = {
+    "32": "f32",
+    "64": "f64",
+}
+
+MEMREF_GLOBAL = """
+memref.global constant @{symbol} : memref<{shape}x{type}> = dense<[
+{initializer}
+]>
+"""
+
+
+ARRAY_GLOBAL = """
+const {type} {symbol}[{shape}] = {{
+{initializer}
+}};
+"""
+
+
+def array_to_memref_initializer(array: np.ndarray):
+    """Render each row of a 2d array as one line of an MLIR dense literal."""
+    return ",\n".join(f" {np.array2string(row, separator=', ')}" for row in array)
+
+
+def array_to_memref(array: np.ndarray, precision: int, shape=None, symbol=None):
+    """Format `array` as a `memref.global` constant declaration."""
+    return MEMREF_GLOBAL.format(
+        symbol=symbol or "array",
+        type=MLIR_TYPES[str(precision)],
+        shape=shape or "x".join(str(dim) for dim in array.shape),
+        initializer=array_to_memref_initializer(array),
+    )
+
+
+def array_to_c_initializer(array: np.ndarray):
+    """Render the flattened array as a one-element-per-line C initializer."""
+    return np.array2string(array.flatten(), separator=",\n").strip(" []")
+
+
+def array_to_c(array: np.ndarray, *, precision: int, shape=None, symbol=None):
+    """Format `array` as a `const` C array definition."""
+    return ARRAY_GLOBAL.format(
+        symbol=symbol or "array",
+        type=C_TYPES[str(precision)],
+        shape=shape or "*".join(str(dim) for dim in array.shape),
+        initializer=array_to_c_initializer(array),
+    )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        prog="gendata.py",
+        description="Generate literal initializers for a fill kernel "
+        "(every element of a 2d memref is set to the same scalar)",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    # NOTE: --range and --inner-dimension are parsed but not used below.
+    parser.add_argument(
+        "-r",
+        "--range",
+        type=float,
+        nargs=2,
+        default=(-1000.0, 1000.0),
+        help="uniform distribution range",
+    )
+    parser.add_argument("-m", "--rows", type=int, default=16, help="number of rows")
+    parser.add_argument(
+        "-n", "--columns", type=int, default=16, help="number of columns"
+    )
+    parser.add_argument(
+        "-k",
+        "--inner-dimension",
+        type=int,
+        default=16,
+        help="size of inner dimension",
+    )
+    parser.add_argument(
+        "--format", default="c", choices=["mlir", "c"], help="output format"
+    )
+    parser.add_argument(
+        "--precision",
+        type=int,
+        default=64,
+        choices=[32, 64],
+        help="floating-point precision to use",
+    )
+    args = parser.parse_args()
+
+    m = args.rows
+    n = args.columns
+    precision = str(args.precision)
+
+    val = 4.0  # chosen by fair dice roll. guaranteed to be random
+
+    # Build the expected output with the element type selected by --precision.
+    y = np.full((m, n), val, dtype=NUMPY_TYPES[precision])
+
+    printopts = {"linewidth": None, "threshold": sys.maxsize}
+    if args.format == "c":
+        np.set_printoptions(**printopts)
+        print(f"#define M {m}")
+        print(f"#define N {n}")
+        # data.h declares X as extern, so the generated file must define it.
+        print()
+        print(f"const {C_TYPES[precision]} X = {val};")
+        print(array_to_c(y, shape="M * N", precision=args.precision, symbol="Y"))
+    else:
+        assert args.format == "mlir"
+        printopts["sign"] = "+"
+        np.set_printoptions(**printopts)
+        # M and N are C macros; let the memref shape come from the array itself.
+        print(array_to_memref(y, precision=args.precision, symbol="Y"))
diff --git a/xdsl b/xdsl
index bd0de928..6a9d83ca 160000
--- a/xdsl
+++ b/xdsl
@@ -1 +1 @@
-Subproject commit bd0de9285a21d5ca081c79710fc1d5eda0a289d1
+Subproject commit 6a9d83ca561182afb9135daa5cca5e2bf8397158