From fc4a53c555a676ed631e4874215945cc8ca620ec Mon Sep 17 00:00:00 2001 From: Navaneeth-Kunhi Purayil Date: Fri, 17 Jan 2025 19:14:55 +0100 Subject: [PATCH] [spatz_vrf] experimenting with normal VRF layout instead of barber pole Having a normal layout means that the conflicts occurring at the VRF are structural; with the barber pole, on the other hand, depending on the registers used by instructions, there may or may not be conflicts. But since we add buffers on FPU and VLSU1, we can afford to have a normal layout, and the conflict happens only initially, as can be seen for the dotp kernel. Performance of kernels (4k, 32k): 1) axpy_4096 : 52.0 % / 56.0 % 2) dotp_4096 : 75.6 % / 96.1 % 3) fmatmul_64x64x64 : 97.8 % / 97.8 % --- hw/ip/spatz/src/spatz_vrf.sv | 2 +- sw/spatzBenchmarks/dp-fdotp/main.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/hw/ip/spatz/src/spatz_vrf.sv b/hw/ip/spatz/src/spatz_vrf.sv index 4c132a06..a3773f4f 100644 --- a/hw/ip/spatz/src/spatz_vrf.sv +++ b/hw/ip/spatz/src/spatz_vrf.sv @@ -53,7 +53,7 @@ module spatz_vrf automatic logic [1:0] vreg8 = addr[$clog2(8*NrWordsPerVector) +: 2]; // Barber's pole. Advance the starting bank of each vector by one every eight vector registers. 
- f_bank = addr[$clog2(NrVRFBanks)-1:0] + vreg8; + f_bank = addr[$clog2(NrVRFBanks)-1:0]; endfunction: f_bank ///////////// diff --git a/sw/spatzBenchmarks/dp-fdotp/main.c b/sw/spatzBenchmarks/dp-fdotp/main.c index 1d291718..639000c9 100644 --- a/sw/spatzBenchmarks/dp-fdotp/main.c +++ b/sw/spatzBenchmarks/dp-fdotp/main.c @@ -94,6 +94,10 @@ int main() { result[0] = acc; } + // End timer and check if new best runtime + if (cid == 0) + timer = benchmark_get_cycle() - timer; + // Wait for all cores to finish snrt_cluster_hw_barrier(); @@ -101,10 +105,6 @@ int main() { if (cid == 0) stop_kernel(); - // End timer and check if new best runtime - if (cid == 0) - timer = benchmark_get_cycle() - timer; - // Check and display results if (cid == 0) { long unsigned int performance = 1000 * 2 * dotp_l.M / timer;