Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sparse DMA (COO) Tests for Gemmini #20

Open
wants to merge 45 commits into
base: master
Choose a base branch
from
Open
Changes from 1 commit
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
6ceb4dd
first attempt
hngenc Jun 25, 2020
87e889a
Add residual layer
hngenc Jul 7, 2020
7c5c005
fixed stride bug in resadd
hngenc Jul 12, 2020
6d8e516
Added matrix-add tests
hngenc Sep 14, 2020
51d04e7
We pass bareMetalC tests on Spike now, but not imagenet tests
hngenc Sep 16, 2020
8a9e4d0
Updated params
hngenc Sep 17, 2020
f6335a8
merged header
Sep 20, 2020
94bf285
merged header
Sep 20, 2020
0017247
merge with A_stride
Sep 22, 2020
05436ac
merging and OS works
Sep 22, 2020
f79e76d
Made resadd use mvin_scale
hngenc Oct 16, 2020
616a7d1
First attempt to add hardware FSM for matmuls
hngenc Oct 23, 2020
25ee327
Added mvin mvout full tests
hngenc Nov 4, 2020
f2faf51
Added explicit gemmini_config_st to all bareMetalC tests
hngenc Nov 5, 2020
4358df6
Added double-buffering
hngenc Nov 9, 2020
c9a3eb3
Merge branch 'fsm' of https://github.com/ucb-bar/gemmini-rocc-tests i…
hngenc Nov 9, 2020
49babf0
Allow OS tiled matmuls to proceed
hngenc Nov 13, 2020
4334b30
Added explicit store configs to baremetal tests
hngenc Nov 14, 2020
0233af8
removed stale files
hngenc Nov 14, 2020
3ab001c
Update gemmini_params.h and other updates
hngenc Nov 15, 2020
6109644
Merge branch 'fsm' of https://github.com/ucb-bar/gemmini-rocc-tests i…
hngenc Nov 15, 2020
13d5835
Merged dev and fsm together
hngenc Nov 15, 2020
bbb7b5e
Updated params to make ACC_SCALE identity function, and removed print…
hngenc Nov 19, 2020
4a38dff
Made mobilenet use the CPU dw conv when convs aren't enabled
hngenc Nov 25, 2020
2df45dc
Add test rules
jerryz123 Nov 23, 2020
d3b703b
Added test for zero stride mvin
hngenc Nov 26, 2020
820799a
Added transpose tests
hngenc Nov 29, 2020
c92cce8
Reduced size of transpose tests
hngenc Nov 29, 2020
7e87624
Add transpose options to loop_ws command
hngenc Nov 30, 2020
ce9688c
Add FAST versions of select tests
jerryz123 Nov 30, 2020
94e7074
Merge commit '7e87624a05c84862c3bd48e12ebcfaa3dfd04fa0' into ci
jerryz123 Dec 1, 2020
06153b2
Add relu6 back into cpu matmul
hngenc Dec 2, 2020
8af9156
Add full_C option to tiled_matmul
hngenc Dec 2, 2020
3cdddd9
Remove FAST exit(0)
jerryz123 Dec 3, 2020
7d15716
Added fences around pool mvout
hngenc Dec 3, 2020
2973bee
Merge pull request #9 from ucb-bar/ci
hngenc Dec 3, 2020
6fb9a46
Add low_D option to tiled_matmul
hngenc Dec 5, 2020
89f9aef
Fix FAST tests when bias is present
hngenc Dec 7, 2020
4974847
initial tests
Apr 28, 2021
0015b1a
sparse mvin tests, dummy gcn test
May 14, 2021
2e08459
gcn dummy tests
May 14, 2021
9a6f351
merge
May 14, 2021
1fa6bcf
updated params for 16x16 systolic array
May 15, 2021
f59419d
working with segfault protection
May 27, 2021
788c7e3
merging
Jun 7, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add FAST versions of select tests
jerryz123 committed Dec 1, 2020
commit ce9688ccc4205ee0bd6aa8a13e51ed0ddda85f27
40 changes: 36 additions & 4 deletions bareMetalC/conv.c
Original file line number Diff line number Diff line change
@@ -17,13 +17,21 @@
#define PADDING 1
#define STRIDE 2
#else
// Convolution problem size for this configuration branch.
// When FAST is defined, the input dimension and channel counts are reduced
// (9/5/7 instead of 23/17/31); the remaining parameters are shared.
#ifdef FAST
#define IN_DIM 9
#define IN_CHANNELS 5
#define OUT_CHANNELS 7
#else
#define IN_DIM 23
#define IN_CHANNELS 17
#define OUT_CHANNELS 31
#endif

#define BATCH_SIZE 3
#define KERNEL_DIM 3
#define PADDING 1
#define STRIDE 2

#endif

#define NO_BIAS false
@@ -112,16 +120,26 @@ bool vec_is_equal(elem_t * a, elem_t * b, int len) {
}

// Fills buf[0 .. len) with test input values.
//
// When FAST is defined every element is set to 1. Otherwise each element is
// (rand() % 5) - 2, i.e. a value in [-2, 2], using exactly one rand() draw
// per element.
void init_random(elem_t * buf, int len) {
    for (elem_t * ptr = buf; ptr < buf + len; ptr++) {
#ifdef FAST
        *ptr = 1;
#else
        *ptr = (rand() % 5) - 2;
#endif
    }
}

// Fills the accumulator-typed buffer buf[0 .. len) with test values.
//
// When FAST is defined every element is set to 1. Otherwise each element is
// (rand() % 5) - 2, i.e. a value in [-2, 2], using exactly one rand() draw
// per element.
void init_random_acc(acc_t * buf, int len) {
    for (acc_t * ptr = buf; ptr < buf + len; ptr++) {
#ifdef FAST
        *ptr = 1;
#else
        *ptr = (rand() % 5) - 2;
#endif
    }
}

@@ -164,6 +182,7 @@ int main() {

printf("CPU conv...\n");
uint64_t start_cpu = read_cycles();
#ifndef FAST
conv(BATCH_SIZE, IN_CHANNELS, IN_DIM,
OUT_CHANNELS, KERNEL_DIM,
OUT_DIM,
@@ -172,6 +191,7 @@ int main() {
weights,
bias,
output);
#endif
uint64_t end_cpu = read_cycles();
printf("CPU conv took %llu cycles\n", end_cpu - start_cpu);

@@ -204,7 +224,20 @@ int main() {

assert(sizeof(output_mat) == sizeof(output));

#ifdef FAST
bool success = true;
for (int orow = 0; orow < BATCH_SIZE * OUT_DIM * OUT_DIM; orow++) {
for (int ocol = 0; ocol < OUT_CHANNELS; ocol++) {
elem_t v = output_mat[orow][ocol];
if (v != 21 && v != 31 && v != 46) {
success = false;
break;
}
}
}
#else
bool success = vec_is_equal(&output[0][0][0][0], &output_mat[0][0], sizeof(output) / sizeof(elem_t));
#endif

if (!success) {
// return 1;
@@ -294,4 +327,3 @@ int main() {

return 0;
}

36 changes: 33 additions & 3 deletions bareMetalC/conv_with_pool.c
Original file line number Diff line number Diff line change
@@ -24,10 +24,17 @@

#else

// Convolution problem size for this configuration branch.
// When FAST is defined, the input dimension and channel counts are reduced
// (9/5/7 instead of 23/17/31); the remaining parameters are shared.
#ifdef FAST
#define IN_DIM 9
#define IN_CHANNELS 5
#define OUT_CHANNELS 7
#else
#define IN_DIM 23
#define IN_CHANNELS 17
#define OUT_CHANNELS 31
#endif

#define BATCH_SIZE 3
#define KERNEL_DIM 3
#define PADDING 1
#define STRIDE 2
@@ -46,7 +53,7 @@

// Side length of the pooled output: the padded conv output dimension minus
// the pool window, divided (integer division) by the pool stride, plus one.
#define POOL_OUT_DIM ((OUT_DIM + 2*POOL_PADDING - POOL_SIZE) / POOL_STRIDE + 1)

// Set to true only when the pool parameters describe an identity pool
// (size 1, stride 1, padding 0); the #if check that follows enforces this.
#define NO_POOL false

#if NO_POOL == true && !(POOL_SIZE == 1 && POOL_STRIDE == 1 && POOL_PADDING == 0)
#error NO_POOL is not set correctly
@@ -274,16 +281,26 @@ bool vec_is_equal(elem_t * a, elem_t * b, int len) {
}

// Fills buf[0 .. len) with test input values.
//
// When FAST is defined every element is set to 1. Otherwise each element is
// (rand() % 5) - 2, i.e. a value in [-2, 2].
void init_random(elem_t * buf, int len) {
    for (elem_t * ptr = buf; ptr < buf + len; ptr++) {
#ifdef FAST
        *ptr = 1;
#else
        *ptr = (rand() % 5) - 2;
#endif
    }
}

// Fills the accumulator-typed buffer buf[0 .. len) with test values.
//
// When FAST is defined every element is set to 1. Otherwise each element is
// (rand() % 5) - 2, i.e. a value in [-2, 2].
void init_random_acc(acc_t * buf, int len) {
    for (acc_t * ptr = buf; ptr < buf + len; ptr++) {
#ifdef FAST
        *ptr = 1;
#else
        *ptr = (rand() % 5) - 2;
#endif
    }
}

@@ -327,6 +344,7 @@ int main() {
else
init_random_acc(&bias[0], sizeof(bias) / sizeof(acc_t));

#ifndef FAST
printf("CPU conv...\n");
uint64_t start_cpu = read_cycles();
conv(BATCH_SIZE, IN_CHANNELS, IN_DIM,
@@ -350,6 +368,7 @@ int main() {
printf("CPU pool took %llu cycles\n", end_cpu_pool - start_cpu_pool);

printf("CPU conv+pool took %llu cycles\n", end_cpu_pool - start_cpu_pool + end_cpu - start_cpu);
#endif

static elem_t weights_mat[PATCH_SIZE][OUT_CHANNELS];
static elem_t output_mat[N_PATCHES][OUT_CHANNELS];
@@ -388,7 +407,19 @@ int main() {

assert(sizeof(pool_output_mat) == sizeof(pool_output));

#ifdef FAST
bool success = true;
for (int orow = 0; orow < BATCH_SIZE * POOL_OUT_DIM * POOL_OUT_DIM; orow++) {
for (int ocol = 0; ocol < OUT_CHANNELS; ocol++) {
if (pool_output_mat[orow][ocol] != 46) {
success = false;
break;
}
}
}
#else
bool success = vec_is_equal(&pool_output[0][0][0][0], &pool_output_mat[0][0], sizeof(pool_output) / sizeof(elem_t));
#endif

if (!success) {
// return 1;
@@ -500,4 +531,3 @@ int main() {

return 0;
}

61 changes: 43 additions & 18 deletions bareMetalC/matmul.c
Original file line number Diff line number Diff line change
@@ -12,16 +12,29 @@
#include <time.h>
#include "include/gemmini_testutils.h"

// Sweep configuration for test_os() and test_ws().
//   AINIT - first value of the `activation` loop (which runs while <= 2)
//   SINIT - first value of the `shift` / `scale` loop (runs while <= 4, step 4)
//   RAND  - source of pseudo-random values for operands and control flags
//   N     - number of DIM x DIM matrices per operand; N*N*N combinations run
// With FAST defined, both loops start at their final value and N is 1, so
// each test executes a single configuration.
#ifdef FAST
#define AINIT 2
#define SINIT 4
#define RAND (rand())
#define N 1
#else
#define AINIT 0
#define SINIT 0
#define RAND (rand())
#define N 2
#endif

// DIM x DIM matrix with static storage, hence zero-initialized.
static elem_t ZERO[DIM][DIM];

// Splits the combination index c into three operand indices, treating c as
// a base-N number: *d receives the lowest digit, *b the next digit, and *a
// whatever remains after dividing by N*N.
void operands(int c, int * a, int * b, int * d) {
    const int low_digit = c % N;
    const int mid_digit = (c / N) % N;
    const int high_part = c / (N*N);

    *d = low_digit;
    *b = mid_digit;
    *a = high_part;
}



void test_os (bool A_transpose, bool B_transpose) {
// Output stationary
printf("Output-stationary\n");
@@ -43,8 +56,8 @@ void test_os (bool A_transpose, bool B_transpose) {
matmul_full_ptr = &matmul_full_AB_transposed;
}

for (int activation = 0; activation <= 2; ++activation) {
for (int shift = 0; shift <= 4; shift += 4) {
for (int activation = AINIT; activation <= 2; ++activation) {
for (int shift = SINIT; shift <= 4; shift += 4) {
// printf("activation: %d, shift: %d\n", activation, shift);

static elem_t A[N][DIM][DIM] row_align(1);
@@ -91,13 +104,19 @@ void test_os (bool A_transpose, bool B_transpose) {
for (size_t n = 0; n < N; ++n) {
for (size_t i = 0; i < DIM; ++i) {
for (size_t j = 0; j < DIM; ++j) {
A[n][i][j] = (rand() % 64) - 32;
B[n][i][j] = (rand() % 64) - 32;
D[n][i][j] = (rand() % 64) - 32;
A[n][i][j] = (RAND % 64) - 32;
B[n][i][j] = (RAND % 64) - 32;
D[n][i][j] = (RAND % 64) - 32;
}
}
}

#ifdef FAST1
for (size_t i = 0; i < DIM; ++i) {
for (size_t j = 0; j < DIM; ++j) {
gold[0][i][j] = 1;
}
}
#else
for (size_t g = 0; g < N*N*N; ++g) {
int a, b, d;
operands(g, &a, &b, &d);
@@ -118,7 +137,7 @@ void test_os (bool A_transpose, bool B_transpose) {
else if (activation == RELU6)
matrelu6(gold[g], gold[g], 1 << relu6_shift);
}

#endif
int A_addr = 0;
int B_addr = N*DIM;
int D_addr = 2*N*DIM;
@@ -222,8 +241,8 @@ void test_ws(bool A_transpose, bool B_transpose) {
return;
}

for (int activation = 0; activation <= 2; ++activation) {
for (int scale = 0; scale <= 4; scale += 4) {
for (int activation = AINIT; activation <= 2; ++activation) {
for (int scale = SINIT; scale <= 4; scale += 4) {
static elem_t A[N][DIM][DIM] row_align(1);
static elem_t B[N][DIM][DIM] row_align(1);
static elem_t D[N][DIM][DIM] row_align(1);
@@ -238,17 +257,17 @@ void test_ws(bool A_transpose, bool B_transpose) {
// ...taking into account whether we preload new weights or re-use the old ones
static int preload[N*N*N] = {1};
for (int i = 1; i < N*N*N; ++i)
preload[i] = rand() % 2;
preload[i] = RAND % 2;

// ...whether we pass in a D or just use zeros
static int add_to_zeros[N*N*N];
for (int i = 0; i < N*N*N; ++i)
add_to_zeros[i] = rand() % 2;
add_to_zeros[i] = RAND % 2;

// ...and whether we accumulate on top of the previous result
static int accumulate[N*N*N] = {0};
for (int i = 1; i < N*N*N; ++i)
accumulate[i] = rand() % 2;
accumulate[i] = RAND % 2;

static int no_output[N*N*N];
for (int i = 0; i < N*N*N-1; ++i)
@@ -276,13 +295,19 @@ void test_ws(bool A_transpose, bool B_transpose) {
for (size_t n = 0; n < N; ++n) {
for (size_t i = 0; i < DIM; ++i) {
for (size_t j = 0; j < DIM; ++j) {
A[n][i][j] = (rand() % 64) - 32;
B[n][i][j] = (rand() % 64) - 32;
D[n][i][j] = (rand() % 64) - 32;
A[n][i][j] = (RAND % 64) - 32;
B[n][i][j] = (RAND % 64) - 32;
D[n][i][j] = (RAND % 64) - 32;
}
}
}

#ifdef FAST1
for (size_t i = 0; i < DIM; ++i) {
for (size_t j = 0; j < DIM; ++j) {
gold[0][i][j] = 64;
}
}
#else
for (size_t g = 0; g < N*N*N; ++g) {
int a, b, d;
operands(g, &a, &b, &d);
@@ -313,7 +338,7 @@ void test_ws(bool A_transpose, bool B_transpose) {
else if (activation == RELU6)
matrelu6(gold[g], gold[g], 1 << relu6_shift);
}

#endif
uint32_t A_addr = 0;
uint32_t B_addr = N*DIM;
uint32_t D_addr = 2*N*DIM;
14 changes: 11 additions & 3 deletions bareMetalC/matmul_os.c
Original file line number Diff line number Diff line change
@@ -11,7 +11,15 @@
#include <time.h>
#include "include/gemmini_testutils.h"

// Sweep configuration for main().
//   AINIT - first value of the `activation` loop (which runs while <= 2)
//   SINIT - first value of the `shift` loop (runs while <= 12, step 4)
//   N     - number of DIM x DIM matrices per operand
// With FAST defined, both loops start at their final value and N is 1, so
// only a single configuration is executed.
#ifdef FAST
#define AINIT 2
#define SINIT 12
#define N 1
#else
#define AINIT 0
#define SINIT 0
#define N 2
#endif

void operands(int c, int * a, int * b, int * d) {
*d = c % N;
@@ -37,8 +45,8 @@ int main() {

static elem_t ZERO[DIM][DIM];

for (int activation = 0; activation <= 2; ++activation) {
for (int shift = 0; shift <= 12; shift += 4) {
for (int activation = AINIT; activation <= 2; ++activation) {
for (int shift = SINIT; shift <= 12; shift += 4) {
// printf("activation: %d, shift: %d\n", activation, shift);

static elem_t A[N][DIM][DIM] row_align(1);
16 changes: 13 additions & 3 deletions bareMetalC/matmul_ws.c
Original file line number Diff line number Diff line change
@@ -11,7 +11,17 @@
#include <time.h>
#include "include/gemmini_testutils.h"

// Sweep configuration for main().
//   AINIT - first value of the `activation` loop (which runs while <= 2)
//   SINIT - first value of the integer `scale` loop (runs while <= 12, step 4);
//           the ACC_SCALE_T_IS_FLOAT variant of that loop does not use it
//   N     - number of DIM x DIM matrices per operand
// With FAST defined, AINIT and SINIT equal their loop bounds and N is 1.
#ifdef FAST
#define AINIT 2
#define SINIT 12
#define N 1
#else
#define AINIT 0
#define SINIT 0
#define N 2
#endif


void operands(int c, int * a, int * b, int * d) {
*d = c % N;
@@ -37,11 +47,11 @@ int main() {
gemmini_config_ld(DIM * sizeof(elem_t));
gemmini_config_st(DIM * sizeof(elem_t));

for (int activation = 0; activation <= 2; ++activation) {
for (int activation = AINIT; activation <= 2; ++activation) {
#ifdef ACC_SCALE_T_IS_FLOAT
for (acc_scale_t scale = 0; scale <= 1.5; scale += 0.5) {
#else
for (acc_scale_t scale = 0; scale <= 12; scale += 4) {
for (acc_scale_t scale = SINIT; scale <= 12; scale += 4) {
#endif
static elem_t A[N][DIM][DIM] row_align(1);
static elem_t B[N][DIM][DIM] row_align(1);
Loading