From 54b739fb80f60e0ec5738c4b7807a2487ff9282b Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Sat, 17 Feb 2024 01:53:09 +0100 Subject: [PATCH] Furhter fix SW lint. --- .github/workflows/lint.yml | 3 +- sw/archi_redmule.h | 37 +++++----- sw/hal_redmule.h | 64 +++++------------ sw/redmule.c | 25 +++---- sw/redmule_complex.c | 32 ++++----- sw/utils/redmule_utils.h | 144 +++++++++++++++++++------------------ 6 files changed, 138 insertions(+), 167 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 1bd7184..9cad719 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -65,9 +65,10 @@ jobs: ColumnLimit: 100, AlignEscapedNewlines: DontAlign, SortIncludes: false, - AllowShortFunctionsOnASingleLine: None, + AllowShortFunctionsOnASingleLine: true, AllowShortIfStatementsOnASingleLine: true, AllowShortLoopsOnASingleLine: true } exclude: | ./sw/inc/* + ./sw/utils/tinyprintf.h diff --git a/sw/archi_redmule.h b/sw/archi_redmule.h index 9e74220..b8fdbd6 100644 --- a/sw/archi_redmule.h +++ b/sw/archi_redmule.h @@ -49,29 +49,28 @@ #define ARCHI_CL_EVT_ACC0 0 #define ARCHI_CL_EVT_ACC1 1 -#define __builtin_bitinsert(a,b,c,d) (a | (((b << (32-c)) >> (32-c)) << d)) // RedMulE architecture -#define ADDR_WIDTH 32 -#define DATA_WIDTH 512 -#define REDMULE_FMT 16 +#define ADDR_WIDTH 32 +#define DATA_WIDTH 512 +#define REDMULE_FMT 16 #define ARRAY_HEIGHT 8 -#define PIPE_REGS 3 -#define ARRAY_WIDTH 24 /* Superior limit is ARRAY_HEIGHT*PIPE_REGS */ +#define PIPE_REGS 3 +#define ARRAY_WIDTH 24 /* Superior limit is ARRAY_HEIGHT*PIPE_REGS */ // Base address #define REDMULE_BASE_ADD 0x00100000 // Commands -#define REDMULE_TRIGGER 0x00 -#define REDMULE_ACQUIRE 0x04 -#define REDMULE_FINISHED 0x08 -#define REDMULE_STATUS 0x0C +#define REDMULE_TRIGGER 0x00 +#define REDMULE_ACQUIRE 0x04 +#define REDMULE_FINISHED 0x08 +#define REDMULE_STATUS 0x0C #define REDMULE_RUNNING_JOB 0x10 -#define REDMULE_SOFT_CLEAR 0x14 +#define REDMULE_SOFT_CLEAR 0x14 // Registers -#define REDMULE_REG_OFFS 0x40 +#define REDMULE_REG_OFFS 0x40 #define REDMULE_REG_X_PTR 0x00 #define REDMULE_REG_W_PTR 0x04 #define REDMULE_REG_Z_PTR 0x08 @@ -81,7 +80,7 @@ // OPs definition #define MATMUL 0x0 -#define GEMM 0x1 +#define GEMM 0x1 #define ADDMAX 0x2 #define ADDMIN 0x3 #define MULMAX 0x4 @@ -90,15 +89,15 @@ #define MINMAX 0x7 // GEMM formats -#define Float8 0x0 -#define Float16 0x1 -#define Float8Alt 0x2 +#define Float8 0x0 +#define Float16 0x1 +#define Float8Alt 0x2 #define Float16Alt 0x3 // FP Formats encoding -#define FP16 0x2 -#define FP8 0x3 +#define FP16 0x2 +#define FP8 0x3 #define FP16ALT 0x4 -#define FP8ALT 0x5 +#define FP8ALT 0x5 #endif diff --git a/sw/hal_redmule.h b/sw/hal_redmule.h index a552480..5cc8e09 100644 --- a/sw/hal_redmule.h +++ b/sw/hal_redmule.h @@ -10,92 +10,66 @@ #include "tensor_dim.h" -/* - * - * For control, generic configuration register layout, - * and job-dependent register map, look at redmule_archi.h - * - */ - /* LOW-LEVEL HAL */ #define REDMULE_ADDR_BASE REDMULE_BASE_ADD #define REDMULE_ADDR_SPACE 0x00000100 -// For all the following functions we use __builtin_pulp_OffsetedWrite and __builtin_pulp_OffsetedRead -// instead of classic load/store because otherwise the compiler is not able to correctly factorize -// the HWPE base in case several accesses are done, ending up with twice more code - #define HWPE_WRITE(value, offset) *(int *)(REDMULE_ADDR_BASE + offset) = value #define HWPE_READ(offset) *(int *)(REDMULE_ADDR_BASE + offset) -static inline void redmule_x_add_set (unsigned int value) { +static inline void redmule_x_add_set(unsigned int value) { HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_X_PTR); } -static inline void redmule_w_add_set (unsigned int value) { +static inline void redmule_w_add_set(unsigned int value) { HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_W_PTR); } -static inline void redmule_z_add_set (unsigned int value) { +static inline void redmule_z_add_set(unsigned int value) { HWPE_WRITE(value, REDMULE_REG_OFFS + REDMULE_REG_Z_PTR); } -static inline void redmule_mcfg_set (uint32_t mcfg0, uint32_t mcfg1) { +static inline void redmule_mcfg_set(uint32_t mcfg0, uint32_t mcfg1) { HWPE_WRITE(mcfg0, REDMULE_REG_OFFS + REDMULE_MCFG0_PTR); HWPE_WRITE(mcfg1, REDMULE_REG_OFFS + REDMULE_MCFG1_PTR); } -static inline void redmule_arith_set (uint32_t arith) { +static inline void redmule_arith_set(uint32_t arith) { HWPE_WRITE(arith, REDMULE_REG_OFFS + REDMULE_ARITH_PTR); } -static inline void hwpe_trigger_job() { - HWPE_WRITE(0, REDMULE_TRIGGER); -} +static inline void hwpe_trigger_job() { HWPE_WRITE(0, REDMULE_TRIGGER); } -static inline int hwpe_acquire_job() { - return HWPE_READ(REDMULE_ACQUIRE); -} +static inline int hwpe_acquire_job() { return HWPE_READ(REDMULE_ACQUIRE); } -static inline unsigned int hwpe_get_status() { - return HWPE_READ(REDMULE_STATUS); -} +static inline unsigned int hwpe_get_status() { return HWPE_READ(REDMULE_STATUS); } static inline void hwpe_soft_clear() { volatile int i; HWPE_WRITE(0, REDMULE_SOFT_CLEAR); } -static inline void hwpe_cg_enable() { - return; -} +static inline void hwpe_cg_enable() { return; } -static inline void hwpe_cg_disable() { - return; -} +static inline void hwpe_cg_disable() { return; } -void redmule_cfg (unsigned int x, unsigned int w, unsigned int z, - uint16_t m_size, uint16_t n_size, uint16_t k_size, - uint8_t gemm_op, uint8_t gemm_fmt){ +void redmule_cfg(unsigned int x, unsigned int w, unsigned int z, uint16_t m_size, uint16_t n_size, + uint16_t k_size, uint8_t gemm_op, uint8_t gemm_fmt) { uint32_t mcfg_reg0 = 0; uint32_t mcfg_reg1 = 0; uint32_t arith_reg = 0; - mcfg_reg0 = (k_size << 16) | - (m_size << 0); + mcfg_reg0 = (k_size << 16) | (m_size << 0); mcfg_reg1 = n_size << 0; - arith_reg = (gemm_op << 10) | - (gemm_fmt << 7); + arith_reg = (gemm_op << 10) | (gemm_fmt << 7); - redmule_x_add_set ((unsigned int) x); - redmule_w_add_set ((unsigned int) w); - redmule_z_add_set ((unsigned int) z); - redmule_mcfg_set ((unsigned int) mcfg_reg0, - (unsigned int) mcfg_reg1); - redmule_arith_set ((unsigned int) arith_reg); - + redmule_x_add_set((unsigned int)x); + redmule_w_add_set((unsigned int)w); + redmule_z_add_set((unsigned int)z); + redmule_mcfg_set((unsigned int)mcfg_reg0, (unsigned int)mcfg_reg1); + redmule_arith_set((unsigned int)arith_reg); } #endif diff --git a/sw/redmule.c b/sw/redmule.c index a9c57a6..68e6b26 100644 --- a/sw/redmule.c +++ b/sw/redmule.c @@ -29,8 +29,8 @@ int main() { volatile int errors = 0; int gold_sum = 0, check_sum = 0; - int i,j; - + int i, j; + int offload_id_tmp, offload_id; // Enable RedMulE @@ -38,29 +38,26 @@ int main() { hwpe_soft_clear(); - while( ( offload_id_tmp = hwpe_acquire_job() ) < 0); - - redmule_cfg ((unsigned int) x, - (unsigned int) w, - (unsigned int) y, - m_size, n_size, k_size, - (uint8_t) GEMM, - (uint8_t) Float16); + while ((offload_id_tmp = hwpe_acquire_job()) < 0) + ; + + redmule_cfg((unsigned int)x, (unsigned int)w, (unsigned int)y, m_size, n_size, k_size, + (uint8_t)GEMM, (uint8_t)Float16); // Start RedMulE operation hwpe_trigger_job(); // Wait for end of computation - asm volatile ("wfi" ::: "memory"); + asm volatile("wfi" ::: "memory"); // Disable RedMulE hwpe_cg_disable(); - errors = redmule16_compare_int(y, golden, m_size*k_size/2); + errors = redmule16_compare_int(y, golden, m_size * k_size / 2); - *(int *) 0x80000000 = errors; + *(int *)0x80000000 = errors; - tfp_printf ("Terminated test with %d errors. See you!\n", errors); + tfp_printf("Terminated test with %d errors. See you!\n", errors); return errors; } diff --git a/sw/redmule_complex.c b/sw/redmule_complex.c index bce6c60..f70cb6c 100644 --- a/sw/redmule_complex.c +++ b/sw/redmule_complex.c @@ -26,16 +26,16 @@ int main() { uint16_t n_size = N_SIZE; uint16_t k_size = K_SIZE; - uint32_t x_addr = *(uint32_t *) &x; - uint32_t w_addr = *(uint32_t *) &w; - uint32_t y_addr = *(uint32_t *) &y; + uint32_t x_addr = *(uint32_t *)&x; + uint32_t w_addr = *(uint32_t *)&w; + uint32_t y_addr = *(uint32_t *)&y; uint32_t cfg_reg0 = ((k_size << 16) | (m_size << 0)); uint32_t cfg_reg1 = (n_size << 0); - asm volatile ("addi t0, %0, 0" :: "r"(x_addr)); - asm volatile ("addi t1, %0, 0" :: "r"(w_addr)); - asm volatile ("addi t2, %0, 0" :: "r"(y_addr)); - asm volatile ("addi t3, %0, 0" :: "r"(cfg_reg0)); - asm volatile ("addi t4, %0, 0" :: "r"(cfg_reg1)); + asm volatile("addi t0, %0, 0" ::"r"(x_addr)); + asm volatile("addi t1, %0, 0" ::"r"(w_addr)); + asm volatile("addi t2, %0, 0" ::"r"(y_addr)); + asm volatile("addi t3, %0, 0" ::"r"(cfg_reg0)); + asm volatile("addi t4, %0, 0" ::"r"(cfg_reg1)); /* mcnfig instruction */ // asm volatile( @@ -44,9 +44,8 @@ int main() { // (0b11100 << 15) | \ /* Rs1 */ // (0x00 << 7) | \ /* Empty */ // (0b0001011 << 0) \n"); /* OpCode */ - - asm volatile( - ".word (0x0 << 25) | \ + + asm volatile(".word (0x0 << 25) | \ (0b11101 << 20) | \ (0b11100 << 15) | \ (0x00 << 7) | \ @@ -66,8 +65,7 @@ int main() { // (0b001 << 7) | \ /* Data format */ // (0b0101011 << 0) \n"); /* OpCode */ - asm volatile( - ".word (0b00111 << 27) | \ + asm volatile(".word (0b00111 << 27) | \ (0b00 << 25) | \ (0b00110 << 20) | \ (0b00101 << 15) | \ @@ -78,13 +76,13 @@ int main() { (0b0101011 << 0) \n"); // Wait for end of computation - asm volatile ("wfi" ::: "memory"); + asm volatile("wfi" ::: "memory"); - errors = redmule16_compare_int(y, golden, m_size*k_size/2); + errors = redmule16_compare_int(y, golden, m_size * k_size / 2); - *(int *) 0x80000000 = errors; + *(int *)0x80000000 = errors; - tfp_printf ("Terminated test with %d errors. See you!\n", errors); + tfp_printf("Terminated test with %d errors. See you!\n", errors); return errors; } diff --git a/sw/utils/redmule_utils.h b/sw/utils/redmule_utils.h index 9a8469f..b1edf2a 100644 --- a/sw/utils/redmule_utils.h +++ b/sw/utils/redmule_utils.h @@ -10,8 +10,9 @@ #ifndef REDMULE_UTILS_H #define REDMULE_UTILS_H +#define ERR 0x0011 + int redmule16_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { - #define ERR 0x0011 uint32_t actual_word = 0; uint16_t actual_MSHWord, actual_LSHWord; uint32_t golden_word = 0; @@ -22,150 +23,151 @@ int redmule16_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { int errors = 0; int error; - for (int i=0; i golden_LSHWord) ? (actual_LSHWord - golden_LSHWord) : - (actual_LSHWord < golden_LSHWord) ? (golden_LSHWord - actual_LSHWord) : 0; + diff = (actual_LSHWord > golden_LSHWord) ? (actual_LSHWord - golden_LSHWord) + : (actual_LSHWord < golden_LSHWord) ? (golden_LSHWord - actual_LSHWord) + : 0; if (diff > ERR) { error = 1; - #ifdef VERBOSE - tfp_printf ("diff: 0x%08x\n", diff); - tfp_printf ("LSW: Error!\n"); - #endif +#ifdef VERBOSE + tfp_printf("diff: 0x%08x\n", diff); + tfp_printf("LSW: Error!\n"); +#endif } // Checking Most Significant Half-Word actual_MSHWord = (uint16_t)((actual_word >> 16) & 0x0000FFFF); golden_MSHWord = (uint16_t)((golden_word >> 16) & 0x0000FFFF); - diff = (actual_MSHWord > golden_MSHWord) ? (actual_MSHWord - golden_MSHWord) : - (actual_MSHWord < golden_MSHWord) ? (golden_MSHWord - actual_MSHWord) : 0; + diff = (actual_MSHWord > golden_MSHWord) ? (actual_MSHWord - golden_MSHWord) + : (actual_MSHWord < golden_MSHWord) ? (golden_MSHWord - actual_MSHWord) + : 0; if (diff > ERR) { error = 1; - #ifdef VERBOSE - tfp_printf ("diff: 0x%08x\n", diff); - tfp_printf ("MSW: Error!\n"); - #endif +#ifdef VERBOSE + tfp_printf("diff: 0x%08x\n", diff); + tfp_printf("MSW: Error!\n"); +#endif } - + errors += error; - #ifdef DEBUG - tfp_printf(" Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); - #endif +#ifdef DEBUG + tfp_printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); +#endif - #ifdef VERBOSE - if(error) { - if(errors==1) tfp_printf(" golden <- actual @ address @ index\n"); - tfp_printf(" 0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z+i), i*4); +#ifdef VERBOSE + if (error) { + if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); + tfp_printf("0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z + i), + i * 4); } - #endif +#endif } return errors; } int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { - #define ERR 0x0011 uint32_t actual_word = 0; - uint8_t actual_Byte0, - actual_Byte1, - actual_Byte2, - actual_Byte3; + uint8_t actual_Byte0, actual_Byte1, actual_Byte2, actual_Byte3; uint32_t golden_word = 0; - uint8_t golden_Byte0, - golden_Byte1, - golden_Byte2, - golden_Byte3; + uint8_t golden_Byte0, golden_Byte1, golden_Byte2, golden_Byte3; uint32_t actual = 0; uint32_t golden = 0; int errors = 0; int error; - for (int i=0; i golden_Byte0) ? (actual_Byte0 - golden_Byte0) : - (actual_Byte0 < golden_Byte0) ? (golden_Byte0 - actual_Byte0) : 0; + diff = (actual_Byte0 > golden_Byte0) ? (actual_Byte0 - golden_Byte0) + : (actual_Byte0 < golden_Byte0) ? (golden_Byte0 - actual_Byte0) + : 0; if (diff > ERR) { error = 1; - tfp_printf ("diff: 0x%08x\n", diff); - tfp_printf ("Byte0: Error!\n"); + tfp_printf("diff: 0x%08x\n", diff); + tfp_printf("Byte0: Error!\n"); } // Cheching Byte1 - actual_Byte1 = (uint8_t)( (actual_word >> 8 ) & 0x000000FF); - golden_Byte1 = (uint8_t)( (golden_word >> 8 ) & 0x000000FF); + actual_Byte1 = (uint8_t)((actual_word >> 8) & 0x000000FF); + golden_Byte1 = (uint8_t)((golden_word >> 8) & 0x000000FF); - diff = (actual_Byte1 > golden_Byte1) ? (actual_Byte1 - golden_Byte1) : - (actual_Byte1 < golden_Byte1) ? (golden_Byte1 - actual_Byte1) : 0; + diff = (actual_Byte1 > golden_Byte1) ? (actual_Byte1 - golden_Byte1) + : (actual_Byte1 < golden_Byte1) ? (golden_Byte1 - actual_Byte1) + : 0; if (diff > ERR) { error = 1; - tfp_printf ("diff: 0x%08x\n", diff); - tfp_printf ("Byte1: Error!\n"); + tfp_printf("diff: 0x%08x\n", diff); + tfp_printf("Byte1: Error!\n"); } // Cheching Byte2 - actual_Byte2 = (uint8_t)( (actual_word >> 16 ) & 0x000000FF); - golden_Byte2 = (uint8_t)( (golden_word >> 16 ) & 0x000000FF); + actual_Byte2 = (uint8_t)((actual_word >> 16) & 0x000000FF); + golden_Byte2 = (uint8_t)((golden_word >> 16) & 0x000000FF); - diff = (actual_Byte2 > golden_Byte2) ? (actual_Byte2 - golden_Byte2) : - (actual_Byte2 < golden_Byte2) ? (golden_Byte2 - actual_Byte2) : 0; + diff = (actual_Byte2 > golden_Byte2) ? (actual_Byte2 - golden_Byte2) + : (actual_Byte2 < golden_Byte2) ? (golden_Byte2 - actual_Byte2) + : 0; if (diff > ERR) { error = 1; - tfp_printf ("diff: 0x%08x\n", diff); - tfp_printf ("Byte2: Error!\n"); + tfp_printf("diff: 0x%08x\n", diff); + tfp_printf("Byte2: Error!\n"); } // Cheching Byte3 - actual_Byte3 = (uint8_t)( (actual_word >> 24 ) & 0x000000FF); - golden_Byte3 = (uint8_t)( (golden_word >> 24 ) & 0x000000FF); + actual_Byte3 = (uint8_t)((actual_word >> 24) & 0x000000FF); + golden_Byte3 = (uint8_t)((golden_word >> 24) & 0x000000FF); - diff = (actual_Byte3 > golden_Byte3) ? (actual_Byte3 - golden_Byte3) : - (actual_Byte3 < golden_Byte3) ? (golden_Byte3 - actual_Byte3) : 0; + diff = (actual_Byte3 > golden_Byte3) ? (actual_Byte3 - golden_Byte3) + : (actual_Byte3 < golden_Byte3) ? (golden_Byte3 - actual_Byte3) + : 0; if (diff > ERR) { error = 1; - tfp_printf ("diff: 0x%08x\n", diff); - tfp_printf ("Byte3: Error!\n"); + tfp_printf("diff: 0x%08x\n", diff); + tfp_printf("Byte3: Error!\n"); } - + errors += error; - #ifdef DEBUG - tfp_printf(" Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); - #endif +#ifdef DEBUG + tfp_printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); +#endif - #ifdef VERBOSE - if(error) { - if(errors==1) tfp_printf(" golden <- actual @ address @ index\n"); - tfp_printf(" 0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z+i), i*4); - } - #endif +#ifdef VERBOSE + if (error) { + if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); + tfp_printf(" 0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z + i), + i * 4); + } +#endif } return errors; }