diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index fdf6fb6..fd0e04d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -18,18 +18,16 @@ init: stage: init extends: .base script: - - cd golden-model; source setup-py.sh + - cd golden-model; source setup-py.sh; cd .. artifacts: when: always expire_in: 1 week - paths: - - ./golden-model/venv + paths: [./golden-model/venv] .redmule-build-tpl: extends: .base stage: build - dependencies: - - init + needs: [init] script: - SETUP_CONFIG=${SETUP_CONFIG} - source scripts/${SETUP_CONFIG}.sh @@ -38,10 +36,7 @@ init: artifacts: when: always expire_in: 1 week - paths: - - ./.bender - - ./scripts/compile.tcl - - ./vsim/* + paths: [./.bender, ./scripts/compile.tcl, ./vsim/*] redmule-build-hwpe: extends: .redmule-build-tpl @@ -56,6 +51,7 @@ redmule-build-complex: .redmule-vsim-tpl: extends: .base stage: test + needs: [build] script: - SETUP_CONFIG=${SETUP_CONFIG} - source scripts/${SETUP_CONFIG}.sh @@ -67,8 +63,7 @@ redmule-build-complex: hwpe-test: extends: .redmule-vsim-tpl - dependencies: - - redmule-build-hwpe + needs: [init, redmule-build-hwpe] variables: SETUP_CONFIG: "setup-hwpe" parallel: @@ -91,11 +86,24 @@ hwpe-test: - { OP: gemm, M: 23, N: 31, K: 31, FMT: FP16 } - { OP: gemm, M: 24, N: 17, K: 32, FMT: FP16 } - { OP: gemm, M: 24, N: 20, K: 32, FMT: FP16 } + - { OP: gemm, M: 96, N: 96, K: 96, FMT: FP8 } + - { OP: gemm, M: 128, N: 128, K: 128, FMT: FP8 } + - { OP: gemm, M: 12, N: 16, K: 16, FMT: FP8 } + - { OP: gemm, M: 24, N: 16, K: 16, FMT: FP8 } + - { OP: gemm, M: 48, N: 32, K: 32, FMT: FP8 } + - { OP: gemm, M: 30, N: 32, K: 17, FMT: FP8 } + - { OP: gemm, M: 24, N: 32, K: 1, FMT: FP8 } + - { OP: gemm, M: 31, N: 32, K: 16, FMT: FP8 } + - { OP: gemm, M: 17, N: 32, K: 16, FMT: FP8 } + - { OP: gemm, M: 5, N: 32, K: 17, FMT: FP8 } + - { OP: gemm, M: 36, N: 31, K: 32, FMT: FP8 } + - { OP: gemm, M: 12, N: 31, K: 16, FMT: FP8 } + - { OP: gemm, M: 24, N: 17, K: 32, FMT: FP8 } + - { OP: gemm, M: 24, N: 20, K: 32, FMT: FP8 } complex-test: extends: .redmule-vsim-tpl - dependencies: - - redmule-build-complex + needs: [init, redmule-build-complex] variables: SETUP_CONFIG: "setup-complex" parallel: diff --git a/Bender.lock b/Bender.lock index 8c910c6..333d36e 100644 --- a/Bender.lock +++ b/Bender.lock @@ -7,8 +7,8 @@ packages: dependencies: - common_cells common_cells: - revision: 2bd027cb87eaa9bf7d17196ec5f69864b35b630f - version: 1.32.0 + revision: 0d67563b6b592549542544f1abc0f43e5d4ee8b4 + version: 1.35.0 source: Git: https://github.com/pulp-platform/common_cells.git dependencies: @@ -67,8 +67,8 @@ packages: dependencies: - tech_cells_generic hwpe-stream: - revision: 4c2ef8c33a6e2a8c88127e2153013d4f2dc3f448 - version: 1.7.0 + revision: 65c99a4a2f37a79acee800ab0151f67dfb1edef1 + version: 1.8.0 source: Git: https://github.com/pulp-platform/hwpe-stream.git dependencies: diff --git a/rtl/redmule_pkg.sv b/rtl/redmule_pkg.sv index d1b8ac7..a35e10a 100644 --- a/rtl/redmule_pkg.sv +++ b/rtl/redmule_pkg.sv @@ -30,7 +30,7 @@ package redmule_pkg; parameter fpnew_pkg::ifmt_logic_t IntFmtConfig = 4'b1000; parameter fpnew_pkg::operation_e CAST_OP = fpnew_pkg::F2F; parameter int unsigned MIN_FMT = fpnew_pkg::min_fp_width(FpFmtConfig); - parameter int unsigned DW_CUT = DATA_W - ARRAY_HEIGHT*(PIPE_REGS + 1)*MIN_FMT; + parameter int unsigned DW_CUT = DATAW - ARRAY_HEIGHT*(PIPE_REGS + 1)*MIN_FMT; // Register File mapping /********************** diff --git a/sw/redmule.c b/sw/redmule.c index 3e26e52..6d5d896 100644 --- a/sw/redmule.c +++ b/sw/redmule.c @@ -26,6 +26,7 @@ int main() { uint8_t *w = w_inp; uint8_t *y = y_inp; uint8_t *z = z_oup; // golden_out //1c010000 + uint32_t *gold = golden; uint8_t float_fmt = (SRC_FMT == FP8) ? (uint8_t)Float8 : (SRC_FMT == FP8ALT) ? (uint8_t)Float8Alt @@ -33,6 +34,8 @@ int main() { : (SRC_FMT == FP16ALT) ? (uint8_t)Float16Alt : (uint8_t)Float16; + int golden_size = (float_fmt == (Float8 | Float8Alt)) ? m_size*k_size/4 : m_size*k_size/2; + volatile int errors = 0; int gold_sum = 0, check_sum = 0; int i, j; @@ -65,7 +68,7 @@ int main() { if (float_fmt == Float16 || float_fmt == Float16Alt) errors = redmule16_compare_int(y, golden, m_size * k_size / 2); else if (float_fmt == Float8 || float_fmt == Float8Alt) - errors = redmule8_compare_int(y, golden, m_size * k_size / 4); + errors = redmule8_compare_int(y, gold, m_size, k_size); *(int *)0x80000000 = errors; diff --git a/sw/utils/redmule_utils.h b/sw/utils/redmule_utils.h index fc072f9..eefff79 100644 --- a/sw/utils/redmule_utils.h +++ b/sw/utils/redmule_utils.h @@ -66,13 +66,13 @@ int redmule16_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { errors += error; #ifdef DEBUG - tfp_printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); + tfp_printf("Golden: 0x%08lx @ 0x%08ln; Actual: 0x%08lx @ 0x%08ln,\n", golden_word, (golden_z + i), actual_word, (actual_z + i)); #endif #ifdef VERBOSE if (error) { if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); - tfp_printf("0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z + i), + tfp_printf("0x%08lx <- 0x%08lx @ 0x%08ln @ 0x%08x\n", golden_word, actual_word, (actual_z + i), i * 4); } #endif @@ -80,7 +80,7 @@ int redmule16_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { return errors; } -int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { +int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int m, int k) { uint32_t actual_word = 0; uint8_t actual_Byte0, actual_Byte1, actual_Byte2, actual_Byte3; uint32_t golden_word = 0; @@ -88,94 +88,99 @@ int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { uint32_t actual = 0; uint32_t golden = 0; + #define BytePerWord 4 + #define FpFormat 8 + uint32_t jump = k*FpFormat/BytePerWord; int errors = 0; int error; - for (int i = 0; i < len; i++) { - error = 0; - actual_word = *(actual_z + i); - golden_word = *(golden_z + i); + for (int j = 0; j < m; j++) { + for (int i = 0; i < k/4; i++) { + error = 0; + actual_word = *(actual_z + i + j*jump); + golden_word = *(golden_z + i + j); - // int error = ((actual_word ^ golden_word) & ~IGNORE_BITS_COMPARE) ? 1 : 0; - uint8_t diff = 0; + // int error = ((actual_word ^ golden_word) & ~IGNORE_BITS_COMPARE) ? 1 : 0; + uint8_t diff = 0; - // Cheching Byte0 - actual_Byte0 = (uint8_t)(actual_word & 0x000000FF); - golden_Byte0 = (uint8_t)(golden_word & 0x000000FF); + // Cheching Byte0 + actual_Byte0 = (uint8_t)(actual_word & 0x000000FF); + golden_Byte0 = (uint8_t)(golden_word & 0x000000FF); - diff = (actual_Byte0 > golden_Byte0) ? (actual_Byte0 - golden_Byte0) - : (actual_Byte0 < golden_Byte0) ? (golden_Byte0 - actual_Byte0) - : 0; + diff = (actual_Byte0 > golden_Byte0) ? (actual_Byte0 - golden_Byte0) + : (actual_Byte0 < golden_Byte0) ? (golden_Byte0 - actual_Byte0) + : 0; - if (diff > ERR) { - error = 1; + if (diff > ERR) { + error = 1; #ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte0: Error!\n"); #endif - } + } - // Cheching Byte1 - actual_Byte1 = (uint8_t)((actual_word >> 8) & 0x000000FF); - golden_Byte1 = (uint8_t)((golden_word >> 8) & 0x000000FF); + // Cheching Byte1 + actual_Byte1 = (uint8_t)((actual_word >> 8) & 0x000000FF); + golden_Byte1 = (uint8_t)((golden_word >> 8) & 0x000000FF); - diff = (actual_Byte1 > golden_Byte1) ? (actual_Byte1 - golden_Byte1) - : (actual_Byte1 < golden_Byte1) ? (golden_Byte1 - actual_Byte1) - : 0; + diff = (actual_Byte1 > golden_Byte1) ? (actual_Byte1 - golden_Byte1) + : (actual_Byte1 < golden_Byte1) ? (golden_Byte1 - actual_Byte1) + : 0; - if (diff > ERR) { - error = 1; + if (diff > ERR) { + error = 1; #ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte1: Error!\n"); #endif - } - - // Cheching Byte2 - actual_Byte2 = (uint8_t)((actual_word >> 16) & 0x000000FF); - golden_Byte2 = (uint8_t)((golden_word >> 16) & 0x000000FF); - - diff = (actual_Byte2 > golden_Byte2) ? (actual_Byte2 - golden_Byte2) - : (actual_Byte2 < golden_Byte2) ? (golden_Byte2 - actual_Byte2) - : 0; - - if (diff > ERR) { - error = 1; + } + + // Cheching Byte2 + actual_Byte2 = (uint8_t)((actual_word >> 16) & 0x000000FF); + golden_Byte2 = (uint8_t)((golden_word >> 16) & 0x000000FF); + + diff = (actual_Byte2 > golden_Byte2) ? (actual_Byte2 - golden_Byte2) + : (actual_Byte2 < golden_Byte2) ? (golden_Byte2 - actual_Byte2) + : 0; + + if (diff > ERR) { + error = 1; #ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte2: Error!\n"); #endif - } + } - // Cheching Byte3 - actual_Byte3 = (uint8_t)((actual_word >> 24) & 0x000000FF); - golden_Byte3 = (uint8_t)((golden_word >> 24) & 0x000000FF); + // Cheching Byte3 + actual_Byte3 = (uint8_t)((actual_word >> 24) & 0x000000FF); + golden_Byte3 = (uint8_t)((golden_word >> 24) & 0x000000FF); - diff = (actual_Byte3 > golden_Byte3) ? (actual_Byte3 - golden_Byte3) - : (actual_Byte3 < golden_Byte3) ? (golden_Byte3 - actual_Byte3) - : 0; + diff = (actual_Byte3 > golden_Byte3) ? (actual_Byte3 - golden_Byte3) + : (actual_Byte3 < golden_Byte3) ? (golden_Byte3 - actual_Byte3) + : 0; - if (diff > ERR) { - error = 1; + if (diff > ERR) { + error = 1; #ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte3: Error!\n"); #endif - } - - errors += error; + } + + errors += error; #ifdef DEBUG - tfp_printf("Golden: 0x%08x; Actual: 0x%08x,\n", golden_word, actual_word); + tfp_printf("Golden: 0x%08x @ 0x%08x; Actual: 0x%08x @ 0x%08x,\n", golden_word, (golden_z + i), actual_word, (actual_z + i)); #endif #ifdef VERBOSE - if (error) { - if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); - tfp_printf(" 0x%08x <- 0x%08x @ 0x%08x @ 0x%08x\n", golden_word, actual_word, (actual_z + i), - i * 4); - } + if (error) { + if (errors == 1) tfp_printf(" golden <- actual @ address @ index\n"); + tfp_printf(" 0x%x <- 0x%x @ 0x%x @ 0x%x\n", golden_word, actual_word, (actual_z + i), + i * 4); + } #endif + } } return errors; }