From ea06f15a30aa4305831b3fbef933043c3e8a1e34 Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Mon, 26 Feb 2024 16:49:57 +0100 Subject: [PATCH] Addi CI - step 3. --- .gitlab-ci.yml | 127 +++++++++++++++++++----------- Makefile | 137 ++++++++++++--------------------- README.md | 2 +- rtl/redmule_complex.sv | 23 +----- rtl/redmule_complex_wrap.sv | 56 ++++++++------ rtl/redmule_ctrl.sv | 3 +- rtl/redmule_inst_decoder.sv | 42 +++++----- rtl/redmule_pkg.sv | 29 ++++--- rtl/redmule_tiler.sv | 4 +- rtl/redmule_top.sv | 73 ++++++------------ rtl/redmule_wrap.sv | 135 ++++++++++++++------------------ scripts/non-regression_test.sh | 13 ++-- scripts/parse_s19.pl | 1 + sw/archi_redmule.h | 6 +- sw/redmule.c | 20 ++++- sw/utils/redmule_utils.h | 8 ++ tb/redmule_complex_tb.sv | 55 ++++++------- tb/redmule_tb.sv | 23 +++--- 18 files changed, 367 insertions(+), 390 deletions(-) diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 4d474d8..a8cd408 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -4,78 +4,117 @@ # # Yvan Tortorella +.base: + artifacts: + when: always + expire_in: 1 week + stages: - init - build + - test init: stage: init + extends: .base script: - cd golden-model; source setup-py.sh artifacts: when: always expire_in: 1 week - paths: [ golden-model/venv ] - - -.base: - artifacts: - when: always - expire_in: 1 week + paths: + - ./golden-model/venv .redmule-build-tpl: extends: .base - needs: [ init ] stage: build + dependencies: + - init script: - SETUP_CONFIG=${SETUP_CONFIG} - source scripts/${SETUP_CONFIG}.sh - make update-ips - make build-hw + artifacts: + when: always + expire_in: 1 week + paths: + - ./.bender + - ./scripts/compile.tcl + - ./vsim/* redmule-build-hwpe: extends: .redmule-build-tpl variables: SETUP_CONFIG: "setup-hwpe" - artifacts: - when: always - expire_in: 1 week - paths: [ golden-model/venv, .bender, scripts/compile.tcl, work ] redmule-build-complex: extends: .redmule-build-tpl variables: SETUP_CONFIG: "setup-complex" - artifacts: - when: always - expire_in: 1 week - paths: [ golden-model/venv, .bender, scripts/compile.tcl, work ] -# .redmule-vsim-tpl: -# extends: .base -# needs: [ build ] -# stage: test -# script: -# - SETUP-CONFIG=${SETUP-CONFIG} -# - source scripts/${SETUP-CONFIG}.sh -# - make goden M=${M} N=${N} K=${K} -# - make clean all -# - make run -# - '(grep -rn Success!" ./work/transcript)' -# - (! grep -rn "Fail!" ./work/transcript) -# -# hwpe-test: -# extends: .redmule-vsim-tpl -# variables: -# SETUP-CONFIG: "setup-hwpe" -# parallel: -# matrix: -# - { M: 32, N=32, K=32 } -# -# complex-test: -# extends: .redmule-vsim-tpl -# variables: -# SETUP-CONFIG: "setup-complex" -# parallel: -# matrix: -# - { M: 32, N=32, K=32 } +.redmule-vsim-tpl: + extends: .base + stage: test + script: + - SETUP_CONFIG=${SETUP_CONFIG} + - source scripts/${SETUP_CONFIG}.sh + - make golden OP=${OP} M=${M} N=${N} K=${K} fp_fmt=${FMT} + - make clean-sw build-sw + - make run + - '(grep -rn "Success!" ./vsim/transcript)' + - (! grep -rn "Fail!" ./vsim/transcript) + +hwpe-test: + extends: .redmule-vsim-tpl + dependencies: + - redmule-build-hwpe + variables: + SETUP_CONFIG: "setup-hwpe" + parallel: + matrix: + - { OP: gemm, M: 96, N: 96, K: 96, FMT: FP16 } + - { OP: gemm, M: 128, N: 128, K: 128, FMT: FP16 } + - { OP: gemm, M: 12, N: 16, K: 16, FMT: FP16 } + - { OP: gemm, M: 24, N: 16, K: 16, FMT: FP16 } + - { OP: gemm, M: 48, N: 32, K: 32, FMT: FP16 } + - { OP: gemm, M: 30, N: 32, K: 17, FMT: FP16 } + - { OP: gemm, M: 24, N: 32, K: 1, FMT: FP16 } + - { OP: gemm, M: 31, N: 32, K: 16, FMT: FP16 } + - { OP: gemm, M: 17, N: 32, K: 16, FMT: FP16 } + - { OP: gemm, M: 31, N: 32, K: 31, FMT: FP16 } + - { OP: gemm, M: 17, N: 32, K: 3, FMT: FP16 } + - { OP: gemm, M: 5, N: 32, K: 17, FMT: FP16 } + - { OP: gemm, M: 5, N: 32, K: 3, FMT: FP16 } + - { OP: gemm, M: 36, N: 31, K: 32, FMT: FP16 } + - { OP: gemm, M: 12, N: 31, K: 16, FMT: FP16 } + - { OP: gemm, M: 23, N: 31, K: 31, FMT: FP16 } + - { OP: gemm, M: 24, N: 17, K: 32, FMT: FP16 } + - { OP: gemm, M: 24, N: 20, K: 32, FMT: FP16 } + +complex-test: + extends: .redmule-vsim-tpl + dependencies: + - redmule-build-complex + variables: + SETUP_CONFIG: "setup-complex" + parallel: + matrix: + - { OP: gemm, M: 96, N: 96, K: 96, FMT: FP16 } + - { OP: gemm, M: 128, N: 128, K: 128, FMT: FP16 } + - { OP: gemm, M: 12, N: 16, K: 16, FMT: FP16 } + - { OP: gemm, M: 24, N: 16, K: 16, FMT: FP16 } + - { OP: gemm, M: 48, N: 32, K: 32, FMT: FP16 } + - { OP: gemm, M: 30, N: 32, K: 17, FMT: FP16 } + - { OP: gemm, M: 24, N: 32, K: 1, FMT: FP16 } + - { OP: gemm, M: 31, N: 32, K: 16, FMT: FP16 } + - { OP: gemm, M: 17, N: 32, K: 16, FMT: FP16 } + - { OP: gemm, M: 31, N: 32, K: 31, FMT: FP16 } + - { OP: gemm, M: 17, N: 32, K: 3, FMT: FP16 } + - { OP: gemm, M: 5, N: 32, K: 17, FMT: FP16 } + - { OP: gemm, M: 5, N: 32, K: 3, FMT: FP16 } + - { OP: gemm, M: 36, N: 31, K: 32, FMT: FP16 } + - { OP: gemm, M: 12, N: 31, K: 16, FMT: FP16 } + - { OP: gemm, M: 23, N: 31, K: 31, FMT: FP16 } + - { OP: gemm, M: 24, N: 17, K: 32, FMT: FP16 } + - { OP: gemm, M: 24, N: 20, K: 32, FMT: FP16 } diff --git a/Makefile b/Makefile index b7b9d21..8a6f808 100644 --- a/Makefile +++ b/Makefile @@ -8,9 +8,10 @@ # Paths to folders mkfile_path := $(dir $(abspath $(firstword $(MAKEFILE_LIST)))) -SW ?= $(mkfile_path)/sw -BUILD_DIR ?= $(mkfile_path)/work -QUESTA ?= questa-2020.1 +SW ?= $(mkfile_path)sw +BUILD_DIR ?= $(SW)/build +VSIM_DIR ?= $(mkfile_path)vsim +QUESTA ?= questa-2023.4 BENDER_DIR ?= . BENDER ?= bender ISA ?= riscv @@ -19,17 +20,17 @@ XLEN ?= 32 XTEN ?= imc ifeq ($(REDMULE_COMPLEX),1) - TEST_SRCS := sw/redmule_complex.c + TEST_SRCS := $(SW)/redmule_complex.c else - TEST_SRCS := sw/redmule.c + TEST_SRCS := $(SW)/redmule.c endif -compile_script ?= scripts/compile.tcl -compile_script_synth ?= scripts/synth_compile.tcl -compile_flag ?= -suppress 2583 -suppress 13314 +compile_script ?= $(mkfile_path)scripts/compile.tcl +compile_script_synth ?= $(mkfile_path)scripts/synth_compile.tcl +compile_flag ?= +acc -permissive -suppress 2583 -suppress 13314 -INI_PATH = $(mkfile_path)/modelsim.ini -WORK_PATH = $(BUILD_DIR) +INI_PATH = $(mkfile_path)modelsim.ini +WORK_PATH = $(VSIM_DIR)/work # Useful Parameters gui ?= 0 @@ -45,12 +46,12 @@ ifeq ($(debug),1) endif # Include directories -INC += -Isw -INC += -Isw/inc -INC += -Isw/utils +INC += -I$(SW) +INC += -I$(SW)/inc +INC += -I$(SW)/utils -BOOTSCRIPT := sw/kernel/crt0.S -LINKSCRIPT := sw/kernel/link.ld +BOOTSCRIPT := $(SW)/kernel/crt0.S +LINKSCRIPT := $(SW)/kernel/link.ld CC=$(ISA)$(XLEN)-unknown-elf-gcc LD=$(CC) @@ -60,21 +61,17 @@ LD_OPTS=-march=$(ARCH)$(XLEN)$(XTEN) -mabi=ilp32 -D__$(ISA)__ -MMD -MP -nostartf # Setup build object dirs CRT=$(BUILD_DIR)/crt0.o -OBJ=$(BUILD_DIR)/$(TEST_SRCS)/verif.o -BIN=$(BUILD_DIR)/$(TEST_SRCS)/verif -DUMP=$(BUILD_DIR)/$(TEST_SRCS)/verif.dump -STIM_INSTR=$(BUILD_DIR)/$(TEST_SRCS)/stim_instr.txt -STIM_DATA=$(BUILD_DIR)/$(TEST_SRCS)/stim_data.txt -VSIM_INI=$(BUILD_DIR)/$(TEST_SRCS)/modelsim.ini -VSIM_LIBS=$(BUILD_DIR)/$(TEST_SRCS)/work +OBJ=$(BUILD_DIR)/verif.o +BIN=$(BUILD_DIR)/verif +DUMP=$(BUILD_DIR)/verif.dump +STIM_INSTR=$(VSIM_DIR)/stim_instr.txt +STIM_DATA=$(VSIM_DIR)/stim_data.txt # Build implicit rules $(STIM_INSTR) $(STIM_DATA): $(BIN) objcopy --srec-len 1 --output-target=srec $(BIN) $(BIN).s19 scripts/parse_s19.pl $(BIN).s19 > $(BIN).txt python scripts/s19tomem.py $(BIN).txt $(STIM_INSTR) $(STIM_DATA) - ln -sfn $(INI_PATH) $(VSIM_INI) - ln -sfn $(WORK_PATH) $(VSIM_LIBS) $(BIN): $(CRT) $(OBJ) $(LD) $(LD_OPTS) -o $(BIN) $(CRT) $(OBJ) -T$(LINKSCRIPT) @@ -82,35 +79,32 @@ $(BIN): $(CRT) $(OBJ) $(CRT): $(BUILD_DIR) $(CC) $(CC_OPTS) -c $(BOOTSCRIPT) -o $(CRT) -$(OBJ): $(TEST_SRCS) $(BUILD_DIR)/$(TEST_SRCS) +$(OBJ): $(TEST_SRCS) $(CC) $(CC_OPTS) -c $(TEST_SRCS) $(FLAGS) $(INC) -o $(OBJ) -$(BUILD_DIR)/$(TEST_SRCS): - mkdir -p $(BUILD_DIR)/$(TEST_SRCS) - $(BUILD_DIR): mkdir -p $(BUILD_DIR) SHELL := /bin/bash # Generate instructions and data stimuli -all: $(STIM_INSTR) $(STIM_DATA) dis +build-sw: $(STIM_INSTR) $(STIM_DATA) dis # Run the simulation run: $(CRT) ifeq ($(gui), 0) - cd $(BUILD_DIR)/$(TEST_SRCS); \ - $(QUESTA) vsim -c vopt_tb -do "run -a" \ - -gSTIM_INSTR=stim_instr.txt \ - -gSTIM_DATA=stim_data.txt \ + cd $(VSIM_DIR); \ + $(QUESTA) vsim -c $(tb)_opt -do "run -a" \ + -gSTIM_INSTR=$(STIM_INSTR) \ + -gSTIM_DATA=$(STIM_DATA) \ -gPROB_STALL=$(P_STALL) else - cd $(BUILD_DIR)/$(TEST_SRCS); \ - $(QUESTA) vsim vopt_tb \ - -do "add log -r sim:/$(tb)/*" \ + cd $(VSIM_DIR); \ + $(QUESTA) vsim $(tb)_opt \ + -do "log -r /*" \ -do "source $(WAVES)" \ - -gSTIM_INSTR=stim_instr.txt \ - -gSTIM_DATA=stim_data.txt \ + -gSTIM_INSTR=$(STIM_INSTR) \ + -gSTIM_DATA=$(STIM_DATA) \ -gPROB_STALL=$(P_STALL) endif @@ -123,30 +117,26 @@ include bender_common.mk include bender_sim.mk include bender_synth.mk -bender_defs += -D COREV_ASSERT_OFF - -bender_targs += -t rtl -bender_targs += -t test -bender_targs += -t cv32e40p_exclude_tracer - ifeq ($(REDMULE_COMPLEX),1) tb := redmule_complex_tb - WAVES := $(mkfile_path)/wave_complex_xif.do - bender_targs += -t redmule_complex + WAVES := $(mkfile_path)wave_complex_xif.do else tb := redmule_tb - WAVES := $(mkfile_path)/wave.do - bender_targs += -t redmule_hwpe + WAVES := $(mkfile_path)wave.do endif -update-ips: +$(VSIM_DIR): + mkdir -p $(VSIM_DIR) + +update-ips: $(VSIM_DIR) $(BENDER) update $(BENDER) script vsim \ --vlog-arg="$(compile_flag)" \ --vcom-arg="-pedanticerrors" \ - $(bender_targs) $(bender_defs) \ - $(sim_targs) $(sim_deps) \ + $(common_targs) $(common_defs) \ + $(sim_targs) \ > ${compile_script} + echo 'vopt $(compile_flag) $(tb) -o $(tb)_opt' >> ${compile_script} synth-ips: $(BENDER) update @@ -155,18 +145,11 @@ synth-ips: $(synth_targs) $(synth_defs) \ > ${compile_script_synth} -build-hw: hw-all - -sdk: - cd $(SW); \ - git clone \ - git@github.com:pulp-platform/pulp-sdk.git - -clean-sdk: - rm -rf $(SW)/pulp-sdk +clean-sw: + rm -rf $(BUILD_DIR) -clean: - rm -rf $(BUILD_DIR)/$(TEST_SRCS) +clean-hw: + rm -rf $(VSIM_DIR) dis: $(OBJDUMP) -d $(BIN) > $(DUMP) @@ -184,30 +167,10 @@ golden-clean: $(MAKE) -C golden-model golden-clean # Hardware rules -hw-clean-all: - rm -rf $(BUILD_DIR) - rm -rf .bender +clean-all: clean-hw clean-sw + rm -rf $(mkfile_path).bender rm -rf $(compile_script) - rm -rf modelsim.ini - rm -rf *.log - rm -rf transcript - rm -rf .cached_ipdb.json - -hw-opt: - $(QUESTA) vopt +acc=npr -o vopt_tb $(tb) -floatparameters+$(tb) -work $(BUILD_DIR) - -hw-compile: - $(QUESTA) vsim -c +incdir+$(UVM_HOME) -do 'quit -code [source $(compile_script)]' - -hw-lib: - @touch modelsim.ini - @mkdir -p $(BUILD_DIR) - @$(QUESTA) vlib $(BUILD_DIR) - @$(QUESTA) vmap work $(BUILD_DIR) - @chmod +w modelsim.ini - -hw-clean: - rm -rf transcript - rm -rf modelsim.ini -hw-all: hw-clean hw-lib hw-compile hw-opt +build-hw: $(VSIM_DIR) + cd $(VSIM_DIR); \ + $(QUESTA) vsim -c -do 'quit -code [source $(compile_script)]' diff --git a/README.md b/README.md index 9a8a6bd..9bc1bf8 100644 --- a/README.md +++ b/README.md @@ -247,7 +247,7 @@ make build-hw To run the available tests, just do: ```bash -make all +make build-sw make run (gui=1 to open the Questasim Graphic User Interface) ``` It is possible to run the test introducing a parametric probability of stall by explicitly passing the `P_STALL` parameter while running the test (`P_STALL=0.1` means a stall probability of the 10%). diff --git a/rtl/redmule_complex.sv b/rtl/redmule_complex.sv index 617d009..b36bd4e 100644 --- a/rtl/redmule_complex.sv +++ b/rtl/redmule_complex.sv @@ -5,8 +5,6 @@ // Yvan Tortorella // -`include "hwpe-ctrl/typedef.svh" - module redmule_complex import cv32e40x_pkg::*; import fpnew_pkg::*; @@ -31,8 +29,6 @@ module redmule_complex parameter type core_data_rsp_t = logic, parameter type core_inst_req_t = logic, parameter type core_inst_rsp_t = logic, - parameter type redmule_data_req_t = logic, - parameter type redmule_data_rsp_t = logic, // Data format (default is FP16) localparam fp_format_e FpFormat = FPFORMAT, // Number of PEs within a row @@ -58,8 +54,7 @@ module redmule_complex output core_inst_req_t core_inst_req_o , input core_data_rsp_t core_data_rsp_i , output core_data_req_t core_data_req_o , - input redmule_data_rsp_t redmule_data_rsp_i, - output redmule_data_req_t redmule_data_req_o + hci_core_intf.master tcdm ); localparam int unsigned SysDataWidth = (CoreType == CVA6) ? 64 : 32; @@ -69,11 +64,6 @@ logic busy; logic s_clk, s_clk_en; logic [N_CORES-1:0][1:0] evt; -// verilog_lint: waive-start line-length -`HWPE_CTRL_TYPEDEF_REQ_T(redmule_ctrl_req_t, logic [31:0], logic [31:0], logic [3:0], logic [ID_WIDTH-1:0]) -`HWPE_CTRL_TYPEDEF_RSP_T(redmule_ctrl_rsp_t, logic [31:0], logic [ID_WIDTH-1:0]) -// verilog_lint: waive-stop line-length - core_inst_req_t core_inst_req; core_inst_rsp_t core_inst_rsp; @@ -276,21 +266,14 @@ redmule_top #( .DW ( DW ), .X_EXT ( XExt ), .SysInstWidth ( SysInstWidth ), - .SysDataWidth ( SysDataWidth ), - .redmule_data_req_t ( redmule_data_req_t ), - .redmule_data_rsp_t ( redmule_data_rsp_t ), - .redmule_ctrl_req_t ( redmule_ctrl_req_t ), - .redmule_ctrl_rsp_t ( redmule_ctrl_rsp_t ) + .SysDataWidth ( SysDataWidth ) ) i_redmule_top ( .clk_i ( s_clk ), .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), .evt_o ( evt ), .busy_o ( busy ), - .data_rsp_i ( redmule_data_rsp_i ), - .data_req_o ( redmule_data_req_o ), - .ctrl_req_i ( '0 ), - .ctrl_rsp_o ( ), + .tcdm ( tcdm ), .xif_issue_if_i ( core_xif.coproc_issue ), .xif_result_if_o ( core_xif.coproc_result ), .xif_compressed_if_i( core_xif.coproc_compressed ), diff --git a/rtl/redmule_complex_wrap.sv b/rtl/redmule_complex_wrap.sv index 61b42c7..75c6179 100644 --- a/rtl/redmule_complex_wrap.sv +++ b/rtl/redmule_complex_wrap.sv @@ -42,19 +42,23 @@ core_default_inst_rsp_t core_inst_rsp; core_default_inst_req_t core_inst_req; core_default_data_rsp_t core_data_rsp; core_default_data_req_t core_data_req; -redmule_default_data_rsp_t redmule_data_rsp; -redmule_default_data_req_t redmule_data_req; + +hci_core_intf #(.DW(DW)) tcdm (.clk(clk_i)); always_ff @(posedge clk_i, negedge rst_ni) begin if (~rst_ni) begin // Inputs - test_mode <= '0; - fetch_enable <= '0; - boot_addr <= '0; - irq <= '0; - core_inst_rsp <= '0; - core_data_rsp <= '0; - redmule_data_rsp <= '0; + test_mode <= '0; + fetch_enable <= '0; + boot_addr <= '0; + irq <= '0; + core_inst_rsp <= '0; + core_data_rsp <= '0; + tcdm.gnt <= '0; + tcdm.r_valid <= '0; + tcdm.r_data <= '0; + tcdm.r_opc <= '0; + tcdm.r_user <= '0; // Outputs irq_id_o <= '0; irq_ack_o <= '0; @@ -64,20 +68,31 @@ always_ff @(posedge clk_i, negedge rst_ni) begin redmule_data_req_o <= '0; end else begin // Inputs - test_mode <= test_mode_i ; - fetch_enable <= fetch_enable_i ; - boot_addr <= boot_addr_i ; - irq <= irq_i ; - core_inst_rsp <= core_inst_rsp_i ; - core_data_rsp <= core_data_rsp_i ; - redmule_data_rsp <= redmule_data_rsp_i; + test_mode <= test_mode_i ; + fetch_enable <= fetch_enable_i ; + boot_addr <= boot_addr_i ; + irq <= irq_i ; + core_inst_rsp <= core_inst_rsp_i ; + core_data_rsp <= core_data_rsp_i ; + tcdm.gnt <= redmule_data_rsp_i.gnt ; + tcdm.r_valid <= redmule_data_rsp_i.r_valid; + tcdm.r_data <= redmule_data_rsp_i.r_data ; + tcdm.r_opc <= redmule_data_rsp_i.r_opc ; + tcdm.r_user <= redmule_data_rsp_i.r_user ; // Outputs irq_id_o <= irq_id ; irq_ack_o <= irq_ack ; core_sleep_o <= core_sleep ; core_inst_req_o <= core_inst_req ; core_data_req_o <= core_data_req ; - redmule_data_req_o <= redmule_data_req; + redmule_data_req_o.req <= tcdm.req ; + redmule_data_req_o.wen <= tcdm.wen ; + redmule_data_req_o.be <= tcdm.be ; + redmule_data_req_o.boffs <= tcdm.boffs; + redmule_data_req_o.add <= tcdm.add ; + redmule_data_req_o.data <= tcdm.data ; + redmule_data_req_o.lrdy <= tcdm.lrdy ; + redmule_data_req_o.user <= tcdm.user ; end end @@ -92,9 +107,7 @@ redmule_complex #( .core_data_req_t ( core_default_data_req_t ), .core_data_rsp_t ( core_default_data_rsp_t ), .core_inst_req_t ( core_default_inst_req_t ), - .core_inst_rsp_t ( core_default_inst_rsp_t ), - .redmule_data_req_t ( redmule_default_data_req_t ), - .redmule_data_rsp_t ( redmule_default_data_rsp_t ) + .core_inst_rsp_t ( core_default_inst_rsp_t ) ) i_redmule_complex ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), @@ -109,8 +122,7 @@ redmule_complex #( .core_inst_req_o ( core_inst_req ), .core_data_rsp_i ( core_data_rsp ), .core_data_req_o ( core_data_req ), - .redmule_data_rsp_i ( redmule_data_rsp ), - .redmule_data_req_o ( redmule_data_req ) + .tcdm ( tcdm ) ); endmodule : redmule_complex_wrap diff --git a/rtl/redmule_ctrl.sv b/rtl/redmule_ctrl.sv index d202cc9..29d53dd 100644 --- a/rtl/redmule_ctrl.sv +++ b/rtl/redmule_ctrl.sv @@ -84,8 +84,7 @@ module redmule_ctrl .N_CONTEXT ( N_CONTEXT ), .N_IO_REGS ( REDMULE_REGS ), .N_GENERIC_REGS ( 6 ), - .ID_WIDTH ( ID_WIDTH ), - .DATA_WIDTH ( SysDataWidth ) + .ID_WIDTH ( ID_WIDTH ) ) i_slave ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), diff --git a/rtl/redmule_inst_decoder.sv b/rtl/redmule_inst_decoder.sv index d65e90e..60ebc9a 100644 --- a/rtl/redmule_inst_decoder.sv +++ b/rtl/redmule_inst_decoder.sv @@ -16,8 +16,6 @@ module redmule_inst_decoder parameter int unsigned FormatWidth = 3 , parameter int unsigned OpCodeWidth = 7 , parameter int unsigned NumCfgRegs = 6 , - parameter type redmule_ctrl_req_t = logic , - parameter type redmule_ctrl_rsp_t = logic , localparam int unsigned SizeLarge = SysDataWidth/2, localparam int unsigned SizeSmall = SysDataWidth/4 )( @@ -28,8 +26,7 @@ module redmule_inst_decoder cv32e40x_if_xif.coproc_result xif_result_if_o, cv32e40x_if_xif.coproc_compressed xif_compressed_if_i, cv32e40x_if_xif.coproc_mem xif_mem_if_o, - output redmule_ctrl_req_t cfg_req_o , - input redmule_ctrl_rsp_t cfg_rsp_i , + hwpe_ctrl_intf_periph.master periph , input logic cfg_complete_i , output logic start_cfg_o ); @@ -171,7 +168,12 @@ always_comb begin : cfg_fsm count_rst = '0; count_update = '0; next = current; - cfg_req_o = '0; + periph.req = '0; + periph.wen = '0; + periph.be = '0; + periph.add = '0; + periph.id = '0; + periph.data = '0; start_cfg_o = 1'b0; case (current) @@ -181,13 +183,13 @@ always_comb begin : cfg_fsm end WriteCfg: begin - cfg_req_o.req = 1'b1; - cfg_req_o.wen = 1'b0; - cfg_req_o.be = '1; - cfg_req_o.add = 'h40 + 4*reg_offs; - cfg_req_o.id = '0; - cfg_req_o.data = cfg_reg_q[reg_offs]; - if (cfg_rsp_i.gnt) begin + periph.req = 1'b1; + periph.wen = 1'b0; + periph.be = '1; + periph.add = 'h40 + 4*reg_offs; + periph.id = '0; + periph.data = cfg_reg_q[reg_offs]; + if (periph.gnt) begin count_update = 1'b1; if (reg_offs == NumCfgRegs - 1) begin next = Trigger; @@ -199,14 +201,14 @@ always_comb begin : cfg_fsm Trigger: begin if (cfg_complete_i) begin - cfg_req_o.req = 1'b1; - cfg_req_o.wen = 1'b0; - cfg_req_o.be = '1; - cfg_req_o.add = '0; - cfg_req_o.id = '0; - cfg_req_o.data = '0; - - if (cfg_rsp_i.gnt) + periph.req = 1'b1; + periph.wen = 1'b0; + periph.be = '1; + periph.add = '0; + periph.id = '0; + periph.data = '0; + + if (periph.gnt) next = Idle; end end diff --git a/rtl/redmule_pkg.sv b/rtl/redmule_pkg.sv index 91b36a6..d1b8ac7 100644 --- a/rtl/redmule_pkg.sv +++ b/rtl/redmule_pkg.sv @@ -5,17 +5,13 @@ // Yvan Tortorella // -`include "hci/typedef.svh" -`include "hci/assign.svh" -`include "hwpe-ctrl/typedef.svh" - import fpnew_pkg::*; import hci_package::*; import hwpe_stream_package::*; package redmule_pkg; - parameter int unsigned DATA_W = 544; // TCDM port dimension (in bits) + parameter int unsigned DATA_W = 288; // TCDM port dimension (in bits) parameter int unsigned MemDw = 32; parameter int unsigned NumByte = MemDw/8; parameter int unsigned ADDR_W = hci_package::DEFAULT_AW; @@ -24,7 +20,7 @@ package redmule_pkg; parameter int unsigned N_CONTEXT = 2; parameter fpnew_pkg::fp_format_e FPFORMAT = fpnew_pkg::FP16; parameter int unsigned BITW = fpnew_pkg::fp_width(FPFORMAT); - parameter int unsigned ARRAY_HEIGHT = 8; + parameter int unsigned ARRAY_HEIGHT = 4; parameter int unsigned PIPE_REGS = 3; parameter int unsigned ARRAY_WIDTH = ARRAY_HEIGHT*PIPE_REGS; // Superior limit, smaller values are allowed. parameter int unsigned TOT_DEPTH = DATAW/BITW; @@ -302,10 +298,23 @@ package redmule_pkg; logic [31:0] data; } core_default_data_rsp_t; - `HCI_TYPEDEF_REQ_T(redmule_default_data_req_t, logic [31:0], logic [DATA_W-1:0], logic [DATA_W/8-1:0], logic signed [DATA_W/32-1:0][31:0], logic) - `HCI_TYPEDEF_RSP_T(redmule_default_data_rsp_t, logic [DATA_W-1:0], logic) + typedef struct packed { + logic req; + logic wen; + logic [DATA_W/8-1:0] be; + logic signed [DATA_W/32-1:0][31:0]boffs; + logic [31:0] add; + logic [DATA_W-1:0] data; + logic lrdy; + logic user; + } redmule_default_data_req_t; - `HWPE_CTRL_TYPEDEF_REQ_T(redmule_default_ctrl_req_t, logic [31:0], logic [31:0], logic [3:0], logic [ID-1:0]) - `HWPE_CTRL_TYPEDEF_RSP_T(redmule_default_ctrl_rsp_t, logic [31:0], logic [ID-1:0]) + typedef struct packed { + logic gnt; + logic r_valid; + logic [DATA_W-1:0] r_data; + logic r_opc; + logic r_user; + } redmule_default_data_rsp_t; endpackage diff --git a/rtl/redmule_tiler.sv b/rtl/redmule_tiler.sv index feee6e0..be5e298 100644 --- a/rtl/redmule_tiler.sv +++ b/rtl/redmule_tiler.sv @@ -187,11 +187,11 @@ assign config_d.stage_2_op = FPU_MINMAX; assign config_d.input_format = config_d.gemm_input_fmt == Float16 ? FPU_FP16 : config_d.gemm_input_fmt == Float8 ? FPU_FP8 : config_d.gemm_input_fmt == Float16Alt ? FPU_FP16ALT : - FPU_FP8ALT; + FPU_FP8ALT; assign config_d.computing_format = config_d.gemm_output_fmt == Float16 ? FPU_FP16 : config_d.gemm_output_fmt == Float8 ? FPU_FP8 : config_d.gemm_output_fmt == Float16Alt ? FPU_FP16ALT : - FPU_FP8ALT; + FPU_FP8ALT; assign config_d.gemm_selection = config_d.gemm_ops == MATMUL ? 1'b0 : 1'b1; assign config_d.x_d1_stride = ((NumByte*BITW)/ADDR_W)*(((DATAW/BITW)*x_cols_iter_nolftovr) + config_d.x_cols_lftovr); diff --git a/rtl/redmule_top.sv b/rtl/redmule_top.sv index 5f7797c..19a3627 100644 --- a/rtl/redmule_top.sv +++ b/rtl/redmule_top.sv @@ -5,11 +5,6 @@ // Yvan Tortorella // -`include "hci/typedef.svh" -`include "hci/assign.svh" -`include "hwpe-ctrl/typedef.svh" -`include "hwpe-ctrl/assign.svh" - module redmule_top import fpnew_pkg::*; import redmule_pkg::*; @@ -17,24 +12,20 @@ module redmule_top import hwpe_ctrl_package::*; import hwpe_stream_package::*; #( -parameter int unsigned ID_WIDTH = 8 , -parameter int unsigned N_CORES = 8 , -parameter int unsigned DW = DATA_W , // TCDM port dimension (in bits) -parameter int unsigned UW = 1 , -parameter int unsigned X_EXT = 0 , -parameter int unsigned SysInstWidth = 32 , -parameter int unsigned SysDataWidth = 32 , -parameter type redmule_data_req_t = logic , -parameter type redmule_data_rsp_t = logic , -parameter type redmule_ctrl_req_t = logic , -parameter type redmule_ctrl_rsp_t = logic , -localparam int unsigned NumContext = N_CONTEXT , // Number of sequential jobs for the slave device -localparam fp_format_e FpFormat = FPFORMAT , // Data format (default is FP16) -localparam int unsigned Height = ARRAY_HEIGHT , // Number of PEs within a row -localparam int unsigned Width = ARRAY_WIDTH , // Number of parallel rows -localparam int unsigned NumPipeRegs = PIPE_REGS , // Number of pipeline registers within each PE -localparam pipe_config_t PipeConfig = DISTRIBUTED , -localparam int unsigned BITW = fp_width(FpFormat) // Number of bits for the given format + parameter int unsigned ID_WIDTH = 8 , + parameter int unsigned N_CORES = 8 , + parameter int unsigned DW = DATA_W , // TCDM port dimension (in bits) + parameter int unsigned UW = 1 , + parameter int unsigned X_EXT = 0 , + parameter int unsigned SysInstWidth = 32 , + parameter int unsigned SysDataWidth = 32 , + localparam int unsigned NumContext = N_CONTEXT , // Number of sequential jobs for the slave device + localparam fp_format_e FpFormat = FPFORMAT , // Data format (default is FP16) + localparam int unsigned Height = ARRAY_HEIGHT , // Number of PEs within a row + localparam int unsigned Width = ARRAY_WIDTH , // Number of parallel rows + localparam int unsigned NumPipeRegs = PIPE_REGS , // Number of pipeline registers within each PE + localparam pipe_config_t PipeConfig = DISTRIBUTED , + localparam int unsigned BITW = fp_width(FpFormat) // Number of bits for the given format )( input logic clk_i , input logic rst_ni , @@ -46,13 +37,12 @@ localparam int unsigned BITW = fp_width(FpFormat) // Number of b cv32e40x_if_xif.coproc_result xif_result_if_o, cv32e40x_if_xif.coproc_compressed xif_compressed_if_i, cv32e40x_if_xif.coproc_mem xif_mem_if_o, -`endif - // TCDM interface towards the memory - output redmule_data_req_t data_req_o , - input redmule_data_rsp_t data_rsp_i , +`elsif TARGET_REDMULE_HWPE // Periph slave port for the controller side - input redmule_ctrl_req_t ctrl_req_i, - output redmule_ctrl_rsp_t ctrl_rsp_o + hwpe_ctrl_intf_periph.slave periph, +`endif + // TCDM master ports for the memory side + hci_core_intf.master tcdm ); localparam int unsigned DATAW_ALIGN = DATAW; @@ -73,31 +63,18 @@ logic [$clog2(TOT_DEPTH):0] w_cols_lftovr, logic [$clog2(Height):0] w_rows_lftovr; logic [$clog2(Width):0] y_rows_lftovr; -hci_core_intf #( .DW ( DW ), - .UW ( UW ) ) tcdm ( .clk ( clk_i ) ); - -hwpe_ctrl_intf_periph #( .AddrWidth ( 32 ), - .DataWidth ( 32 ), - .ID_WIDTH (ID_WIDTH) ) periph ( .clk(clk_i) ); - -`HCI_ASSIGN_FROM_INTF(tcdm, data_req_o, data_rsp_i) - `ifdef TARGET_REDMULE_HWPE /* If there is no Xif we directly plug the control port into the hwpe-slave device */ - `HWPE_CTRL_ASSIGN_TO_INTF(periph, ctrl_req_i, ctrl_rsp_o) assign start_cfg = ((periph.req) && (periph.add[7:0] == 'h54) && (!periph.wen) && (periph.gnt)) ? 1'b1 : 1'b0; -`else +`elsif TARGET_REDMULE_COMPLEX + hwpe_ctrl_intf_periph #( .ID_WIDTH (ID_WIDTH) ) periph ( .clk(clk_i) ); /* If there is the Xif, we pass through the instruction decoder and then enter into the hwpe slave device */ - - redmule_ctrl_req_t instr_req; - redmule_ctrl_rsp_t instr_rsp; - `HWPE_CTRL_ASSIGN_TO_INTF(periph, instr_req, instr_rsp) logic [SysDataWidth-1:0] cfg_reg; logic [SysDataWidth-1:0] sizem, sizen, sizek; logic [SysDataWidth-1:0] x_addr, w_addr, y_addr, z_addr; @@ -105,9 +82,7 @@ hwpe_ctrl_intf_periph #( .AddrWidth ( 32 ), redmule_inst_decoder #( .SysInstWidth ( SysInstWidth ), .SysDataWidth ( SysDataWidth ), - .NumRfReadPrts ( 3 ), // FIXME: parametric - .redmule_ctrl_req_t ( redmule_ctrl_req_t ), - .redmule_ctrl_rsp_t ( redmule_ctrl_rsp_t ) + .NumRfReadPrts ( 3 ) // FIXME: parametric ) i_inst_decoder ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), @@ -116,13 +91,11 @@ hwpe_ctrl_intf_periph #( .AddrWidth ( 32 ), .xif_result_if_o ( xif_result_if_o ), .xif_compressed_if_i ( xif_compressed_if_i ), .xif_mem_if_o ( xif_mem_if_o ), - .cfg_req_o ( instr_req ), - .cfg_rsp_i ( instr_rsp ), + .periph ( periph ), .cfg_complete_i ( cfg_complete ), .start_cfg_o ( start_cfg ) ); - assign ctrl_rsp_o = '0; `endif // Streamer control signals and flags diff --git a/rtl/redmule_wrap.sv b/rtl/redmule_wrap.sv index 7b4d2d2..c4d5eac 100644 --- a/rtl/redmule_wrap.sv +++ b/rtl/redmule_wrap.sv @@ -5,10 +5,6 @@ // Yvan Tortorella // -`include "hci/typedef.svh" -`include "hci/assign.svh" -`include "hwpe-ctrl/typedef.svh" - module redmule_wrap import fpnew_pkg::*; import hci_package::*; @@ -58,15 +54,9 @@ module redmule_wrap output logic [ID_WIDTH-1:0] periph_r_id_o ); -`HCI_TYPEDEF_REQ_T(redmule_data_req_t, logic [31:0], logic [DW-1:0], logic [DW/8-1:0], logic signed [DW/32-1:0][31:0], logic) -`HCI_TYPEDEF_RSP_T(redmule_data_rsp_t, logic [DW-1:0], logic) -`HWPE_CTRL_TYPEDEF_REQ_T(redmule_ctrl_req_t, logic [31:0], logic [31:0], logic [3:0], logic [ID-1:0]) -`HWPE_CTRL_TYPEDEF_RSP_T(redmule_ctrl_rsp_t, logic [31:0], logic [ID-1:0]) +hci_core_intf #(.DW(DW)) tcdm (.clk(clk_i)); +hwpe_ctrl_intf_periph #(.ID_WIDTH(ID_WIDTH)) periph (.clk(clk_i)); -redmule_data_req_t data_req; -redmule_data_rsp_t data_rsp; -redmule_ctrl_req_t ctrl_req; -redmule_ctrl_rsp_t ctrl_rsp; logic busy; logic [N_CORES-1:0][1:0] evt; @@ -81,18 +71,18 @@ logic [N_CORES-1:0][1:0] evt; tcdm_be_o [ii] <= '0; tcdm_data_o [ii] <= '0; end - data_rsp.gnt <= '0; - data_rsp.r_valid <= '0; - data_rsp.r_data <= '0; - data_rsp.r_opc <= '0; - data_rsp.r_user <= '0; + tcdm.gnt <= '0; + tcdm.r_valid <= '0; + tcdm.r_data <= '0; + tcdm.r_opc <= '0; + tcdm.r_user <= '0; // Control port - ctrl_req.req <= '0; - ctrl_req.add <= '0; - ctrl_req.wen <= '0; - ctrl_req.be <= '0; - ctrl_req.data <= '0; - ctrl_req.id <= '0; + periph.req <= '0; + periph.add <= '0; + periph.wen <= '0; + periph.be <= '0; + periph.data <= '0; + periph.id <= '0; periph_gnt_o <= '0; periph_r_data_o <= '0; periph_r_valid_o <= '0; @@ -103,78 +93,71 @@ logic [N_CORES-1:0][1:0] evt; end else begin // TCDM port for (int ii = 0; ii < MP; ii++) begin - tcdm_req_o [ii] <= data_req.req; - tcdm_add_o [ii] <= data_req.add + ii*4; - tcdm_wen_o [ii] <= data_req.wen; - tcdm_be_o [ii] <= data_req.be[ii*4+:4]; - tcdm_data_o [ii] <= data_req.data[ii*32+:32]; + tcdm_req_o [ii] <= tcdm.req; + tcdm_add_o [ii] <= tcdm.add + ii*4; + tcdm_wen_o [ii] <= tcdm.wen; + tcdm_be_o [ii] <= tcdm.be[ii*4+:4]; + tcdm_data_o [ii] <= tcdm.data[ii*32+:32]; end - data_rsp.gnt <= &(tcdm_gnt_i); - data_rsp.r_valid <= &(tcdm_r_valid_i); - data_rsp.r_data <= { >> {tcdm_r_data_i} }; - data_rsp.r_opc <= tcdm_r_opc_i; - data_rsp.r_user <= tcdm_r_user_i; + tcdm.gnt <= &(tcdm_gnt_i); + tcdm.r_valid <= &(tcdm_r_valid_i); + tcdm.r_data <= { >> {tcdm_r_data_i} }; + tcdm.r_opc <= tcdm_r_opc_i; + tcdm.r_user <= tcdm_r_user_i; // Control port - ctrl_req.req <= periph_req_i; - ctrl_req.add <= periph_add_i; - ctrl_req.wen <= periph_wen_i; - ctrl_req.be <= periph_be_i; - ctrl_req.data <= periph_data_i; - ctrl_req.id <= periph_id_i; - periph_gnt_o <= ctrl_rsp.gnt; - periph_r_data_o <= ctrl_rsp.r_data; - periph_r_valid_o <= ctrl_rsp.r_valid; - periph_r_id_o <= ctrl_rsp.r_id; + periph.req <= periph_req_i; + periph.add <= periph_add_i; + periph.wen <= periph_wen_i; + periph.be <= periph_be_i; + periph.data <= periph_data_i; + periph.id <= periph_id_i; + periph_gnt_o <= periph.gnt; + periph_r_data_o <= periph.r_data; + periph_r_valid_o <= periph.r_valid; + periph_r_id_o <= periph.r_id; // Other busy_o <= busy; evt_o <= evt; end end `else - generate - for(genvar ii=0; ii> {tcdm_r_data_i} }; - assign data_rsp.r_opc = tcdm_r_opc_i; - assign data_rsp.r_user = tcdm_r_user_i; - endgenerate + for(genvar ii=0; ii> {tcdm_r_data_i} }; + assign tcdm.r_opc = tcdm_r_opc_i; + assign tcdm.r_user = tcdm_r_user_i; - assign ctrl_req.req = periph_req_i; - assign ctrl_req.add = periph_add_i; - assign ctrl_req.wen = periph_wen_i; - assign ctrl_req.be = periph_be_i; - assign ctrl_req.data = periph_data_i; - assign ctrl_req.id = periph_id_i; - assign periph_gnt_o = ctrl_rsp.gnt; - assign periph_r_data_o = ctrl_rsp.r_data; - assign periph_r_valid_o = ctrl_rsp.r_valid; - assign periph_r_id_o = ctrl_rsp.r_id; + assign periph.req = periph_req_i; + assign periph.add = periph_add_i; + assign periph.wen = periph_wen_i; + assign periph.be = periph_be_i; + assign periph.data = periph_data_i; + assign periph.id = periph_id_i; + assign periph_gnt_o = periph.gnt; + assign periph_r_data_o = periph.r_data; + assign periph_r_valid_o = periph.r_valid; + assign periph_r_id_o = periph.r_id; `endif + redmule_top #( .ID_WIDTH ( ID_WIDTH ), .N_CORES ( N_CORES ), - .DW ( DW ), - .redmule_data_req_t ( redmule_data_req_t ), - .redmule_data_rsp_t ( redmule_data_rsp_t ), - .redmule_ctrl_req_t ( redmule_ctrl_req_t ), - .redmule_ctrl_rsp_t ( redmule_ctrl_rsp_t ) + .DW ( DW ) ) i_redmule_top ( .clk_i ( clk_i ), .rst_ni ( rst_ni ), .test_mode_i ( test_mode_i ), .evt_o ( evt_o ), .busy_o ( busy_o ), - .data_req_o ( data_req ), - .data_rsp_i ( data_rsp ), - .ctrl_req_i ( ctrl_req ), - .ctrl_rsp_o ( ctrl_rsp ) + .tcdm ( tcdm ), + .periph ( periph ) ); endmodule: redmule_wrap diff --git a/scripts/non-regression_test.sh b/scripts/non-regression_test.sh index 37b00e8..9dc1975 100755 --- a/scripts/non-regression_test.sh +++ b/scripts/non-regression_test.sh @@ -49,15 +49,16 @@ do i=$(( $i + 3 )) - make golden M=$M N=$N K=$K > /dev/null - make all 1>/dev/null 2>&1 + make clean-hw build-hw 1>/dev/null 2>&1 + make golden M=$M N=$N K=$K 1>/dev/null 2>&1 + make clean-sw build-sw 1>/dev/null 2>&1 timeout $BASE_TIMEOUT make run 1>/dev/null 2>&1 - - if [[ $? -eq 124 ]] + grep -rn "Success!" $PWD/vsim/transcript 1>/dev/null 2>&1 + if [[ $? -eq 0 ]] then - echo -e "${Red}ERROR ${EndColor}: M=$M N=$N K=$K" + echo -e "${Green}OK ${EndColor}: M=$M N=$N K=$K" else - echo -e "${Green}OK ${EndColor}: M=$M N=$N K=$K" + echo -e "${Red}ERROR ${EndColor}: M=$M N=$N K=$K" fi done diff --git a/scripts/parse_s19.pl b/scripts/parse_s19.pl index aa99782..18ce531 100755 --- a/scripts/parse_s19.pl +++ b/scripts/parse_s19.pl @@ -1,3 +1,4 @@ +#!/usr/bin/perl -w # Copyright 2023 ETH Zurich and University of Bologna. # Licensed under the Apache License, Version 2.0, see LICENSE for details. # SPDX-License-Identifier: Apache-2.0 diff --git a/sw/archi_redmule.h b/sw/archi_redmule.h index b8fdbd6..6708c0f 100644 --- a/sw/archi_redmule.h +++ b/sw/archi_redmule.h @@ -52,11 +52,11 @@ // RedMulE architecture #define ADDR_WIDTH 32 -#define DATA_WIDTH 512 +#define DATA_WIDTH 256 #define REDMULE_FMT 16 -#define ARRAY_HEIGHT 8 +#define ARRAY_HEIGHT 4 #define PIPE_REGS 3 -#define ARRAY_WIDTH 24 /* Superior limit is ARRAY_HEIGHT*PIPE_REGS */ +#define ARRAY_WIDTH 12 /* Superior limit is ARRAY_HEIGHT*PIPE_REGS */ // Base address #define REDMULE_BASE_ADD 0x00100000 diff --git a/sw/redmule.c b/sw/redmule.c index 68e6b26..3e26e52 100644 --- a/sw/redmule.c +++ b/sw/redmule.c @@ -27,6 +27,12 @@ int main() { uint8_t *y = y_inp; uint8_t *z = z_oup; // golden_out //1c010000 + uint8_t float_fmt = (SRC_FMT == FP8) ? (uint8_t)Float8 + : (SRC_FMT == FP8ALT) ? (uint8_t)Float8Alt + : (SRC_FMT == FP16) ? (uint8_t)Float16 + : (SRC_FMT == FP16ALT) ? (uint8_t)Float16Alt + : (uint8_t)Float16; + volatile int errors = 0; int gold_sum = 0, check_sum = 0; int i, j; @@ -42,18 +48,24 @@ int main() { ; redmule_cfg((unsigned int)x, (unsigned int)w, (unsigned int)y, m_size, n_size, k_size, - (uint8_t)GEMM, (uint8_t)Float16); + (uint8_t)gemm_ops, float_fmt); - // Start RedMulE operation + // Start RedMulE operation and sleeping until the end of computation + printf("Triggering accelerator and going to sleep...\n"); hwpe_trigger_job(); - // Wait for end of computation asm volatile("wfi" ::: "memory"); + // At the end of accelerator's computation, we resume and check on results + printf("Resumed!\n"); + // Disable RedMulE hwpe_cg_disable(); - errors = redmule16_compare_int(y, golden, m_size * k_size / 2); + if (float_fmt == Float16 || float_fmt == Float16Alt) + errors = redmule16_compare_int(y, golden, m_size * k_size / 2); + else if (float_fmt == Float8 || float_fmt == Float8Alt) + errors = redmule8_compare_int(y, golden, m_size * k_size / 4); *(int *)0x80000000 = errors; diff --git a/sw/utils/redmule_utils.h b/sw/utils/redmule_utils.h index b1edf2a..fc072f9 100644 --- a/sw/utils/redmule_utils.h +++ b/sw/utils/redmule_utils.h @@ -109,8 +109,10 @@ int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { if (diff > ERR) { error = 1; +#ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte0: Error!\n"); +#endif } // Cheching Byte1 @@ -123,8 +125,10 @@ int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { if (diff > ERR) { error = 1; +#ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte1: Error!\n"); +#endif } // Cheching Byte2 @@ -137,8 +141,10 @@ int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { if (diff > ERR) { error = 1; +#ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte2: Error!\n"); +#endif } // Cheching Byte3 @@ -151,8 +157,10 @@ int redmule8_compare_int(uint32_t *actual_z, uint32_t *golden_z, int len) { if (diff > ERR) { error = 1; +#ifdef VERBOSE tfp_printf("diff: 0x%08x\n", diff); tfp_printf("Byte3: Error!\n"); +#endif } errors += error; diff --git a/tb/redmule_complex_tb.sv b/tb/redmule_complex_tb.sv index c82d35e..de85971 100644 --- a/tb/redmule_complex_tb.sv +++ b/tb/redmule_complex_tb.sv @@ -5,9 +5,6 @@ // Yvan Tortorella // -`include "hci/typedef.svh" -`include "hci/assign.svh" - timeunit 1ps; timeprecision 1ps; @@ -27,8 +24,8 @@ import redmule_pkg::*; parameter int unsigned PULP_ZFINX = 0; parameter logic [31:0] BASE_ADDR = 32'h1c000000; parameter logic [31:0] HWPE_ADDR_BASE_BIT = 20; - parameter string STIM_INSTR = "../../stim_instr.txt"; - parameter string STIM_DATA = "../../stim_data.txt"; + parameter string STIM_INSTR = "./stim_instr.txt"; + parameter string STIM_DATA = "./stim_data.txt"; // global signals logic clk; @@ -135,8 +132,7 @@ import redmule_pkg::*; logic [31:0] data; } core_data_rsp_t; - `HCI_TYPEDEF_REQ_T(redmule_data_req_t, logic [31:0], logic [DW-1:0], logic [DW/8-1:0], logic signed [DW/32-1:0][31:0], logic) - `HCI_TYPEDEF_RSP_T(redmule_data_rsp_t, logic [DW-1:0], logic) + hci_core_intf #(.DW(DW)) redmule_tcdm (.clk(clk_i)); core_inst_req_t core_inst_req; core_inst_rsp_t core_inst_rsp; @@ -144,9 +140,6 @@ import redmule_pkg::*; core_data_req_t core_data_req; core_data_rsp_t core_data_rsp; - redmule_data_req_t redmule_data_req; - redmule_data_rsp_t redmule_data_rsp; - // bindings always_comb begin : bind_periph // periph_req = core_data_req.req & core_data_req.addr[HWPE_ADDR_BASE_BIT]; @@ -188,20 +181,20 @@ import redmule_pkg::*; end for(genvar ii=0; ii> {tcdm_r_data} }; - assign redmule_data_rsp.r_valid = &tcdm_r_valid; - assign redmule_data_rsp.r_opc = '0; - assign redmule_data_rsp.r_user = '0; + assign redmule_tcdm.gnt = &tcdm_gnt; + assign redmule_tcdm.r_data = { >> {tcdm_r_data} }; + assign redmule_tcdm.r_valid = &tcdm_r_valid; + assign redmule_tcdm.r_opc = '0; + assign redmule_tcdm.r_user = '0; assign tcdm[MP].req = core_data_req.req & (core_data_req.addr[31:24] != '0) & @@ -290,9 +283,7 @@ import redmule_pkg::*; .core_data_req_t ( core_data_req_t ), .core_data_rsp_t ( core_data_rsp_t ), .core_inst_req_t ( core_inst_req_t ), - .core_inst_rsp_t ( core_inst_rsp_t ), - .redmule_data_req_t ( redmule_data_req_t ), - .redmule_data_rsp_t ( redmule_data_rsp_t ) + .core_inst_rsp_t ( core_inst_rsp_t ) ) i_dut ( .clk_i ( clk ), .rst_ni ( rst_n ), @@ -307,8 +298,7 @@ import redmule_pkg::*; .core_inst_req_o ( core_inst_req ), .core_data_rsp_i ( core_data_rsp ), .core_data_req_o ( core_data_req ), - .redmule_data_rsp_i ( redmule_data_rsp ), - .redmule_data_req_o ( redmule_data_req ) + .tcdm ( redmule_tcdm ) ); initial begin @@ -391,12 +381,15 @@ import redmule_pkg::*; redmule_complex_tb.i_dummy_dmemory.cnt_wr[7] + redmule_complex_tb.i_dummy_dmemory.cnt_wr[8]; - $display("cnt_rd=%-8d", cnt_rd); - $display("cnt_wr=%-8d", cnt_wr); - if(errors != 0) - $error("errors=%08x", errors); - else - $display("errors=%08x", errors); + $display("[TB] - cnt_rd=%-8d", cnt_rd); + $display("[TB] - cnt_wr=%-8d", cnt_wr); + if(errors != 0) begin + $error("[TB] - errors=%08x", errors); + $display("[TB] - Fail!"); + end else begin + $display("[TB] - errors=%08x", errors); + $display("[TB] - Success!"); + end $finish; end diff --git a/tb/redmule_tb.sv b/tb/redmule_tb.sv index 996fa3a..6e078cc 100644 --- a/tb/redmule_tb.sv +++ b/tb/redmule_tb.sv @@ -5,10 +5,6 @@ // Yvan Tortorella // -`include "hci/typedef.svh" -`include "hci/assign.svh" -`include "hwpe-ctrl/typedef.svh" - timeunit 1ps; timeprecision 1ps; @@ -28,8 +24,8 @@ import redmule_pkg::*; parameter int unsigned PULP_ZFINX = 0; parameter logic [31:0] BASE_ADDR = 32'h1c000000; parameter logic [31:0] HWPE_ADDR_BASE_BIT = 20; - parameter string STIM_INSTR = "../../stim_instr.txt"; - parameter string STIM_DATA = "../../stim_data.txt"; + parameter string STIM_INSTR = "./stim_instr.txt"; + parameter string STIM_DATA = "./stim_data.txt"; // global signals logic clk; @@ -384,12 +380,15 @@ import redmule_pkg::*; #(TCP); cnt_rd = redmule_tb.i_dummy_dmemory.cnt_rd[0] + redmule_tb.i_dummy_dmemory.cnt_rd[1] + redmule_tb.i_dummy_dmemory.cnt_rd[2] + redmule_tb.i_dummy_dmemory.cnt_rd[3] + redmule_tb.i_dummy_dmemory.cnt_rd[4] + redmule_tb.i_dummy_dmemory.cnt_rd[5] + redmule_tb.i_dummy_dmemory.cnt_rd[6] + redmule_tb.i_dummy_dmemory.cnt_rd[7] + redmule_tb.i_dummy_dmemory.cnt_rd[8]; cnt_wr = redmule_tb.i_dummy_dmemory.cnt_wr[0] + redmule_tb.i_dummy_dmemory.cnt_wr[1] + redmule_tb.i_dummy_dmemory.cnt_wr[2] + redmule_tb.i_dummy_dmemory.cnt_wr[3] + redmule_tb.i_dummy_dmemory.cnt_wr[4] + redmule_tb.i_dummy_dmemory.cnt_wr[5] + redmule_tb.i_dummy_dmemory.cnt_wr[6] + redmule_tb.i_dummy_dmemory.cnt_wr[7] + redmule_tb.i_dummy_dmemory.cnt_wr[8]; - $display("cnt_rd=%-8d", cnt_rd); - $display("cnt_wr=%-8d", cnt_wr); - if(errors != 0) - $error("errors=%08x", errors); - else - $display("errors=%08x", errors); + $display("[TB] - cnt_rd=%-8d", cnt_rd); + $display("[TB] - cnt_wr=%-8d", cnt_wr); + if(errors != 0) begin + $error("[TB] - errors=%08x", errors); + $display("[TB] - Fail!"); + end else begin + $display("[TB] - errors=%08x", errors); + $display("[TB] - Success!"); + end $finish; end