diff --git a/Bender.yml b/Bender.yml index c21ec54dc8..ac38d7ca66 100644 --- a/Bender.yml +++ b/Bender.yml @@ -80,8 +80,6 @@ sources: # Packages - core/include/wt_cache_pkg.sv - core/include/std_cache_pkg.sv - - core/include/acc_pkg.sv - # for all the below files use Flist.cva6 as baseline and also look at Makefile pd/synth # CVXIF - core/include/instr_tracer_pkg.sv diff --git a/Flist.ariane b/Flist.ariane index a1f4976382..e89f4fc990 100644 --- a/Flist.ariane +++ b/Flist.ariane @@ -24,7 +24,6 @@ core/include/riscv_pkg.sv corev_apu/riscv-dbg/src/dm_pkg.sv vendor/openhwgroup/cvfpu/src/fpnew_pkg.sv core/include/ariane_pkg.sv -core/include/acc_pkg.sv corev_apu/tb/ariane_soc_pkg.sv vendor/pulp-platform/axi/src/axi_pkg.sv corev_apu/tb/ariane_axi_pkg.sv diff --git a/common/local/util/instr_tracer.sv b/common/local/util/instr_tracer.sv index 17c11e5f14..38986d3909 100644 --- a/common/local/util/instr_tracer.sv +++ b/common/local/util/instr_tracer.sv @@ -17,9 +17,11 @@ `include "ex_trace_item.svh" `include "instr_trace_item.svh" -module instr_tracer ( +module instr_tracer #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty +)( instr_tracer_if tracer_if, - input logic[riscv::XLEN-1:0] hart_id_i + input logic[CVA6Cfg.XLEN-1:0] hart_id_i ); // keep the decoded instructions in a queue diff --git a/core/Flist.cva6 b/core/Flist.cva6 index 46a541603e..c9bc095144 100644 --- a/core/Flist.cva6 +++ b/core/Flist.cva6 @@ -56,9 +56,9 @@ ${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/norm_div_sqrt ${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/nrbd_nrsc_mvp.sv ${CVA6_REPO_DIR}/vendor/openhwgroup/cvfpu/src/fpu_div_sqrt_mvp/hdl/preprocess_mvp.sv +${CVA6_REPO_DIR}/core/include/riscv_pkg.sv ${CVA6_REPO_DIR}/core/include/config_pkg.sv ${CVA6_REPO_DIR}/core/include/${TARGET_CFG}_config_pkg.sv -${CVA6_REPO_DIR}/core/include/riscv_pkg.sv // Note: depends on fpnew_pkg, above ${CVA6_REPO_DIR}/core/include/ariane_pkg.sv 
${CVA6_REPO_DIR}/vendor/pulp-platform/axi/src/axi_pkg.sv @@ -67,7 +67,6 @@ ${CVA6_REPO_DIR}/vendor/pulp-platform/axi/src/axi_pkg.sv ${CVA6_REPO_DIR}/core/include/wt_cache_pkg.sv ${CVA6_REPO_DIR}/core/include/std_cache_pkg.sv ${CVA6_REPO_DIR}/core/include/instr_tracer_pkg.sv -${CVA6_REPO_DIR}/core/include/acc_pkg.sv //CVXIF ${CVA6_REPO_DIR}/core/include/cvxif_pkg.sv diff --git a/core/acc_dispatcher.sv b/core/acc_dispatcher.sv index 998993fbe8..5c4be375c6 100644 --- a/core/acc_dispatcher.sv +++ b/core/acc_dispatcher.sv @@ -18,8 +18,40 @@ module acc_dispatcher import riscv::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter type acc_req_t = acc_pkg::accelerator_req_t, - parameter type acc_resp_t = acc_pkg::accelerator_resp_t, + parameter type exception_t = logic, + parameter type fu_data_t = logic, + parameter type scoreboard_entry_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter type acc_req_t = struct packed { + logic req_valid; + logic resp_ready; + riscv::instruction_t insn; + logic [CVA6Cfg.XLEN-1:0] rs1; + logic [CVA6Cfg.XLEN-1:0] rs2; + fpnew_pkg::roundmode_e frm; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; + logic store_pending; + // Invalidation interface + logic acc_cons_en; + logic inval_ready; + }, + parameter type acc_resp_t = struct packed { + logic req_ready; + logic resp_valid; + logic [CVA6Cfg.XLEN-1:0] result; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; + logic error; + // Metadata + logic store_pending; + logic store_complete; + logic load_complete; + logic [4:0] fflags; + logic fflags_valid; + // Invalidation interface + logic inval_valid; + logic [63:0] inval_addr; + }, parameter type acc_cfg_t = logic, parameter acc_cfg_t AccCfg = '0 ) ( @@ -38,8 +70,8 @@ module acc_dispatcher output logic issue_stall_o, input fu_data_t fu_data_i, input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, - output logic [TRANS_ID_BITS-1:0] acc_trans_id_o, - 
output xlen_t acc_result_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] acc_trans_id_o, + output logic [CVA6Cfg.XLEN-1:0] acc_result_o, output logic acc_valid_o, output exception_t acc_exception_o, // Interface with the execute stage @@ -123,14 +155,15 @@ module acc_dispatcher logic acc_insn_queue_empty; logic [idx_width(InstructionQueueDepth)-1:0] acc_insn_queue_usage; logic acc_commit; - logic [ TRANS_ID_BITS-1:0] acc_commit_trans_id; + logic [ CVA6Cfg.TRANS_ID_BITS-1:0] acc_commit_trans_id; assign acc_data = acc_valid_ex_o ? fu_data_i : '0; fifo_v3 #( .DEPTH (InstructionQueueDepth), .FALL_THROUGH(1'b1), - .dtype (fu_data_t) + .dtype (fu_data_t), + .FPGA_EN (CVA6Cfg.FPGA_EN) ) i_acc_insn_queue ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -153,13 +186,13 @@ module acc_dispatcher **********************************/ // Keep track of the instructions that were received by the dispatcher. - logic [NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q; + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] insn_pending_d, insn_pending_q; `FF(insn_pending_q, insn_pending_d, '0) // Only non-speculative instructions can be issued to the accelerators. // The following block keeps track of which transaction IDs reached the // top of the scoreboard, and are therefore no longer speculative. 
- logic [NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q; + logic [CVA6Cfg.NR_SB_ENTRIES-1:0] insn_ready_d, insn_ready_q; `FF(insn_ready_q, insn_ready_d, '0) always_comb begin : p_non_speculative_ff @@ -186,13 +219,13 @@ module acc_dispatcher * Accelerator request * *************************/ - acc_pkg::accelerator_req_t acc_req; + acc_req_t acc_req; logic acc_req_valid; logic acc_req_ready; - acc_pkg::accelerator_req_t acc_req_int; + acc_req_t acc_req_int; fall_through_register #( - .T(acc_pkg::accelerator_req_t) + .T(acc_req_t) ) i_accelerator_req_register ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -223,7 +256,7 @@ module acc_dispatcher acc_req = '0; acc_req_valid = 1'b0; - // Unpack fu_data_t into accelerator_req_t + // Unpack fu_data_t into acc_req_t if (!acc_insn_queue_empty) begin acc_req = '{ // Instruction is forwarded from the decoder as an immediate diff --git a/core/alu.sv b/core/alu.sv index 01533b6c0a..4a51d70e5b 100644 --- a/core/alu.sv +++ b/core/alu.sv @@ -21,34 +21,35 @@ module alu import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type fu_data_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input fu_data_t fu_data_i, - output riscv::xlen_t result_o, + output logic [CVA6Cfg.XLEN-1:0] result_o, output logic alu_branch_res_o ); - riscv::xlen_t operand_a_rev; + logic [CVA6Cfg.XLEN-1:0] operand_a_rev; logic [ 31:0] operand_a_rev32; - logic [ riscv::XLEN:0] operand_b_neg; - logic [riscv::XLEN+1:0] adder_result_ext_o; + logic [ CVA6Cfg.XLEN:0] operand_b_neg; + logic [CVA6Cfg.XLEN+1:0] adder_result_ext_o; logic less; // handles both signed and unsigned forms logic [ 31:0] rolw; // Rotate Left Word logic [ 31:0] rorw; // Rotate Right Word logic [31:0] orcbw, rev8w; - logic [ $clog2(riscv::XLEN) : 0] cpop; // Count Population - logic [$clog2(riscv::XLEN)-1 : 0] lz_tz_count; // Count 
Leading Zeros + logic [ $clog2(CVA6Cfg.XLEN) : 0] cpop; // Count Population + logic [$clog2(CVA6Cfg.XLEN)-1 : 0] lz_tz_count; // Count Leading Zeros logic [ 4:0] lz_tz_wcount; // Count Leading Zeros Word logic lz_tz_empty, lz_tz_wempty; - riscv::xlen_t orcbw_result, rev8w_result; + logic [CVA6Cfg.XLEN-1:0] orcbw_result, rev8w_result; // bit reverse operand_a for left shifts and bit counting generate genvar k; - for (k = 0; k < riscv::XLEN; k++) - assign operand_a_rev[k] = fu_data_i.operand_a[riscv::XLEN-1-k]; + for (k = 0; k < CVA6Cfg.XLEN; k++) + assign operand_a_rev[k] = fu_data_i.operand_a[CVA6Cfg.XLEN-1-k]; for (k = 0; k < 32; k++) assign operand_a_rev32[k] = fu_data_i.operand_a[31-k]; endgenerate @@ -58,9 +59,9 @@ module alu // ------ logic adder_op_b_negate; logic adder_z_flag; - logic [riscv::XLEN:0] adder_in_a, adder_in_b; - riscv::xlen_t adder_result; - logic [riscv::XLEN-1:0] operand_a_bitmanip, bit_indx; + logic [CVA6Cfg.XLEN:0] adder_in_a, adder_in_b; + logic [CVA6Cfg.XLEN-1:0] adder_result; + logic [CVA6Cfg.XLEN-1:0] operand_a_bitmanip, bit_indx; always_comb begin adder_op_b_negate = 1'b0; @@ -75,8 +76,8 @@ module alu always_comb begin operand_a_bitmanip = fu_data_i.operand_a; - if (ariane_pkg::BITMANIP) begin - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.BITMANIP) begin + if (CVA6Cfg.IS_XLEN64) begin unique case (fu_data_i.operation) SH1ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 1; SH2ADDUW: operand_a_bitmanip = fu_data_i.operand_a[31:0] << 2; @@ -100,12 +101,12 @@ module alu assign adder_in_a = {operand_a_bitmanip, 1'b1}; // prepare operand b - assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {riscv::XLEN + 1{adder_op_b_negate}}; + assign operand_b_neg = {fu_data_i.operand_b, 1'b0} ^ {CVA6Cfg.XLEN + 1{adder_op_b_negate}}; assign adder_in_b = operand_b_neg; // actual adder assign adder_result_ext_o = $unsigned(adder_in_a) + $unsigned(adder_in_b); - assign adder_result = adder_result_ext_o[riscv::XLEN:1]; + assign adder_result = 
adder_result_ext_o[CVA6Cfg.XLEN:1]; assign adder_z_flag = ~|adder_result; // get the right branch comparison result @@ -129,17 +130,17 @@ module alu logic shift_left; // should we shift left logic shift_arithmetic; - riscv::xlen_t shift_amt; // amount of shift, to the right - riscv::xlen_t shift_op_a; // input of the shifter + logic [CVA6Cfg.XLEN-1:0] shift_amt; // amount of shift, to the right + logic [CVA6Cfg.XLEN-1:0] shift_op_a; // input of the shifter logic [ 31:0] shift_op_a32; // input to the 32 bit shift operation - riscv::xlen_t shift_result; + logic [CVA6Cfg.XLEN-1:0] shift_result; logic [ 31:0] shift_result32; - logic [riscv::XLEN:0] shift_right_result; + logic [CVA6Cfg.XLEN:0] shift_right_result; logic [ 32:0] shift_right_result32; - riscv::xlen_t shift_left_result; + logic [CVA6Cfg.XLEN-1:0] shift_left_result; logic [ 31:0] shift_left_result32; assign shift_amt = fu_data_i.operand_b; @@ -149,14 +150,14 @@ module alu assign shift_arithmetic = (fu_data_i.operation == SRA) | (fu_data_i.operation == SRAW); // right shifts, we let the synthesizer optimize this - logic [riscv::XLEN:0] shift_op_a_64; + logic [CVA6Cfg.XLEN:0] shift_op_a_64; logic [32:0] shift_op_a_32; // choose the bit reversed or the normal input for shift operand a assign shift_op_a = shift_left ? operand_a_rev : fu_data_i.operand_a; assign shift_op_a32 = shift_left ? 
operand_a_rev32 : fu_data_i.operand_a[31:0]; - assign shift_op_a_64 = {shift_arithmetic & shift_op_a[riscv::XLEN-1], shift_op_a}; + assign shift_op_a_64 = {shift_arithmetic & shift_op_a[CVA6Cfg.XLEN-1], shift_op_a}; assign shift_op_a_32 = {shift_arithmetic & shift_op_a[31], shift_op_a32}; assign shift_right_result = $unsigned($signed(shift_op_a_64) >>> shift_amt[5:0]); @@ -165,14 +166,14 @@ module alu // bit reverse the shift_right_result for left shifts genvar j; generate - for (j = 0; j < riscv::XLEN; j++) - assign shift_left_result[j] = shift_right_result[riscv::XLEN-1-j]; + for (j = 0; j < CVA6Cfg.XLEN; j++) + assign shift_left_result[j] = shift_right_result[CVA6Cfg.XLEN-1-j]; for (j = 0; j < 32; j++) assign shift_left_result32[j] = shift_right_result32[31-j]; endgenerate - assign shift_result = shift_left ? shift_left_result : shift_right_result[riscv::XLEN-1:0]; + assign shift_result = shift_left ? shift_left_result : shift_right_result[CVA6Cfg.XLEN-1:0]; assign shift_result32 = shift_left ? 
shift_left_result32 : shift_right_result32[31:0]; // ------------ @@ -190,15 +191,15 @@ module alu (fu_data_i.operation == MIN)) sgn = 1'b1; - less = ($signed({sgn & fu_data_i.operand_a[riscv::XLEN-1], fu_data_i.operand_a}) < - $signed({sgn & fu_data_i.operand_b[riscv::XLEN-1], fu_data_i.operand_b})); + less = ($signed({sgn & fu_data_i.operand_a[CVA6Cfg.XLEN-1], fu_data_i.operand_a}) < + $signed({sgn & fu_data_i.operand_b[CVA6Cfg.XLEN-1], fu_data_i.operand_b})); end - if (ariane_pkg::BITMANIP) begin : gen_bitmanip + if (CVA6Cfg.BITMANIP) begin : gen_bitmanip // Count Population + Count population Word popcount #( - .INPUT_WIDTH(riscv::XLEN) + .INPUT_WIDTH(CVA6Cfg.XLEN) ) i_cpop_count ( .data_i (operand_a_bitmanip), .popcount_o(cpop) @@ -207,14 +208,14 @@ module alu // Count Leading/Trailing Zeros // 64b lzc #( - .WIDTH(riscv::XLEN), + .WIDTH(CVA6Cfg.XLEN), .MODE (1) ) i_clz_64b ( .in_i(operand_a_bitmanip), .cnt_o(lz_tz_count), .empty_o(lz_tz_empty) ); - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin //32b lzc #( .WIDTH(32), @@ -227,10 +228,10 @@ module alu end end - if (ariane_pkg::BITMANIP) begin : gen_orcbw_rev8w_results + if (CVA6Cfg.BITMANIP) begin : gen_orcbw_rev8w_results assign orcbw = {{8{|fu_data_i.operand_a[31:24]}}, {8{|fu_data_i.operand_a[23:16]}}, {8{|fu_data_i.operand_a[15:8]}}, {8{|fu_data_i.operand_a[7:0]}}}; assign rev8w = {{fu_data_i.operand_a[7:0]}, {fu_data_i.operand_a[15:8]}, {fu_data_i.operand_a[23:16]}, {fu_data_i.operand_a[31:24]}}; - if (riscv::IS_XLEN64) begin : gen_64b + if (CVA6Cfg.IS_XLEN64) begin : gen_64b assign orcbw_result = {{8{|fu_data_i.operand_a[63:56]}}, {8{|fu_data_i.operand_a[55:48]}}, {8{|fu_data_i.operand_a[47:40]}}, {8{|fu_data_i.operand_a[39:32]}}, orcbw}; assign rev8w_result = {rev8w , {fu_data_i.operand_a[39:32]}, {fu_data_i.operand_a[47:40]}, {fu_data_i.operand_a[55:48]}, {fu_data_i.operand_a[63:56]}}; end else begin : gen_32b @@ -244,49 +245,49 @@ module alu // ----------- always_comb begin result_o = '0; 
- if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin unique case (fu_data_i.operation) // Add word: Ignore the upper bits and sign extend to 64 bit - ADDW, SUBW: result_o = {{riscv::XLEN - 32{adder_result[31]}}, adder_result[31:0]}; + ADDW, SUBW: result_o = {{CVA6Cfg.XLEN - 32{adder_result[31]}}, adder_result[31:0]}; SH1ADDUW, SH2ADDUW, SH3ADDUW: result_o = adder_result; // Shifts 32 bit - SLLW, SRLW, SRAW: result_o = {{riscv::XLEN - 32{shift_result32[31]}}, shift_result32[31:0]}; + SLLW, SRLW, SRAW: result_o = {{CVA6Cfg.XLEN - 32{shift_result32[31]}}, shift_result32[31:0]}; default: ; endcase end unique case (fu_data_i.operation) // Standard Operations - ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[riscv::XLEN:1]; - ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[riscv::XLEN:1]; - XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[riscv::XLEN:1]; + ANDL, ANDN: result_o = fu_data_i.operand_a & operand_b_neg[CVA6Cfg.XLEN:1]; + ORL, ORN: result_o = fu_data_i.operand_a | operand_b_neg[CVA6Cfg.XLEN:1]; + XORL, XNOR: result_o = fu_data_i.operand_a ^ operand_b_neg[CVA6Cfg.XLEN:1]; // Adder Operations ADD, SUB, ADDUW, SH1ADD, SH2ADD, SH3ADD: result_o = adder_result; // Shift Operations - SLL, SRL, SRA: result_o = (riscv::IS_XLEN64) ? shift_result : shift_result32; + SLL, SRL, SRA: result_o = (CVA6Cfg.IS_XLEN64) ? 
shift_result : shift_result32; // Comparison Operations - SLTS, SLTU: result_o = {{riscv::XLEN - 1{1'b0}}, less}; + SLTS, SLTU: result_o = {{CVA6Cfg.XLEN - 1{1'b0}}, less}; default: ; // default case to suppress unique warning endcase - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin // Index for Bitwise Rotation - bit_indx = 1 << (fu_data_i.operand_b & (riscv::XLEN - 1)); + bit_indx = 1 << (fu_data_i.operand_b & (CVA6Cfg.XLEN - 1)); // rolw, roriw, rorw - rolw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (riscv::XLEN-32-fu_data_i.operand_b[4:0])); - rorw = ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{riscv::XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (riscv::XLEN-32-fu_data_i.operand_b[4:0])); - if (riscv::IS_XLEN64) begin + rolw = ({{CVA6Cfg.XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[4:0]) | ({{CVA6Cfg.XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> (CVA6Cfg.XLEN-32-fu_data_i.operand_b[4:0])); + rorw = ({{CVA6Cfg.XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} >> fu_data_i.operand_b[4:0]) | ({{CVA6Cfg.XLEN-32{1'b0}},fu_data_i.operand_a[31:0]} << (CVA6Cfg.XLEN-32-fu_data_i.operand_b[4:0])); + if (CVA6Cfg.IS_XLEN64) begin unique case (fu_data_i.operation) - CLZW, CTZW: result_o = (lz_tz_wempty) ? 32 : {{riscv::XLEN - 5{1'b0}}, lz_tz_wcount}; // change - ROLW: result_o = {{riscv::XLEN - 32{rolw[31]}}, rolw}; - RORW, RORIW: result_o = {{riscv::XLEN - 32{rorw[31]}}, rorw}; + CLZW, CTZW: result_o = (lz_tz_wempty) ? 
32 : {{CVA6Cfg.XLEN - 5{1'b0}}, lz_tz_wcount}; // change + ROLW: result_o = {{CVA6Cfg.XLEN - 32{rolw[31]}}, rolw}; + RORW, RORIW: result_o = {{CVA6Cfg.XLEN - 32{rorw[31]}}, rorw}; default: ; endcase end unique case (fu_data_i.operation) // Left Shift 32 bit unsigned SLLIUW: - result_o = {{riscv::XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0]; + result_o = {{CVA6Cfg.XLEN-32{1'b0}}, fu_data_i.operand_a[31:0]} << fu_data_i.operand_b[5:0]; // Integer minimum/maximum MAX: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a; MAXU: result_o = less ? fu_data_i.operand_b : fu_data_i.operand_a; @@ -295,29 +296,29 @@ module alu // Single bit instructions operations BCLR, BCLRI: result_o = fu_data_i.operand_a & ~bit_indx; - BEXT, BEXTI: result_o = {{riscv::XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)}; + BEXT, BEXTI: result_o = {{CVA6Cfg.XLEN - 1{1'b0}}, |(fu_data_i.operand_a & bit_indx)}; BINV, BINVI: result_o = fu_data_i.operand_a ^ bit_indx; BSET, BSETI: result_o = fu_data_i.operand_a | bit_indx; // Count Leading/Trailing Zeros CLZ, CTZ: - result_o = (lz_tz_empty) ? ({{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count} + 1) : - {{riscv::XLEN - $clog2(riscv::XLEN) {1'b0}}, lz_tz_count}; + result_o = (lz_tz_empty) ? 
({{CVA6Cfg.XLEN - $clog2(CVA6Cfg.XLEN) {1'b0}}, lz_tz_count} + 1) : + {{CVA6Cfg.XLEN - $clog2(CVA6Cfg.XLEN) {1'b0}}, lz_tz_count}; // Count population - CPOP, CPOPW: result_o = {{(riscv::XLEN - ($clog2(riscv::XLEN) + 1)) {1'b0}}, cpop}; + CPOP, CPOPW: result_o = {{(CVA6Cfg.XLEN - ($clog2(CVA6Cfg.XLEN) + 1)) {1'b0}}, cpop}; // Sign and Zero Extend - SEXTB: result_o = {{riscv::XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]}; - SEXTH: result_o = {{riscv::XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]}; - ZEXTH: result_o = {{riscv::XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]}; + SEXTB: result_o = {{CVA6Cfg.XLEN - 8{fu_data_i.operand_a[7]}}, fu_data_i.operand_a[7:0]}; + SEXTH: result_o = {{CVA6Cfg.XLEN - 16{fu_data_i.operand_a[15]}}, fu_data_i.operand_a[15:0]}; + ZEXTH: result_o = {{CVA6Cfg.XLEN - 16{1'b0}}, fu_data_i.operand_a[15:0]}; // Bitwise Rotation ROL: - result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (riscv::XLEN-fu_data_i.operand_b[4:0]))); + result_o = (CVA6Cfg.IS_XLEN64) ? ((fu_data_i.operand_a << fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a >> (CVA6Cfg.XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a << fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a >> (CVA6Cfg.XLEN-fu_data_i.operand_b[4:0]))); ROR, RORI: - result_o = (riscv::IS_XLEN64) ? ((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (riscv::XLEN-fu_data_i.operand_b[4:0]))); + result_o = (CVA6Cfg.IS_XLEN64) ? 
((fu_data_i.operand_a >> fu_data_i.operand_b[5:0]) | (fu_data_i.operand_a << (CVA6Cfg.XLEN-fu_data_i.operand_b[5:0]))) : ((fu_data_i.operand_a >> fu_data_i.operand_b[4:0]) | (fu_data_i.operand_a << (CVA6Cfg.XLEN-fu_data_i.operand_b[4:0]))); ORCB: result_o = orcbw_result; diff --git a/core/amo_buffer.sv b/core/amo_buffer.sv index 24a98ddb92..d2f37f36c3 100644 --- a/core/amo_buffer.sv +++ b/core/amo_buffer.sv @@ -24,8 +24,8 @@ module amo_buffer #( input logic valid_i, // AMO is valid output logic ready_o, // AMO unit is ready input ariane_pkg::amo_t amo_op_i, // AMO Operation - input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue - input riscv::xlen_t data_i, // data which is placed in the queue + input logic [CVA6Cfg.PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue + input logic [CVA6Cfg.XLEN-1:0] data_i, // data which is placed in the queue input logic [1:0] data_size_i, // type of request we are making (e.g.: bytes to write) // D$ output ariane_pkg::amo_req_t amo_req_o, // request to cache subsytem @@ -39,8 +39,8 @@ module amo_buffer #( typedef struct packed { ariane_pkg::amo_t op; - logic [riscv::PLEN-1:0] paddr; - riscv::xlen_t data; + logic [CVA6Cfg.PLEN-1:0] paddr; + logic [CVA6Cfg.XLEN-1:0] data; logic [1:0] size; } amo_op_t; @@ -50,8 +50,8 @@ module amo_buffer #( assign amo_req_o.req = no_st_pending_i & amo_valid_commit_i & amo_valid; assign amo_req_o.amo_op = amo_data_out.op; assign amo_req_o.size = amo_data_out.size; - assign amo_req_o.operand_a = {{64 - riscv::PLEN{1'b0}}, amo_data_out.paddr}; - assign amo_req_o.operand_b = {{64 - riscv::XLEN{1'b0}}, amo_data_out.data}; + assign amo_req_o.operand_a = {{64 - CVA6Cfg.PLEN{1'b0}}, amo_data_out.paddr}; + assign amo_req_o.operand_b = {{64 - CVA6Cfg.XLEN{1'b0}}, amo_data_out.data}; assign amo_data_in.op = amo_op_i; assign amo_data_in.data = data_i; @@ -64,7 +64,8 @@ module amo_buffer #( fifo_v3 #( .DEPTH(1), - 
.dtype(amo_op_t) + .dtype(amo_op_t), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_amo_fifo ( .clk_i (clk_i), .rst_ni (rst_ni), diff --git a/core/axi_shim.sv b/core/axi_shim.sv index c4d93bd9e2..60773fb7ae 100644 --- a/core/axi_shim.sv +++ b/core/axi_shim.sv @@ -269,7 +269,7 @@ module axi_shim #( // return path assign axi_req_o.r_ready = rd_rdy_i; assign rd_data_o = axi_resp_i.r.data; - if (ariane_pkg::AXI_USER_EN) begin + if (CVA6Cfg.AXI_USER_EN) begin assign rd_user_o = axi_resp_i.r.user; end else begin assign rd_user_o = '0; diff --git a/core/branch_unit.sv b/core/branch_unit.sv index 47004fe6bc..23b677e3ba 100644 --- a/core/branch_unit.sv +++ b/core/branch_unit.sv @@ -13,47 +13,51 @@ // Description: Branch target calculation and comparison module branch_unit #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type bp_resolve_t = logic, + parameter type branchpredict_sbe_t = logic, + parameter type fu_data_t = logic ) ( input logic clk_i, input logic rst_ni, input logic debug_mode_i, - input ariane_pkg::fu_data_t fu_data_i, - input logic [riscv::VLEN-1:0] pc_i, // PC of instruction + input fu_data_t fu_data_i, + input logic [CVA6Cfg.VLEN-1:0] pc_i, // PC of instruction input logic is_compressed_instr_i, input logic fu_valid_i, // any functional unit is valid, check that there is no accidental mis-predict input logic branch_valid_i, input logic branch_comp_res_i, // branch comparison result from ALU - output logic [riscv::VLEN-1:0] branch_result_o, + output logic [CVA6Cfg.VLEN-1:0] branch_result_o, - input ariane_pkg::branchpredict_sbe_t branch_predict_i, // this is the address we predicted - output ariane_pkg::bp_resolve_t resolved_branch_o, // this is the actual address we are targeting + input branchpredict_sbe_t branch_predict_i, // this is the address we predicted + output bp_resolve_t resolved_branch_o, // this is the actual 
address we are targeting output logic resolve_branch_o, // to ID to clear that we resolved the branch and we can // accept new entries to the scoreboard - output ariane_pkg::exception_t branch_exception_o // branch exception out + output exception_t branch_exception_o // branch exception out ); - logic [riscv::VLEN-1:0] target_address; - logic [riscv::VLEN-1:0] next_pc; + logic [CVA6Cfg.VLEN-1:0] target_address; + logic [CVA6Cfg.VLEN-1:0] next_pc; // here we handle the various possibilities of mis-predicts always_comb begin : mispredict_handler // set the jump base, for JALR we need to look at the register, for all other control flow instructions we can take the current PC - automatic logic [riscv::VLEN-1:0] jump_base; + automatic logic [CVA6Cfg.VLEN-1:0] jump_base; // TODO(zarubaf): The ALU can be used to calculate the branch target - jump_base = (fu_data_i.operation == ariane_pkg::JALR) ? fu_data_i.operand_a[riscv::VLEN-1:0] : pc_i; + jump_base = (fu_data_i.operation == ariane_pkg::JALR) ? fu_data_i.operand_a[CVA6Cfg.VLEN-1:0] : pc_i; - target_address = {riscv::VLEN{1'b0}}; + target_address = {CVA6Cfg.VLEN{1'b0}}; resolve_branch_o = 1'b0; - resolved_branch_o.target_address = {riscv::VLEN{1'b0}}; + resolved_branch_o.target_address = {CVA6Cfg.VLEN{1'b0}}; resolved_branch_o.is_taken = 1'b0; resolved_branch_o.valid = branch_valid_i; resolved_branch_o.is_mispredict = 1'b0; resolved_branch_o.cf_type = branch_predict_i.cf; // calculate next PC, depending on whether the instruction is compressed or not this may be different // TODO(zarubaf): We already calculate this a couple of times, maybe re-use? - next_pc = pc_i + ((is_compressed_instr_i) ? {{riscv::VLEN-2{1'b0}}, 2'h2} : {{riscv::VLEN-3{1'b0}}, 3'h4}); + next_pc = pc_i + ((is_compressed_instr_i) ? 
{{CVA6Cfg.VLEN-2{1'b0}}, 2'h2} : {{CVA6Cfg.VLEN-3{1'b0}}, 3'h4}); // calculate target address simple 64 bit addition - target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[riscv::VLEN-1:0])); + target_address = $unsigned($signed(jump_base) + $signed(fu_data_i.imm[CVA6Cfg.VLEN-1:0])); // on a JALR we are supposed to reset the LSB to 0 (according to the specification) if (fu_data_i.operation == ariane_pkg::JALR) target_address[0] = 1'b0; // we need to put the branch target address into rd, this is the result of this unit @@ -96,7 +100,7 @@ module branch_unit #( ((ariane_pkg::op_is_branch(fu_data_i.operation)) && branch_comp_res_i); branch_exception_o.cause = riscv::INSTR_ADDR_MISALIGNED; branch_exception_o.valid = 1'b0; - branch_exception_o.tval = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + branch_exception_o.tval = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{pc_i[CVA6Cfg.VLEN-1]}}, pc_i}; // Only throw instruction address misaligned exception if this is indeed a `taken` conditional branch or // an unconditional jump if (branch_valid_i && target_address[0] != 1'b0 && jump_taken) branch_exception_o.valid = 1'b1; diff --git a/core/cache_subsystem/axi_adapter.sv b/core/cache_subsystem/axi_adapter.sv index f9982759a3..de58d03b7c 100644 --- a/core/cache_subsystem/axi_adapter.sv +++ b/core/cache_subsystem/axi_adapter.sv @@ -31,7 +31,7 @@ module axi_adapter #( input ariane_pkg::ad_req_t type_i, input ariane_pkg::amo_t amo_i, output logic gnt_o, - input logic [riscv::XLEN-1:0] addr_i, + input logic [CVA6Cfg.XLEN-1:0] addr_i, input logic we_i, input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][CVA6Cfg.AxiDataWidth-1:0] wdata_i, input logic [(DATA_WIDTH/CVA6Cfg.AxiDataWidth)-1:0][(CVA6Cfg.AxiDataWidth/8)-1:0] be_i, diff --git a/core/cache_subsystem/cache_ctrl.sv b/core/cache_subsystem/cache_ctrl.sv index 554b7d63c3..c7b04659f1 100644 --- a/core/cache_subsystem/cache_ctrl.sv +++ b/core/cache_subsystem/cache_ctrl.sv @@ -22,7 +22,11 @@ module cache_ctrl import 
ariane_pkg::*; import std_cache_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter type cache_line_t = logic, + parameter type cl_be_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -33,15 +37,15 @@ module cache_ctrl input dcache_req_i_t req_port_i, output dcache_req_o_t req_port_o, // SRAM interface - output logic [DCACHE_SET_ASSOC-1:0] req_o, // req is valid - output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] req_o, // req is valid + output logic [CVA6Cfg.DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array input logic gnt_i, output cache_line_t data_o, output cl_be_t be_o, - output logic [DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later - input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, + output logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag_o, //valid one cycle later + input cache_line_t [CVA6Cfg.DCACHE_SET_ASSOC-1:0] data_i, output logic we_o, - input logic [DCACHE_SET_ASSOC-1:0] hit_way_i, + input logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] hit_way_i, // Miss handling output miss_req_t miss_req_o, // return @@ -75,8 +79,8 @@ module cache_ctrl state_d, state_q; typedef struct packed { - logic [DCACHE_INDEX_WIDTH-1:0] index; - logic [DCACHE_TAG_WIDTH-1:0] tag; + logic [CVA6Cfg.DCACHE_INDEX_WIDTH-1:0] index; + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag; logic [DCACHE_TID_WIDTH-1:0] id; logic [7:0] be; logic [1:0] size; @@ -86,18 +90,18 @@ module cache_ctrl logic killed; } mem_req_t; - logic [DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q; + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] hit_way_d, hit_way_q; mem_req_t mem_req_d, mem_req_q; assign busy_o = (state_q != IDLE); assign tag_o = mem_req_d.tag; - logic [DCACHE_LINE_WIDTH-1:0] cl_i; + logic 
[CVA6Cfg.DCACHE_LINE_WIDTH-1:0] cl_i; always_comb begin : way_select cl_i = '0; - for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) if (hit_way_i[i]) cl_i = data_i[i].data; + for (int unsigned i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) if (hit_way_i[i]) cl_i = data_i[i].data; // cl_i = data_i[one_hot_to_bin(hit_way_i)].data; end @@ -106,10 +110,10 @@ module cache_ctrl // Cache FSM // -------------- always_comb begin : cache_ctrl_fsm - automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; + automatic logic [$clog2(CVA6Cfg.DCACHE_LINE_WIDTH)-1:0] cl_offset; // incoming cache-line -> this is needed as synthesis is not supporting +: indexing in a multi-dimensional array // cache-line offset -> multiple of 64 - cl_offset = mem_req_q.index[DCACHE_BYTE_OFFSET-1:3] << 6; // shift by 6 to the left + cl_offset = mem_req_q.index[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:3] << 6; // shift by 6 to the left // default assignments state_d = state_q; mem_req_d = mem_req_q; @@ -252,7 +256,7 @@ module cache_ctrl // Check for cache-ability // ------------------------- if (!config_pkg::is_inside_cacheable_regions( - CVA6Cfg, {{{64 - riscv::PLEN} {1'b0}}, tag_o, {DCACHE_INDEX_WIDTH{1'b0}}} + CVA6Cfg, {{{64 - CVA6Cfg.PLEN} {1'b0}}, tag_o, {CVA6Cfg.DCACHE_INDEX_WIDTH{1'b0}}} )) begin mem_req_d.bypass = 1'b1; state_d = WAIT_REFILL_GNT; @@ -456,7 +460,7 @@ module cache_ctrl //pragma translate_off `ifndef VERILATOR initial begin - assert (DCACHE_LINE_WIDTH == 128) + assert (CVA6Cfg.DCACHE_LINE_WIDTH == 128) else $error( "Cacheline width has to be 128 for the moment. 
But only small changes required in data select logic" diff --git a/core/cache_subsystem/cva6_hpdcache_if_adapter.sv b/core/cache_subsystem/cva6_hpdcache_if_adapter.sv index b91eba349d..0d95d895e9 100644 --- a/core/cache_subsystem/cva6_hpdcache_if_adapter.sv +++ b/core/cache_subsystem/cva6_hpdcache_if_adapter.sv @@ -16,6 +16,8 @@ module cva6_hpdcache_if_adapter // {{{ #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, parameter bit is_load_port = 1'b1 ) // }}} @@ -31,8 +33,8 @@ module cva6_hpdcache_if_adapter input hpdcache_pkg::hpdcache_req_sid_t hpdcache_req_sid_i, // Request/response ports from/to the CVA6 core - input ariane_pkg::dcache_req_i_t cva6_req_i, - output ariane_pkg::dcache_req_o_t cva6_req_o, + input dcache_req_i_t cva6_req_i, + output dcache_req_o_t cva6_req_o, input ariane_pkg::amo_req_t cva6_amo_req_i, output ariane_pkg::amo_resp_t cva6_amo_resp_o, @@ -65,9 +67,9 @@ module cva6_hpdcache_if_adapter assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions( CVA6Cfg, { - {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} + {64 - CVA6Cfg.DCACHE_TAG_WIDTH{1'b0}} , cva6_req_i.address_tag - , {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}} + , {CVA6Cfg.DCACHE_INDEX_WIDTH{1'b0}} } ); @@ -147,9 +149,9 @@ module cva6_hpdcache_if_adapter assign hpdcache_req_is_uncacheable = !config_pkg::is_inside_cacheable_regions( CVA6Cfg, { - {64 - ariane_pkg::DCACHE_TAG_WIDTH{1'b0}} + {64 - CVA6Cfg.DCACHE_TAG_WIDTH{1'b0}} , hpdcache_req_o.addr_tag, - {ariane_pkg::DCACHE_INDEX_WIDTH{1'b0}} + {CVA6Cfg.DCACHE_INDEX_WIDTH{1'b0}} } ); diff --git a/core/cache_subsystem/cva6_hpdcache_subsystem.sv b/core/cache_subsystem/cva6_hpdcache_subsystem.sv index 7e90b9149f..4f90d9aeb4 100644 --- a/core/cache_subsystem/cva6_hpdcache_subsystem.sv +++ b/core/cache_subsystem/cva6_hpdcache_subsystem.sv @@ -17,6 +17,14 @@ module cva6_hpdcache_subsystem // {{{ #( parameter 
config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + parameter type icache_req_t = logic, + parameter type icache_rtrn_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, parameter int NumPorts = 4, parameter int NrHwPrefetchers = 4, parameter type noc_req_t = logic, @@ -38,11 +46,11 @@ module cva6_hpdcache_subsystem input logic icache_flush_i, // flush the icache, flush and kill have to be asserted together output logic icache_miss_o, // to performance counter // address translation requests - input ariane_pkg::icache_areq_t icache_areq_i, // to/from frontend - output ariane_pkg::icache_arsp_t icache_areq_o, + input icache_areq_t icache_areq_i, // to/from frontend + output icache_arsp_t icache_areq_o, // data requests - input ariane_pkg::icache_dreq_t icache_dreq_i, // to/from frontend - output ariane_pkg::icache_drsp_t icache_dreq_o, + input icache_dreq_t icache_dreq_i, // to/from frontend + output icache_drsp_t icache_dreq_o, // }}} // D$ @@ -60,8 +68,8 @@ module cva6_hpdcache_subsystem input cmo_req_t dcache_cmo_req_i, // from CMO FU output cmo_rsp_t dcache_cmo_resp_o, // to CMO FU // Request ports - input ariane_pkg::dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU - output ariane_pkg::dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU + input dcache_req_i_t [NumPorts-1:0] dcache_req_ports_i, // from LSU + output dcache_req_o_t [NumPorts-1:0] dcache_req_ports_o, // to LSU // Write Buffer status output logic wbuffer_empty_o, output logic wbuffer_not_ni_o, @@ -92,15 +100,21 @@ module cva6_hpdcache_subsystem // I$ instantiation // {{{ logic icache_miss_valid, icache_miss_ready; - wt_cache_pkg::icache_req_t icache_miss; + icache_req_t icache_miss; logic icache_miss_resp_valid; - wt_cache_pkg::icache_rtrn_t icache_miss_resp; + 
icache_rtrn_t icache_miss_resp; - localparam int ICACHE_RDTXID = 1 << (ariane_pkg::MEM_TID_WIDTH - 1); + localparam int ICACHE_RDTXID = 1 << (CVA6Cfg.MEM_TID_WIDTH - 1); cva6_icache #( .CVA6Cfg(CVA6Cfg), + .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), .RdTxId (ICACHE_RDTXID) ) i_cva6_icache ( .clk_i (clk_i), @@ -136,7 +150,7 @@ module cva6_hpdcache_subsystem localparam int HPDCACHE_NREQUESTERS = NumPorts + 2; typedef logic [CVA6Cfg.AxiAddrWidth-1:0] hpdcache_mem_addr_t; - typedef logic [ariane_pkg::MEM_TID_WIDTH-1:0] hpdcache_mem_id_t; + typedef logic [CVA6Cfg.MEM_TID_WIDTH-1:0] hpdcache_mem_id_t; typedef logic [CVA6Cfg.AxiDataWidth-1:0] hpdcache_mem_data_t; typedef logic [CVA6Cfg.AxiDataWidth/8-1:0] hpdcache_mem_be_t; `HPDCACHE_TYPEDEF_MEM_REQ_T(hpdcache_mem_req_t, hpdcache_mem_addr_t, hpdcache_mem_id_t); @@ -208,13 +222,15 @@ module cva6_hpdcache_subsystem hwpf_stride_pkg::hwpf_stride_throttle_t [NrHwPrefetchers-1:0] hwpf_throttle_out; generate - ariane_pkg::dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0]; + dcache_req_i_t dcache_req_ports[HPDCACHE_NREQUESTERS-1:0]; for (genvar r = 0; r < (NumPorts - 1); r++) begin : cva6_hpdcache_load_if_adapter_gen assign dcache_req_ports[r] = dcache_req_ports_i[r]; cva6_hpdcache_if_adapter #( .CVA6Cfg (CVA6Cfg), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .is_load_port(1'b1) ) i_cva6_hpdcache_load_if_adapter ( .clk_i, @@ -241,6 +257,8 @@ module cva6_hpdcache_subsystem cva6_hpdcache_if_adapter #( .CVA6Cfg (CVA6Cfg), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .is_load_port(1'b0) ) i_cva6_hpdcache_store_if_adapter ( .clk_i, @@ -375,7 +393,7 @@ module cva6_hpdcache_subsystem hpdcache #( .NREQUESTERS (HPDCACHE_NREQUESTERS), - .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), + .HPDcacheMemIdWidth (CVA6Cfg.MEM_TID_WIDTH), 
.HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), .hpdcache_mem_req_t (hpdcache_mem_req_t), .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), @@ -484,12 +502,15 @@ module cva6_hpdcache_subsystem `AXI_TYPEDEF_R_CHAN_T(axi_r_chan_t, axi_data_t, axi_id_t, axi_user_t) cva6_hpdcache_subsystem_axi_arbiter #( - .HPDcacheMemIdWidth (ariane_pkg::MEM_TID_WIDTH), + .CVA6Cfg (CVA6Cfg), + .HPDcacheMemIdWidth (CVA6Cfg.MEM_TID_WIDTH), .HPDcacheMemDataWidth (CVA6Cfg.AxiDataWidth), .hpdcache_mem_req_t (hpdcache_mem_req_t), .hpdcache_mem_req_w_t (hpdcache_mem_req_w_t), .hpdcache_mem_resp_r_t(hpdcache_mem_resp_r_t), .hpdcache_mem_resp_w_t(hpdcache_mem_resp_w_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), .AxiAddrWidth (CVA6Cfg.AxiAddrWidth), .AxiDataWidth (CVA6Cfg.AxiDataWidth), diff --git a/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv b/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv index 9eb0a8bc67..31619f553b 100644 --- a/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv +++ b/core/cache_subsystem/cva6_hpdcache_subsystem_axi_arbiter.sv @@ -16,12 +16,15 @@ module cva6_hpdcache_subsystem_axi_arbiter // Parameters // {{{ #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter int HPDcacheMemIdWidth = 8, parameter int HPDcacheMemDataWidth = 512, parameter type hpdcache_mem_req_t = logic, parameter type hpdcache_mem_req_w_t = logic, parameter type hpdcache_mem_resp_r_t = logic, parameter type hpdcache_mem_resp_w_t = logic, + parameter type icache_req_t = logic, + parameter type icache_rtrn_t = logic, parameter int unsigned AxiAddrWidth = 1, parameter int unsigned AxiDataWidth = 1, @@ -47,11 +50,11 @@ module cva6_hpdcache_subsystem_axi_arbiter // {{{ input logic icache_miss_valid_i, output logic icache_miss_ready_o, - input wt_cache_pkg::icache_req_t icache_miss_i, + input icache_req_t icache_miss_i, input hpdcache_mem_id_t icache_miss_id_i, output logic icache_miss_resp_valid_o, - output 
wt_cache_pkg::icache_rtrn_t icache_miss_resp_o, + output icache_rtrn_t icache_miss_resp_o, // }}} // Interfaces from/to D$ @@ -128,19 +131,19 @@ module cva6_hpdcache_subsystem_axi_arbiter localparam int MEM_RESP_RT_DEPTH = (1 << HPDcacheMemIdWidth); typedef hpdcache_mem_id_t [MEM_RESP_RT_DEPTH-1:0] mem_resp_rt_t; - typedef logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] icache_resp_data_t; + typedef logic [CVA6Cfg.ICACHE_LINE_WIDTH-1:0] icache_resp_data_t; // }}} // Adapt the I$ interface to the HPDcache memory interface // {{{ - localparam int ICACHE_CL_WORDS = ariane_pkg::ICACHE_LINE_WIDTH / 64; + localparam int ICACHE_CL_WORDS = CVA6Cfg.ICACHE_LINE_WIDTH / 64; localparam int ICACHE_CL_WORD_INDEX = $clog2(ICACHE_CL_WORDS); - localparam int ICACHE_CL_SIZE = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8); + localparam int ICACHE_CL_SIZE = $clog2(CVA6Cfg.ICACHE_LINE_WIDTH / 8); localparam int ICACHE_WORD_SIZE = 3; localparam int ICACHE_MEM_REQ_CL_LEN = - (ariane_pkg::ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth; + (CVA6Cfg.ICACHE_LINE_WIDTH + HPDcacheMemDataWidth - 1)/HPDcacheMemDataWidth; localparam int ICACHE_MEM_REQ_CL_SIZE = - (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) ? + (HPDcacheMemDataWidth <= CVA6Cfg.ICACHE_LINE_WIDTH) ? 
$clog2( HPDcacheMemDataWidth / 8 ) : ICACHE_CL_SIZE; @@ -201,7 +204,7 @@ module cva6_hpdcache_subsystem_axi_arbiter icache_resp_data_t icache_miss_rdata; generate - if (HPDcacheMemDataWidth < ariane_pkg::ICACHE_LINE_WIDTH) begin + if (HPDcacheMemDataWidth < CVA6Cfg.ICACHE_LINE_WIDTH) begin hpdcache_fifo_reg #( .FIFO_DEPTH (1), .fifo_data_t(hpdcache_mem_id_t) @@ -220,7 +223,7 @@ module cva6_hpdcache_subsystem_axi_arbiter hpdcache_data_upsize #( .WR_WIDTH(HPDcacheMemDataWidth), - .RD_WIDTH(ariane_pkg::ICACHE_LINE_WIDTH), + .RD_WIDTH(CVA6Cfg.ICACHE_LINE_WIDTH), .DEPTH (1) ) i_icache_hpdcache_data_upsize ( .clk_i, @@ -261,7 +264,7 @@ module cva6_hpdcache_subsystem_axi_arbiter automatic logic [63:0] icache_miss_word; icache_miss_word_index = icache_miss_req_rdata.mem_req_addr[3+:ICACHE_CL_WORD_INDEX]; icache_miss_word = icache_miss_resp_data_rdata[icache_miss_word_index*64+:64]; - icache_miss_rdata = {{ariane_pkg::ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word}; + icache_miss_rdata = {{CVA6Cfg.ICACHE_LINE_WIDTH - 64{1'b0}}, icache_miss_word}; end else begin icache_miss_rdata = icache_miss_resp_data_rdata; end @@ -575,10 +578,10 @@ module cva6_hpdcache_subsystem_axi_arbiter "HPDcacheMemIdWidth shall be wide enough to identify all pending HPDcache cacheable writes and uncacheable writes" ); initial - assert (HPDcacheMemDataWidth <= ariane_pkg::ICACHE_LINE_WIDTH) + assert (HPDcacheMemDataWidth <= CVA6Cfg.ICACHE_LINE_WIDTH) else $fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Icache line"); initial - assert (HPDcacheMemDataWidth <= ariane_pkg::DCACHE_LINE_WIDTH) + assert (HPDcacheMemDataWidth <= CVA6Cfg.DCACHE_LINE_WIDTH) else $fatal("HPDcacheMemDataWidth shall be less or equal to the width of a Dcache line"); // pragma translate_on // }}} diff --git a/core/cache_subsystem/cva6_icache.sv b/core/cache_subsystem/cva6_icache.sv index 72e3d45ece..37665db0fb 100644 --- a/core/cache_subsystem/cva6_icache.sv +++ b/core/cache_subsystem/cva6_icache.sv @@ -30,8 
+30,14 @@ module cva6_icache import wt_cache_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + parameter type icache_req_t = logic, + parameter type icache_rtrn_t = logic, /// ID to be used for read transactions - parameter logic [MEM_TID_WIDTH-1:0] RdTxId = 0 + parameter logic [CVA6Cfg.MEM_TID_WIDTH-1:0] RdTxId = 0 ) ( input logic clk_i, input logic rst_ni, @@ -56,10 +62,15 @@ module cva6_icache output icache_req_t mem_data_o ); + // Calculated parameter + localparam ICACHE_OFFSET_WIDTH = $clog2(CVA6Cfg.ICACHE_LINE_WIDTH / 8); + localparam ICACHE_NUM_WORDS = 2 ** (CVA6Cfg.ICACHE_INDEX_WIDTH - ICACHE_OFFSET_WIDTH); + localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS); // excluding byte offset + // functions - function automatic logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] icache_way_bin2oh( - input logic [L1I_WAY_WIDTH-1:0] in); - logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] out; + function automatic logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0] icache_way_bin2oh( + input logic [CVA6Cfg.ICACHE_SET_ASSOC_WIDTH-1:0] in); + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0] out; out = '0; out[in] = 1'b1; return out; @@ -67,9 +78,9 @@ module cva6_icache // signals logic cache_en_d, cache_en_q; // cache is enabled - logic [riscv::VLEN-1:0] vaddr_d, vaddr_q; + logic [CVA6Cfg.VLEN-1:0] vaddr_d, vaddr_q; logic paddr_is_nc; // asserted if physical address is non-cacheable - logic [ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0] cl_hit; // hit from tag compare logic cache_rden; // triggers cache lookup logic cache_wren; // triggers write to cacheline logic @@ -79,10 +90,10 @@ module cva6_icache // replacement strategy logic update_lfsr; // shift the LFSR - logic [$clog2(ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered - logic 
[$clog2(ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement - logic [$clog2(ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace - logic [ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot) + logic [$clog2(CVA6Cfg.ICACHE_SET_ASSOC)-1:0] inv_way; // first non-valid encountered + logic [$clog2(CVA6Cfg.ICACHE_SET_ASSOC)-1:0] rnd_way; // random index for replacement + logic [$clog2(CVA6Cfg.ICACHE_SET_ASSOC)-1:0] repl_way; // way to replace + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0] repl_way_oh_d, repl_way_oh_q; // way to replace (onehot) logic all_ways_valid; // we need to switch repl strategy since all are valid // invalidations / flushing @@ -93,19 +104,19 @@ module cva6_icache // mem arrays logic cl_we; // write enable to memory array - logic [ ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array + logic [ CVA6Cfg.ICACHE_SET_ASSOC-1:0] cl_req; // request to memory array logic [ICACHE_CL_IDX_WIDTH-1:0] cl_index; // this is a cache-line index, to memory array logic [ICACHE_OFFSET_WIDTH-1:0] cl_offset_d, cl_offset_q; // offset in cache line - logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag - logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem - logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache - logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache - logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0] cl_sel; // selected word from each cacheline - logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline - logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs + logic [CVA6Cfg.ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag + logic [CVA6Cfg.ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [CVA6Cfg.ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem + logic 
[CVA6Cfg.ICACHE_LINE_WIDTH-1:0] cl_rdata [CVA6Cfg.ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache + logic [CVA6Cfg.ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[CVA6Cfg.ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0][CVA6Cfg.FETCH_WIDTH-1:0] cl_sel; // selected word from each cacheline + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0][CVA6Cfg.FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs logic vld_we; // valid bits write enable - logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write - logic [ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0] vld_rdata; // valid bits coming from valid regs logic [ICACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit // cpmtroller FSM @@ -124,11 +135,11 @@ module cva6_icache /////////////////////////////////////////////////////// // extract tag from physical address, check if NC - assign cl_tag_d = (areq_i.fetch_valid) ? areq_i.fetch_paddr[ICACHE_TAG_WIDTH+ICACHE_INDEX_WIDTH-1:ICACHE_INDEX_WIDTH] : cl_tag_q; + assign cl_tag_d = (areq_i.fetch_valid) ? 
areq_i.fetch_paddr[CVA6Cfg.ICACHE_TAG_WIDTH+CVA6Cfg.ICACHE_INDEX_WIDTH-1:CVA6Cfg.ICACHE_INDEX_WIDTH] : cl_tag_q; // noncacheable if request goes to I/O space, or if cache is disabled assign paddr_is_nc = (~cache_en_q) | (~config_pkg::is_inside_cacheable_regions( - CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, cl_tag_d, {ICACHE_INDEX_WIDTH{1'b0}}} + CVA6Cfg, {{64 - CVA6Cfg.PLEN{1'b0}}, cl_tag_d, {CVA6Cfg.ICACHE_INDEX_WIDTH{1'b0}}} )); // pass exception through @@ -140,7 +151,7 @@ module cva6_icache assign areq_o.fetch_vaddr = {vaddr_q >> 2, 2'b0}; // split virtual address into index and offset to address cache arrays - assign cl_index = vaddr_d[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH]; + assign cl_index = vaddr_d[CVA6Cfg.ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH]; if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset @@ -149,16 +160,16 @@ module cva6_icache ( paddr_is_nc & mem_data_req_o ) ? cl_offset_q[2]<<2 : // needed since we transfer 32bit over a 64bit AXI bus in this case cl_offset_q; // request word address instead of cl address in case of NC access - assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit - {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl + assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[CVA6Cfg.ICACHE_INDEX_WIDTH-1:3], 3'b0} : // align to 64bit + {cl_tag_d, vaddr_q[CVA6Cfg.ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl end else begin : gen_piton_offset // icache fills are either cachelines or 4byte fills, depending on whether they go to the Piton I/O space or not. // since the piton cache system replicates the data, we can always index the full CL assign cl_offset_d = (dreq_o.ready & dreq_i.req) ? {dreq_i.vaddr >> 2, 2'b0} : cl_offset_q; // request word address instead of cl address in case of NC access - assign mem_data_o.paddr = (paddr_is_nc) ? 
{cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit - {cl_tag_d, vaddr_q[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl + assign mem_data_o.paddr = (paddr_is_nc) ? {cl_tag_d, vaddr_q[CVA6Cfg.ICACHE_INDEX_WIDTH-1:2], 2'b0} : // align to 32bit + {cl_tag_d, vaddr_q[CVA6Cfg.ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH], {ICACHE_OFFSET_WIDTH{1'b0}}}; // align to cl end @@ -176,7 +187,7 @@ module cva6_icache // main control logic /////////////////////////////////////////////////////// logic addr_ni; - assign addr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {{64-riscv::PLEN{1'b0}}, areq_i.fetch_paddr}); + assign addr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {{64-CVA6Cfg.PLEN{1'b0}}, areq_i.fetch_paddr}); always_comb begin : p_fsm // default assignment state_d = state_q; @@ -359,7 +370,7 @@ module cva6_icache // invalidation/clearing address // flushing takes precedence over invals assign vld_addr = (flush_en) ? flush_cnt_q : - (inv_en) ? mem_rtrn_i.inv.idx[ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] : + (inv_en) ? mem_rtrn_i.inv.idx[CVA6Cfg.ICACHE_INDEX_WIDTH-1:ICACHE_OFFSET_WIDTH] : cl_index; assign vld_req = (flush_en || cache_rden) ? 
'1 : @@ -386,7 +397,7 @@ module cva6_icache // find invalid cache line lzc #( - .WIDTH(ICACHE_SET_ASSOC) + .WIDTH(CVA6Cfg.ICACHE_SET_ASSOC) ) i_lzc ( .in_i (~vld_rdata), .cnt_o (inv_way), @@ -396,7 +407,7 @@ module cva6_icache // generate random cacheline index lfsr #( .LfsrWidth(8), - .OutWidth ($clog2(ariane_pkg::ICACHE_SET_ASSOC)) + .OutWidth ($clog2(CVA6Cfg.ICACHE_SET_ASSOC)) ) i_lfsr ( .clk_i (clk_i), .rst_ni(rst_ni), @@ -409,17 +420,17 @@ module cva6_icache // tag comparison, hit generation /////////////////////////////////////////////////////// - logic [$clog2(ICACHE_SET_ASSOC)-1:0] hit_idx; + logic [$clog2(CVA6Cfg.ICACHE_SET_ASSOC)-1:0] hit_idx; - for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel + for (genvar i = 0; i < CVA6Cfg.ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i]; - assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:FETCH_WIDTH]; - assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH]; + assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH]; + assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH]; end lzc #( - .WIDTH(ICACHE_SET_ASSOC) + .WIDTH(CVA6Cfg.ICACHE_SET_ASSOC) ) i_lzc_hit ( .in_i (cl_hit), .cnt_o (hit_idx), @@ -431,8 +442,8 @@ module cva6_icache dreq_o.data = cl_sel[hit_idx]; dreq_o.user = cl_user[hit_idx]; end else begin - dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:FETCH_WIDTH]; - dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:FETCH_USER_WIDTH]; + dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH]; + dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH]; end end @@ -441,13 +452,13 @@ module cva6_icache /////////////////////////////////////////////////////// - logic [ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[ICACHE_SET_ASSOC-1:0]; + logic [CVA6Cfg.ICACHE_TAG_WIDTH:0] cl_tag_valid_rdata[CVA6Cfg.ICACHE_SET_ASSOC-1:0]; - for (genvar i = 0; i < 
ICACHE_SET_ASSOC; i++) begin : gen_sram + for (genvar i = 0; i < CVA6Cfg.ICACHE_SET_ASSOC; i++) begin : gen_sram // Tag RAM sram #( // tag + valid bit - .DATA_WIDTH(ICACHE_TAG_WIDTH + 1), + .DATA_WIDTH(CVA6Cfg.ICACHE_TAG_WIDTH + 1), .NUM_WORDS (ICACHE_NUM_WORDS) ) tag_sram ( .clk_i (clk_i), @@ -464,14 +475,14 @@ module cva6_icache .rdata_o(cl_tag_valid_rdata[i]) ); - assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH-1:0]; - assign vld_rdata[i] = cl_tag_valid_rdata[i][ICACHE_TAG_WIDTH]; + assign cl_tag_rdata[i] = cl_tag_valid_rdata[i][CVA6Cfg.ICACHE_TAG_WIDTH-1:0]; + assign vld_rdata[i] = cl_tag_valid_rdata[i][CVA6Cfg.ICACHE_TAG_WIDTH]; // Data RAM sram #( - .USER_WIDTH(ICACHE_USER_LINE_WIDTH), - .DATA_WIDTH(ICACHE_LINE_WIDTH), - .USER_EN (ariane_pkg::FETCH_USER_EN), + .USER_WIDTH(CVA6Cfg.ICACHE_USER_LINE_WIDTH), + .DATA_WIDTH(CVA6Cfg.ICACHE_LINE_WIDTH), + .USER_EN (CVA6Cfg.FETCH_USER_EN), .NUM_WORDS (ICACHE_NUM_WORDS) ) data_sram ( .clk_i (clk_i), @@ -543,16 +554,16 @@ module cva6_icache else $fatal(1, "[l1 icache] cl_hit signal must be hot1"); // this is only used for verification! 
- logic vld_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; - logic [ariane_pkg::ICACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::ICACHE_NUM_WORDS-1:0][ariane_pkg::ICACHE_SET_ASSOC-1:0]; - logic [ariane_pkg::ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test; + logic vld_mirror[ICACHE_NUM_WORDS-1:0][CVA6Cfg.ICACHE_SET_ASSOC-1:0]; + logic [CVA6Cfg.ICACHE_TAG_WIDTH-1:0] tag_mirror[ICACHE_NUM_WORDS-1:0][CVA6Cfg.ICACHE_SET_ASSOC-1:0]; + logic [CVA6Cfg.ICACHE_SET_ASSOC-1:0] tag_write_duplicate_test; always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror if (!rst_ni) begin vld_mirror <= '{default: '0}; tag_mirror <= '{default: '0}; end else begin - for (int i = 0; i < ICACHE_SET_ASSOC; i++) begin + for (int i = 0; i < CVA6Cfg.ICACHE_SET_ASSOC; i++) begin if (vld_req[i] & vld_we) begin vld_mirror[vld_addr][i] <= vld_wdata[i]; tag_mirror[vld_addr][i] <= cl_tag_q; @@ -561,7 +572,7 @@ module cva6_icache end end - for (genvar i = 0; i < ICACHE_SET_ASSOC; i++) begin : gen_tag_dupl + for (genvar i = 0; i < CVA6Cfg.ICACHE_SET_ASSOC; i++) begin : gen_tag_dupl assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == cl_tag_q) & vld_mirror[vld_addr][i] & (|vld_wdata); end @@ -573,7 +584,7 @@ module cva6_icache initial begin // assert wrong parameterizations - assert (ICACHE_INDEX_WIDTH <= 12) + assert (CVA6Cfg.ICACHE_INDEX_WIDTH <= 12) else $fatal(1, "[l1 icache] cache index width can be maximum 12bit since VM uses 4kB pages"); end `endif diff --git a/core/cache_subsystem/cva6_icache_axi_wrapper.sv b/core/cache_subsystem/cva6_icache_axi_wrapper.sv index 7579fe4b67..3800b38918 100644 --- a/core/cache_subsystem/cva6_icache_axi_wrapper.sv +++ b/core/cache_subsystem/cva6_icache_axi_wrapper.sv @@ -18,6 +18,12 @@ module cva6_icache_axi_wrapper import wt_cache_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t 
= logic, + parameter type icache_drsp_t = logic, + parameter type icache_req_t = logic, + parameter type icache_rtrn_t = logic, parameter type axi_req_t = logic, parameter type axi_rsp_t = logic ) ( @@ -39,8 +45,8 @@ module cva6_icache_axi_wrapper input axi_rsp_t axi_resp_i ); - localparam AxiNumWords = (ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH > DCACHE_LINE_WIDTH) + - (DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ICACHE_LINE_WIDTH <= DCACHE_LINE_WIDTH) ; + localparam AxiNumWords = (CVA6Cfg.ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (CVA6Cfg.ICACHE_LINE_WIDTH > CVA6Cfg.DCACHE_LINE_WIDTH) + + (CVA6Cfg.DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (CVA6Cfg.ICACHE_LINE_WIDTH <= CVA6Cfg.DCACHE_LINE_WIDTH) ; logic icache_mem_rtrn_vld; icache_rtrn_t icache_mem_rtrn; @@ -65,7 +71,7 @@ module cva6_icache_axi_wrapper logic req_valid_d, req_valid_q; icache_req_t req_data_d, req_data_q; logic first_d, first_q; - logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] + logic [CVA6Cfg.ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] rd_shift_d, rd_shift_q; // Keep read request asserted until we have an AXI grant. This is not guaranteed by icache (but @@ -80,7 +86,7 @@ module cva6_icache_axi_wrapper assign axi_rd_addr = CVA6Cfg.AxiAddrWidth'(req_data_d.paddr); // Fetch a full cache line on a cache miss, or a single word on a bypassed access - assign axi_rd_blen = (req_data_d.nc) ? '0 : ariane_pkg::ICACHE_LINE_WIDTH / 64 - 1; + assign axi_rd_blen = (req_data_d.nc) ? 
'0 : CVA6Cfg.ICACHE_LINE_WIDTH / 64 - 1; assign axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth / 8); // Maximum assign axi_rd_id_in = req_data_d.tid; assign axi_rd_rdy = 1'b1; @@ -102,6 +108,12 @@ module cva6_icache_axi_wrapper cva6_icache #( // use ID 0 for icache reads .CVA6Cfg(CVA6Cfg), + .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), .RdTxId (0) ) i_cva6_icache ( .clk_i (clk_i), @@ -171,10 +183,10 @@ module cva6_icache_axi_wrapper if (axi_rd_valid) begin first_d = axi_rd_last; - if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin + if (CVA6Cfg.ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin rd_shift_d = axi_rd_data; end else begin - rd_shift_d = {axi_rd_data, rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]}; + rd_shift_d = {axi_rd_data, rd_shift_q[CVA6Cfg.ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1]}; end // If this is a single word transaction, we need to make sure that word is placed at offset 0 diff --git a/core/cache_subsystem/miss_handler.sv b/core/cache_subsystem/miss_handler.sv index 228b3e37ee..0416f03762 100644 --- a/core/cache_subsystem/miss_handler.sv +++ b/core/cache_subsystem/miss_handler.sv @@ -23,7 +23,9 @@ module miss_handler parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter int unsigned NR_PORTS = 4, parameter type axi_req_t = logic, - parameter type axi_rsp_t = logic + parameter type axi_rsp_t = logic, + parameter type cache_line_t = logic, + parameter type cl_be_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -58,17 +60,37 @@ module miss_handler input amo_req_t amo_req_i, output amo_resp_t amo_resp_o, // Port to SRAMs, for refill and eviction - output logic [DCACHE_SET_ASSOC-1:0] req_o, - output logic [DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] req_o, + output logic 
[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0] addr_o, // address into cache array output cache_line_t data_o, output cl_be_t be_o, - input cache_line_t [DCACHE_SET_ASSOC-1:0] data_i, + input cache_line_t [CVA6Cfg.DCACHE_SET_ASSOC-1:0] data_i, output logic we_o ); // Three MSHR ports + AMO port parameter NR_BYPASS_PORTS = NR_PORTS + 1; + // convert one hot to bin for -> needed for cache replacement + function automatic logic [CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] one_hot_to_bin( + input logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] in); + for (int unsigned i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin + if (in[i]) return i; + end + endfunction + // get the first bit set, returns one hot value + function automatic logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] get_victim_cl( + input logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] valid_dirty); + // one-hot return vector + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] oh = '0; + for (int unsigned i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin + if (valid_dirty[i]) begin + oh[i] = 1'b1; + return oh; + end + end + endfunction + // FSM states enum logic [3:0] { IDLE, // 0 @@ -90,8 +112,8 @@ module miss_handler // Registers mshr_t mshr_d, mshr_q; - logic [DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q; - logic [DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q; + logic [CVA6Cfg.DCACHE_INDEX_WIDTH-1:0] cnt_d, cnt_q; + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] evict_way_d, evict_way_q; // cache line to evict cache_line_t evict_cl_d, evict_cl_q; @@ -120,20 +142,20 @@ module miss_handler // Cache Line Refill <-> AXI logic req_fsm_miss_valid; logic [ 63:0] req_fsm_miss_addr; - logic [ DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata; + logic [ CVA6Cfg.DCACHE_LINE_WIDTH-1:0] req_fsm_miss_wdata; logic req_fsm_miss_we; - logic [ (DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be; + logic [ (CVA6Cfg.DCACHE_LINE_WIDTH/8)-1:0] req_fsm_miss_be; ariane_pkg::ad_req_t req_fsm_miss_req; logic [ 1:0] req_fsm_miss_size; logic gnt_miss_fsm; logic valid_miss_fsm; - logic [ (DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm; + 
logic [ (CVA6Cfg.DCACHE_LINE_WIDTH/64)-1:0][63:0] data_miss_fsm; // Cache Management <-> LFSR logic lfsr_enable; - logic [ DCACHE_SET_ASSOC-1:0] lfsr_oh; - logic [$clog2(DCACHE_SET_ASSOC-1)-1:0] lfsr_bin; + logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] lfsr_oh; + logic [$clog2(CVA6Cfg.DCACHE_SET_ASSOC-1)-1:0] lfsr_bin; // AMOs ariane_pkg::amo_t amo_op; logic [ 63:0] amo_operand_b; @@ -142,9 +164,9 @@ module miss_handler // Cache Management // ------------------------------ always_comb begin : cache_management - automatic logic [DCACHE_SET_ASSOC-1:0] evict_way, valid_way; + automatic logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] evict_way, valid_way; - for (int unsigned i = 0; i < DCACHE_SET_ASSOC; i++) begin + for (int unsigned i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin evict_way[i] = data_i[i].valid & data_i[i].dirty; valid_way[i] = data_i[i].valid; end @@ -233,7 +255,7 @@ module miss_handler mshr_d.valid = 1'b1; mshr_d.we = miss_req_we[i]; mshr_d.id = i; - mshr_d.addr = miss_req_addr[i][DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:0]; + mshr_d.addr = miss_req_addr[i][CVA6Cfg.DCACHE_TAG_WIDTH+CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; mshr_d.wdata = miss_req_wdata[i]; mshr_d.be = miss_req_be[i]; break; @@ -246,7 +268,7 @@ module miss_handler // 1. Check if there is an empty cache-line // 2. 
If not -> evict one req_o = '1; - addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; + addr_o = mshr_q.addr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; state_d = MISS_REPL; miss_o = 1'b1; end @@ -262,7 +284,7 @@ module miss_handler state_d = WB_CACHELINE_MISS; evict_cl_d.tag = data_i[lfsr_bin].tag; evict_cl_d.data = data_i[lfsr_bin].data; - cnt_d = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; + cnt_d = mshr_q.addr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; // no - we can request a cache line now end else state_d = REQ_CACHELINE; // we have at least one free way @@ -287,17 +309,17 @@ module miss_handler // ~> replace the cacheline SAVE_CACHELINE: begin // calculate cacheline offset - automatic logic [$clog2(DCACHE_LINE_WIDTH)-1:0] cl_offset; - cl_offset = mshr_q.addr[DCACHE_BYTE_OFFSET-1:3] << 6; + automatic logic [$clog2(CVA6Cfg.DCACHE_LINE_WIDTH)-1:0] cl_offset; + cl_offset = mshr_q.addr[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:3] << 6; // we've got a valid response from refill unit if (valid_miss_fsm) begin - addr_o = mshr_q.addr[DCACHE_INDEX_WIDTH-1:0]; + addr_o = mshr_q.addr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; req_o = evict_way_q; we_o = 1'b1; be_o = '1; be_o.vldrty = evict_way_q; - data_o.tag = mshr_q.addr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]; + data_o.tag = mshr_q.addr[CVA6Cfg.DCACHE_TAG_WIDTH+CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_INDEX_WIDTH]; data_o.data = data_miss_fsm; data_o.valid = 1'b1; data_o.dirty = 1'b0; @@ -328,8 +350,8 @@ module miss_handler req_fsm_miss_valid = 1'b1; req_fsm_miss_addr = { evict_cl_q.tag, - cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET], - {{DCACHE_BYTE_OFFSET} {1'b0}} + cnt_q[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH], + {{CVA6Cfg.DCACHE_OFFSET_WIDTH} {1'b0}} }; req_fsm_miss_be = '1; req_fsm_miss_we = 1'b1; @@ -370,14 +392,14 @@ module miss_handler // not dirty ~> increment and continue end else begin // increment and re-request - cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); + cnt_d = cnt_q + (1'b1 << CVA6Cfg.DCACHE_OFFSET_WIDTH); 
state_d = FLUSH_REQ_STATUS; addr_o = cnt_q; req_o = 1'b1; be_o.vldrty = INVALIDATE_ON_FLUSH ? '1 : '0; we_o = 1'b1; // finished with flushing operation, go back to idle - if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) begin + if (cnt_q[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH] == CVA6Cfg.DCACHE_NUM_WORDS - 1) begin // only acknowledge if the flush wasn't triggered by an atomic flush_ack_o = ~serve_amo_q; state_d = IDLE; @@ -393,9 +415,9 @@ module miss_handler we_o = 1'b1; // only write the dirty array be_o.vldrty = '1; - cnt_d = cnt_q + (1'b1 << DCACHE_BYTE_OFFSET); + cnt_d = cnt_q + (1'b1 << CVA6Cfg.DCACHE_OFFSET_WIDTH); // finished initialization - if (cnt_q[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == DCACHE_NUM_WORDS - 1) state_d = IDLE; + if (cnt_q[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH] == CVA6Cfg.DCACHE_NUM_WORDS - 1) state_d = IDLE; end // ---------------------- // AMOs @@ -479,12 +501,12 @@ module miss_handler for (int i = 0; i < NR_PORTS; i++) begin // check mshr for potential matching of other units, exclude the unit currently being served - if (mshr_q.valid && mshr_addr_i[i][55:DCACHE_BYTE_OFFSET] == mshr_q.addr[55:DCACHE_BYTE_OFFSET]) begin + if (mshr_q.valid && mshr_addr_i[i][55:CVA6Cfg.DCACHE_OFFSET_WIDTH] == mshr_q.addr[55:CVA6Cfg.DCACHE_OFFSET_WIDTH]) begin mshr_addr_matches_o[i] = 1'b1; end // same as previous, but checking only the index - if (mshr_q.valid && mshr_addr_i[i][DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] == mshr_q.addr[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]) begin + if (mshr_q.valid && mshr_addr_i[i][CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH] == mshr_q.addr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]) begin mshr_index_matches_o[i] = 1'b1; end end @@ -569,13 +591,13 @@ module miss_handler // Bypass AXI Interface // ---------------------- // Cast bypass_adapter_req.addr to axi_adapter port size - logic [riscv::XLEN-1:0] bypass_addr; + logic 
[CVA6Cfg.XLEN-1:0] bypass_addr; assign bypass_addr = bypass_adapter_req.addr; axi_adapter #( .CVA6Cfg (CVA6Cfg), .DATA_WIDTH (64), - .CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET), + .CACHELINE_BYTE_OFFSET(CVA6Cfg.DCACHE_OFFSET_WIDTH), .axi_req_t (axi_req_t), .axi_rsp_t (axi_rsp_t) ) i_bypass_axi_adapter ( @@ -604,13 +626,13 @@ module miss_handler // Cache Line AXI Refill // ---------------------- // Cast req_fsm_miss_addr to axi_adapter port size - logic [riscv::XLEN-1:0] miss_addr; + logic [CVA6Cfg.XLEN-1:0] miss_addr; assign miss_addr = req_fsm_miss_addr; axi_adapter #( .CVA6Cfg (CVA6Cfg), - .DATA_WIDTH (DCACHE_LINE_WIDTH), - .CACHELINE_BYTE_OFFSET(DCACHE_BYTE_OFFSET), + .DATA_WIDTH (CVA6Cfg.DCACHE_LINE_WIDTH), + .CACHELINE_BYTE_OFFSET(CVA6Cfg.DCACHE_OFFSET_WIDTH), .axi_req_t (axi_req_t), .axi_rsp_t (axi_rsp_t) ) i_miss_axi_adapter ( @@ -639,7 +661,7 @@ module miss_handler // Replacement LFSR // ----------------- lfsr_8bit #( - .WIDTH(DCACHE_SET_ASSOC) + .WIDTH(CVA6Cfg.DCACHE_SET_ASSOC) ) i_lfsr ( .en_i (lfsr_enable), .refill_way_oh (lfsr_oh), diff --git a/core/cache_subsystem/std_cache_subsystem.sv b/core/cache_subsystem/std_cache_subsystem.sv index 45ba8bd3c1..388490c882 100644 --- a/core/cache_subsystem/std_cache_subsystem.sv +++ b/core/cache_subsystem/std_cache_subsystem.sv @@ -20,6 +20,14 @@ module std_cache_subsystem import std_cache_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + parameter type icache_req_t = logic, + parameter type icache_rtrn_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, parameter int unsigned NumPorts = 4, parameter type axi_ar_chan_t = logic, parameter type axi_aw_chan_t = logic, @@ -69,6 +77,12 @@ module std_cache_subsystem cva6_icache_axi_wrapper #( .CVA6Cfg (CVA6Cfg), + 
.icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), .axi_req_t(axi_req_t), .axi_rsp_t(axi_rsp_t) ) i_cva6_icache_axi_wrapper ( @@ -93,6 +107,8 @@ module std_cache_subsystem // Port 3: Store Unit std_nbdcache #( .CVA6Cfg (CVA6Cfg), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .NumPorts (NumPorts), .axi_req_t(axi_req_t), .axi_rsp_t(axi_rsp_t) @@ -167,7 +183,8 @@ module std_cache_subsystem .DATA_WIDTH (2), // we can have a maximum of 4 oustanding transactions as each port is blocking .DEPTH (4), - .FALL_THROUGH(1'b1) + .FALL_THROUGH(1'b1), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_fifo_w_channel ( .clk_i (clk_i), .rst_ni (rst_ni), diff --git a/core/cache_subsystem/std_nbdcache.sv b/core/cache_subsystem/std_nbdcache.sv index 367c67cb5d..c395c29be1 100644 --- a/core/cache_subsystem/std_nbdcache.sv +++ b/core/cache_subsystem/std_nbdcache.sv @@ -18,9 +18,22 @@ module std_nbdcache import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, parameter int unsigned NumPorts = 4, parameter type axi_req_t = logic, - parameter type axi_rsp_t = logic + parameter type axi_rsp_t = logic, + parameter type cache_line_t = struct packed { + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag; // tag array + logic [CVA6Cfg.DCACHE_LINE_WIDTH-1:0] data; // data array + logic valid; // state array + logic dirty; // state array + }, + parameter type cl_be_t = struct packed { + logic [(CVA6Cfg.DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array + logic [(CVA6Cfg.DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits) + } ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset 
active low @@ -42,7 +55,7 @@ module std_nbdcache input axi_rsp_t axi_bypass_i ); - import std_cache_pkg::*; + localparam int unsigned DCACHE_DIRTY_WIDTH = CVA6Cfg.DCACHE_SET_ASSOC * 2; // ------------------------------- // Controller <-> Arbiter @@ -52,16 +65,16 @@ module std_nbdcache // 3. Load Unit // 4. Accelerator // 5. Store unit - logic [ NumPorts:0][ DCACHE_SET_ASSOC-1:0] req; - logic [ NumPorts:0][DCACHE_INDEX_WIDTH-1:0] addr; + logic [ NumPorts:0][ CVA6Cfg.DCACHE_SET_ASSOC-1:0] req; + logic [ NumPorts:0][CVA6Cfg.DCACHE_INDEX_WIDTH-1:0] addr; logic [ NumPorts:0] gnt; - cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata; - logic [ NumPorts:0][ DCACHE_TAG_WIDTH-1:0] tag; + cache_line_t [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] rdata; + logic [ NumPorts:0][ CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag; cache_line_t [ NumPorts:0] wdata; logic [ NumPorts:0] we; cl_be_t [ NumPorts:0] be; - logic [ DCACHE_SET_ASSOC-1:0] hit_way; + logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] hit_way; // ------------------------------- // Controller <-> Miss unit // ------------------------------- @@ -82,11 +95,11 @@ module std_nbdcache // ------------------------------- // Arbiter <-> Datram, // ------------------------------- - logic [ DCACHE_SET_ASSOC-1:0] req_ram; - logic [DCACHE_INDEX_WIDTH-1:0] addr_ram; + logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] req_ram; + logic [CVA6Cfg.DCACHE_INDEX_WIDTH-1:0] addr_ram; logic we_ram; cache_line_t wdata_ram; - cache_line_t [ DCACHE_SET_ASSOC-1:0] rdata_ram; + cache_line_t [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] rdata_ram; cl_be_t be_ram; // ------------------ @@ -95,7 +108,11 @@ module std_nbdcache generate for (genvar i = 0; i < NumPorts; i++) begin : master_ports cache_ctrl #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), + .cache_line_t(cache_line_t), + .cl_be_t(cl_be_t) ) i_cache_ctrl ( .bypass_i (~enable_i), .busy_o (busy[i]), @@ -137,7 +154,9 @@ module std_nbdcache .CVA6Cfg (CVA6Cfg), .NR_PORTS (NumPorts), 
.axi_req_t(axi_req_t), - .axi_rsp_t(axi_rsp_t) + .axi_rsp_t(axi_rsp_t), + .cache_line_t(cache_line_t), + .cl_be_t(cl_be_t) ) i_miss_handler ( .flush_i (flush_i), .busy_i (|busy), @@ -173,15 +192,15 @@ module std_nbdcache // -------------- // Memory Arrays // -------------- - for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : sram_block + for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin : sram_block sram #( - .DATA_WIDTH(DCACHE_LINE_WIDTH), - .NUM_WORDS (DCACHE_NUM_WORDS) + .DATA_WIDTH(CVA6Cfg.DCACHE_LINE_WIDTH), + .NUM_WORDS (CVA6Cfg.DCACHE_NUM_WORDS) ) data_sram ( .req_i (req_ram[i]), .rst_ni (rst_ni), .we_i (we_ram), - .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]), + .addr_i (addr_ram[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]), .wuser_i('0), .wdata_i(wdata_ram.data), .be_i (be_ram.data), @@ -191,13 +210,13 @@ module std_nbdcache ); sram #( - .DATA_WIDTH(DCACHE_TAG_WIDTH), - .NUM_WORDS (DCACHE_NUM_WORDS) + .DATA_WIDTH(CVA6Cfg.DCACHE_TAG_WIDTH), + .NUM_WORDS (CVA6Cfg.DCACHE_NUM_WORDS) ) tag_sram ( .req_i (req_ram[i]), .rst_ni (rst_ni), .we_i (we_ram), - .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]), + .addr_i (addr_ram[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]), .wuser_i('0), .wdata_i(wdata_ram.tag), .be_i (be_ram.tag), @@ -217,7 +236,7 @@ module std_nbdcache // you can use it here to save the extra 4x overhead introduced by this workaround. 
logic [4*DCACHE_DIRTY_WIDTH-1:0] dirty_wdata, dirty_rdata; - for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin + for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin assign dirty_wdata[8*i] = wdata_ram.dirty; assign dirty_wdata[8*i+1] = wdata_ram.valid; assign rdata_ram[i].dirty = dirty_rdata[8*i]; @@ -227,13 +246,13 @@ module std_nbdcache sram #( .USER_WIDTH(1), .DATA_WIDTH(4 * DCACHE_DIRTY_WIDTH), - .NUM_WORDS (DCACHE_NUM_WORDS) + .NUM_WORDS (CVA6Cfg.DCACHE_NUM_WORDS) ) valid_dirty_sram ( .clk_i (clk_i), .rst_ni (rst_ni), .req_i (|req_ram), .we_i (we_ram), - .addr_i (addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET]), + .addr_i (addr_ram[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]), .wuser_i('0), .wdata_i(dirty_wdata), .be_i (be_ram.vldrty), @@ -247,8 +266,9 @@ module std_nbdcache tag_cmp #( .CVA6Cfg (CVA6Cfg), .NR_PORTS (NumPorts + 1), - .ADDR_WIDTH (DCACHE_INDEX_WIDTH), - .DCACHE_SET_ASSOC(DCACHE_SET_ASSOC) + .ADDR_WIDTH (CVA6Cfg.DCACHE_INDEX_WIDTH), + .l_data_t (cache_line_t), + .l_be_t (cl_be_t) ) i_tag_cmp ( .req_i (req), .gnt_o (gnt), @@ -272,7 +292,7 @@ module std_nbdcache //pragma translate_off initial begin - assert (DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16}) + assert (CVA6Cfg.DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth inside {2, 4, 8, 16}) else $fatal(1, "Cache line size needs to be a power of two multiple of AxiDataWidth"); end //pragma translate_on diff --git a/core/cache_subsystem/tag_cmp.sv b/core/cache_subsystem/tag_cmp.sv index a378c13b11..0488557a86 100644 --- a/core/cache_subsystem/tag_cmp.sv +++ b/core/cache_subsystem/tag_cmp.sv @@ -19,43 +19,42 @@ module tag_cmp #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter int unsigned NR_PORTS = 3, parameter int unsigned ADDR_WIDTH = 64, - parameter type l_data_t = std_cache_pkg::cache_line_t, - parameter type l_be_t = std_cache_pkg::cl_be_t, - parameter int unsigned DCACHE_SET_ASSOC = 8 + parameter type l_data_t = logic, + 
parameter type l_be_t = logic ) ( input logic clk_i, input logic rst_ni, - input logic [NR_PORTS-1:0][DCACHE_SET_ASSOC-1:0] req_i, + input logic [NR_PORTS-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0] req_i, output logic [NR_PORTS-1:0] gnt_o, input logic [NR_PORTS-1:0][ADDR_WIDTH-1:0] addr_i, input l_data_t [NR_PORTS-1:0] wdata_i, input logic [NR_PORTS-1:0] we_i, input l_be_t [NR_PORTS-1:0] be_i, - output l_data_t [DCACHE_SET_ASSOC-1:0] rdata_o, - input logic [NR_PORTS-1:0][ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later - output logic [DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way + output l_data_t [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rdata_o, + input logic [NR_PORTS-1:0][CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag_i, // tag in - comes one cycle later + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] hit_way_o, // we've got a hit on the corresponding way - output logic [DCACHE_SET_ASSOC-1:0] req_o, + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] req_o, output logic [ ADDR_WIDTH-1:0] addr_o, output l_data_t wdata_o, output logic we_o, output l_be_t be_o, - input l_data_t [DCACHE_SET_ASSOC-1:0] rdata_i + input l_data_t [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rdata_i ); assign rdata_o = rdata_i; // one hot encoded logic [NR_PORTS-1:0] id_d, id_q; - logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] sel_tag; + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] sel_tag; always_comb begin : tag_sel sel_tag = '0; for (int unsigned i = 0; i < NR_PORTS; i++) if (id_q[i]) sel_tag = tag_i[i]; end - for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : tag_cmp + for (genvar j = 0; j < CVA6Cfg.DCACHE_SET_ASSOC; j++) begin : tag_cmp assign hit_way_o[j] = (sel_tag == rdata_i[j].tag) ? 
rdata_i[j].valid : 1'b0; end diff --git a/core/cache_subsystem/wt_axi_adapter.sv b/core/cache_subsystem/wt_axi_adapter.sv index fce1e447f5..82a8cd860a 100644 --- a/core/cache_subsystem/wt_axi_adapter.sv +++ b/core/cache_subsystem/wt_axi_adapter.sv @@ -20,9 +20,14 @@ module wt_axi_adapter #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter int unsigned ReqFifoDepth = 2, - parameter int unsigned MetaFifoDepth = wt_cache_pkg::DCACHE_MAX_TX, + parameter int unsigned MetaFifoDepth = CVA6Cfg.DCACHE_MAX_TX, parameter type axi_req_t = logic, - parameter type axi_rsp_t = logic + parameter type axi_rsp_t = logic, + parameter type icache_req_t = logic, + parameter type icache_rtrn_t = logic, + parameter type dcache_req_t = logic, + parameter type dcache_rtrn_t = logic, + parameter type dcache_inval_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -54,8 +59,8 @@ module wt_axi_adapter ); // support up to 512bit cache lines - localparam AxiNumWords = (ariane_pkg::ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH > ariane_pkg::DCACHE_LINE_WIDTH) + - (ariane_pkg::DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (ariane_pkg::ICACHE_LINE_WIDTH <= ariane_pkg::DCACHE_LINE_WIDTH) ; + localparam AxiNumWords = (CVA6Cfg.ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (CVA6Cfg.ICACHE_LINE_WIDTH > CVA6Cfg.DCACHE_LINE_WIDTH) + + (CVA6Cfg.DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth) * (CVA6Cfg.ICACHE_LINE_WIDTH <= CVA6Cfg.DCACHE_LINE_WIDTH) ; /////////////////////////////////////////////////////// @@ -90,9 +95,9 @@ module wt_axi_adapter // AMO generates r beat logic amo_gen_r_d, amo_gen_r_q; - logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] icache_rtrn_tid_d, icache_rtrn_tid_q; - logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_tid_d, dcache_rtrn_tid_q; - logic [wt_cache_pkg::CACHE_ID_WIDTH-1:0] dcache_rtrn_rd_tid, dcache_rtrn_wr_tid; + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] icache_rtrn_tid_d, icache_rtrn_tid_q; + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] 
dcache_rtrn_tid_d, dcache_rtrn_tid_q; + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] dcache_rtrn_rd_tid, dcache_rtrn_wr_tid; logic dcache_rd_pop, dcache_wr_pop; logic icache_rd_full, icache_rd_empty; logic dcache_rd_full, dcache_rd_empty; @@ -131,7 +136,7 @@ module wt_axi_adapter always_comb begin : p_axi_req // write channel axi_wr_id_in = arb_idx; - axi_wr_data = {(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}}; + axi_wr_data = {(CVA6Cfg.AxiDataWidth/CVA6Cfg.XLEN){dcache_data.data}}; axi_wr_user = dcache_data.user; // Cast to AXI address width axi_wr_addr = dcache_data.paddr; @@ -163,7 +168,7 @@ module wt_axi_adapter // If dcache_data.size MSB is set, we want to read as much as possible axi_rd_size = dcache_data.size[2] ? $clog2(CVA6Cfg.AxiDataWidth / 8) : dcache_data.size; if (dcache_data.size[2]) begin - axi_rd_blen = ariane_pkg::DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1; + axi_rd_blen = CVA6Cfg.DCACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1; end end else begin // Cast to AXI address width @@ -171,7 +176,7 @@ module wt_axi_adapter axi_rd_size = $clog2(CVA6Cfg.AxiDataWidth / 8); // always request max number of words in case of ifill if (!icache_data.nc) begin - axi_rd_blen = ariane_pkg::ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1; + axi_rd_blen = CVA6Cfg.ICACHE_LINE_WIDTH / CVA6Cfg.AxiDataWidth - 1; end end @@ -249,8 +254,8 @@ module wt_axi_adapter AMO_ADD: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_ADD}; AMO_AND: begin // in this case we need to invert the data to get a "CLR" - axi_wr_data = ~{(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.data}}; - axi_wr_user = ~{(CVA6Cfg.AxiDataWidth/riscv::XLEN){dcache_data.user}}; + axi_wr_data = ~{(CVA6Cfg.AxiDataWidth/CVA6Cfg.XLEN){dcache_data.data}}; + axi_wr_user = ~{(CVA6Cfg.AxiDataWidth/CVA6Cfg.XLEN){dcache_data.user}}; axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, axi_pkg::ATOP_CLR}; end AMO_OR: axi_wr_atop = {axi_pkg::ATOP_ATOMICLOAD, axi_pkg::ATOP_LITTLE_END, 
axi_pkg::ATOP_SET}; @@ -272,7 +277,8 @@ module wt_axi_adapter fifo_v3 #( .dtype(icache_req_t), - .DEPTH(ReqFifoDepth) + .DEPTH(ReqFifoDepth), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_icache_data_fifo ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -289,7 +295,8 @@ module wt_axi_adapter fifo_v3 #( .dtype(dcache_req_t), - .DEPTH(ReqFifoDepth) + .DEPTH(ReqFifoDepth), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_dcache_data_fifo ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -312,8 +319,9 @@ module wt_axi_adapter logic icache_rtrn_vld_d, icache_rtrn_vld_q, dcache_rtrn_vld_d, dcache_rtrn_vld_q; fifo_v3 #( - .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), - .DEPTH (MetaFifoDepth) + .DATA_WIDTH(CVA6Cfg.MEM_TID_WIDTH), + .DEPTH (MetaFifoDepth), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_rd_icache_id ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -329,8 +337,9 @@ module wt_axi_adapter ); fifo_v3 #( - .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), - .DEPTH (MetaFifoDepth) + .DATA_WIDTH(CVA6Cfg.MEM_TID_WIDTH), + .DEPTH (MetaFifoDepth), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_rd_dcache_id ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -346,8 +355,9 @@ module wt_axi_adapter ); fifo_v3 #( - .DATA_WIDTH(wt_cache_pkg::CACHE_ID_WIDTH), - .DEPTH (MetaFifoDepth) + .DATA_WIDTH(CVA6Cfg.MEM_TID_WIDTH), + .DEPTH (MetaFifoDepth), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_wr_dcache_id ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -377,7 +387,8 @@ module wt_axi_adapter fifo_v3 #( .DATA_WIDTH (CVA6Cfg.AxiIdWidth + 1), .DEPTH (MetaFifoDepth), - .FALL_THROUGH(1'b1) + .FALL_THROUGH(1'b1), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_b_fifo ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -394,16 +405,16 @@ module wt_axi_adapter // buffer read responses in shift regs logic icache_first_d, icache_first_q, dcache_first_d, dcache_first_q; - logic [ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] + logic [CVA6Cfg.ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] icache_rd_shift_user_d, icache_rd_shift_user_q; - logic 
[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] + logic [CVA6Cfg.DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:0][CVA6Cfg.AxiUserWidth-1:0] dcache_rd_shift_user_d, dcache_rd_shift_user_q; - logic [ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] + logic [CVA6Cfg.ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] icache_rd_shift_d, icache_rd_shift_q; - logic [DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] + logic [CVA6Cfg.DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:0][CVA6Cfg.AxiDataWidth-1:0] dcache_rd_shift_d, dcache_rd_shift_q; wt_cache_pkg::dcache_in_t dcache_rtrn_type_d, dcache_rtrn_type_q; - wt_cache_pkg::dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q; + dcache_inval_t dcache_rtrn_inv_d, dcache_rtrn_inv_q; logic dcache_sc_rtrn, axi_rd_last; always_comb begin : p_axi_rtrn_shift @@ -433,15 +444,15 @@ module wt_axi_adapter if (icache_rtrn_rd_en) begin icache_first_d = axi_rd_last; - if (ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin + if (CVA6Cfg.ICACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin icache_rd_shift_d = axi_rd_data; end else begin icache_rd_shift_d = { - axi_rd_data, icache_rd_shift_q[ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1] + axi_rd_data, icache_rd_shift_q[CVA6Cfg.ICACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1] }; end icache_rd_shift_user_d = { - axi_rd_user, icache_rd_shift_user_q[ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1] + axi_rd_user, icache_rd_shift_user_q[CVA6Cfg.ICACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1] }; // if this is a single word transaction, we need to make sure that word is placed at offset 0 if (icache_first_q) begin @@ -452,15 +463,15 @@ module wt_axi_adapter if (dcache_rtrn_rd_en) begin dcache_first_d = axi_rd_last; - if (DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin + if (CVA6Cfg.DCACHE_LINE_WIDTH == CVA6Cfg.AxiDataWidth) begin dcache_rd_shift_d = axi_rd_data; end else begin dcache_rd_shift_d = { - axi_rd_data, 
dcache_rd_shift_q[DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1] + axi_rd_data, dcache_rd_shift_q[CVA6Cfg.DCACHE_LINE_WIDTH/CVA6Cfg.AxiDataWidth-1:1] }; end dcache_rd_shift_user_d = { - axi_rd_user, dcache_rd_shift_user_q[DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1] + axi_rd_user, dcache_rd_shift_user_q[CVA6Cfg.DCACHE_USER_LINE_WIDTH/CVA6Cfg.AxiUserWidth-1:1] }; // if this is a single word transaction, we need to make sure that word is placed at offset 0 if (dcache_first_q) begin @@ -509,7 +520,7 @@ module wt_axi_adapter dcache_rtrn_type_d = wt_cache_pkg::DCACHE_INV_REQ; dcache_rtrn_vld_d = 1'b1; dcache_rtrn_inv_d.all = 1'b1; - dcache_rtrn_inv_d.idx = inval_addr_i[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + dcache_rtrn_inv_d.idx = inval_addr_i[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; ////////////////////////////////////// // dcache needs some special treatment // for arbitration and decoding of atomics @@ -523,7 +534,7 @@ module wt_axi_adapter dcache_rtrn_vld_d = 1'b1; dcache_rtrn_inv_d.all = 1'b1; - dcache_rtrn_inv_d.idx = dcache_data.paddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + dcache_rtrn_inv_d.idx = dcache_data.paddr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; ////////////////////////////////////// // read responses // note that in case of atomics, the dcache sequentializes requests and diff --git a/core/cache_subsystem/wt_cache_subsystem.sv b/core/cache_subsystem/wt_cache_subsystem.sv index ec094671c4..48ef0ea60e 100644 --- a/core/cache_subsystem/wt_cache_subsystem.sv +++ b/core/cache_subsystem/wt_cache_subsystem.sv @@ -24,9 +24,45 @@ module wt_cache_subsystem import wt_cache_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter type icache_req_t = logic, + parameter type icache_rtrn_t = logic, parameter 
int unsigned NumPorts = 4, parameter type noc_req_t = logic, - parameter type noc_resp_t = logic + parameter type noc_resp_t = logic, + + // dcache interface + localparam type dcache_inval_t = struct packed { + logic vld; // invalidate only affected way + logic all; // invalidate all ways + logic [CVA6Cfg.DCACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate + logic [CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] way; // way to invalidate + }, + + localparam type dcache_req_t = struct packed { + dcache_out_t rtype; // see definitions above + logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) + logic [CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] way; // way to replace + logic [CVA6Cfg.PLEN-1:0] paddr; // physical address + logic [CVA6Cfg.XLEN-1:0] data; // word width of processor (no block stores at the moment) + logic [CVA6Cfg.DATA_USER_WIDTH-1:0] user; // user width of processor (no block stores at the moment) + logic nc; // noncacheable + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) + ariane_pkg::amo_t amo_op; // amo opcode + }, + + localparam type dcache_rtrn_t = struct packed { + dcache_in_t rtype; // see definitions above + logic [CVA6Cfg.DCACHE_LINE_WIDTH-1:0] data; // full cache line width + logic [CVA6Cfg.DCACHE_USER_LINE_WIDTH-1:0] user; // user bits + dcache_inval_t inv; // invalidation vector + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) + } ) ( input logic clk_i, input logic rst_ni, @@ -47,7 +83,7 @@ module wt_cache_subsystem output logic dcache_flush_ack_o, // send a single cycle acknowledge signal when the cache is flushed output logic dcache_miss_o, // we missed on a ld/st // For Performance Counter - output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, + output logic [NumPorts-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // AMO interface input amo_req_t dcache_amo_req_i, output amo_resp_t 
dcache_amo_resp_o, @@ -68,17 +104,23 @@ module wt_cache_subsystem ); logic icache_adapter_data_req, adapter_icache_data_ack, adapter_icache_rtrn_vld; - wt_cache_pkg::icache_req_t icache_adapter; - wt_cache_pkg::icache_rtrn_t adapter_icache; + icache_req_t icache_adapter; + icache_rtrn_t adapter_icache; logic dcache_adapter_data_req, adapter_dcache_data_ack, adapter_dcache_rtrn_vld; - wt_cache_pkg::dcache_req_t dcache_adapter; - wt_cache_pkg::dcache_rtrn_t adapter_dcache; + dcache_req_t dcache_adapter; + dcache_rtrn_t adapter_dcache; cva6_icache #( // use ID 0 for icache reads .CVA6Cfg(CVA6Cfg), + .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), .RdTxId (0) ) i_cva6_icache ( .clk_i (clk_i), @@ -104,6 +146,10 @@ module wt_cache_subsystem // Port 2 is write only and goes into the merging write buffer wt_dcache #( .CVA6Cfg (CVA6Cfg), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), + .dcache_req_t(dcache_req_t), + .dcache_rtrn_t(dcache_rtrn_t), // use ID 1 for dcache reads and amos. note that the writebuffer // uses all IDs up to DCACHE_MAX_TX-1 for write transactions. 
.RdAmoTxId(1) @@ -137,6 +183,10 @@ module wt_cache_subsystem `ifdef PITON_ARIANE wt_l15_adapter #( .CVA6Cfg(CVA6Cfg), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), + .dcache_req_t(dcache_req_t), + .dcache_rtrn_t(dcache_rtrn_t) ) i_adapter ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -157,7 +207,12 @@ module wt_cache_subsystem wt_axi_adapter #( .CVA6Cfg (CVA6Cfg), .axi_req_t(noc_req_t), - .axi_rsp_t(noc_resp_t) + .axi_rsp_t(noc_resp_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), + .dcache_req_t(dcache_req_t), + .dcache_rtrn_t(dcache_rtrn_t), + .dcache_inval_t(dcache_inval_t) ) i_adapter ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -196,7 +251,7 @@ module wt_cache_subsystem icache_dreq_o.data ); - for (genvar j = 0; j < riscv::XLEN / 8; j++) begin : gen_invalid_write_assertion + for (genvar j = 0; j < CVA6Cfg.XLEN / 8; j++) begin : gen_invalid_write_assertion a_invalid_write_data : assert property ( @(posedge clk_i) disable iff (!rst_ni) dcache_req_ports_i[NumPorts-1].data_req |-> dcache_req_ports_i[NumPorts-1].data_be[j] |-> (|dcache_req_ports_i[NumPorts-1].data_wdata[j*8+:8] !== 1'hX)) diff --git a/core/cache_subsystem/wt_dcache.sv b/core/cache_subsystem/wt_dcache.sv index fd4aa62b80..97806b7922 100644 --- a/core/cache_subsystem/wt_dcache.sv +++ b/core/cache_subsystem/wt_dcache.sv @@ -18,10 +18,24 @@ module wt_dcache import wt_cache_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter type dcache_req_t = logic, + parameter type dcache_rtrn_t = logic, parameter int unsigned NumPorts = 4, // number of miss ports + localparam type wbuffer_t = struct packed { + logic [CVA6Cfg.DCACHE_TAG_WIDTH+(CVA6Cfg.DCACHE_INDEX_WIDTH-CVA6Cfg.XLEN_ALIGN_BYTES)-1:0] wtag; + logic [CVA6Cfg.XLEN-1:0] data; + logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] user; + logic [(CVA6Cfg.XLEN/8)-1:0] dirty; // byte is dirty + logic [(CVA6Cfg.XLEN/8)-1:0] 
valid; // byte is valid + logic [(CVA6Cfg.XLEN/8)-1:0] txblock; // byte is part of transaction in-flight + logic checked; // if cache state of this word has been checked + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache + }, // ID to be used for read and AMO transactions. // note that the write buffer uses all IDs up to DCACHE_MAX_TX-1 for write transactions - parameter logic [CACHE_ID_WIDTH-1:0] RdAmoTxId = 1 + parameter logic [CVA6Cfg.MEM_TID_WIDTH-1:0] RdAmoTxId = 1 ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -42,7 +56,7 @@ module wt_dcache input dcache_req_i_t [NumPorts-1:0] req_ports_i, output dcache_req_o_t [NumPorts-1:0] req_ports_o, - output logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, + output logic [NumPorts-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, input logic mem_rtrn_vld_i, input dcache_rtrn_t mem_rtrn_i, @@ -51,61 +65,63 @@ module wt_dcache output dcache_req_t mem_data_o ); + localparam DCACHE_CL_IDX_WIDTH = $clog2(CVA6Cfg.DCACHE_NUM_WORDS); // excluding byte offset + // miss unit <-> read controllers - logic cache_en; + logic cache_en; // miss unit <-> memory - logic wr_cl_vld; - logic wr_cl_nc; - logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we; - logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag; - logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx; - logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off; - logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data; - logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user; - logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be; - logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits; - logic [ DCACHE_SET_ASSOC-1:0] wr_req; - logic wr_ack; - logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_idx; - logic [ DCACHE_OFFSET_WIDTH-1:0] wr_off; - riscv::xlen_t wr_data; - logic [ (riscv::XLEN/8)-1:0] wr_data_be; - logic [ DCACHE_USER_WIDTH-1:0] wr_user; + logic wr_cl_vld; + logic wr_cl_nc; + logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_cl_we; + logic [ CVA6Cfg.DCACHE_TAG_WIDTH-1:0] wr_cl_tag; + logic [ 
DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx; + logic [ CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] wr_cl_off; + logic [ CVA6Cfg.DCACHE_LINE_WIDTH-1:0] wr_cl_data; + logic [CVA6Cfg.DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user; + logic [ CVA6Cfg.DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be; + logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_vld_bits; + logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_req; + logic wr_ack; + logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_idx; + logic [ CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] wr_off; + logic [CVA6Cfg.XLEN-1:0] wr_data; + logic [ (CVA6Cfg.XLEN/8)-1:0] wr_data_be; + logic [ CVA6Cfg.DCACHE_USER_WIDTH-1:0] wr_user; // miss unit <-> controllers/wbuffer - logic [ NumPorts-1:0] miss_req; - logic [ NumPorts-1:0] miss_ack; - logic [ NumPorts-1:0] miss_nc; - logic [ NumPorts-1:0] miss_we; - logic [ NumPorts-1:0][ riscv::XLEN-1:0] miss_wdata; - logic [ NumPorts-1:0][ DCACHE_USER_WIDTH-1:0] miss_wuser; - logic [ NumPorts-1:0][ riscv::PLEN-1:0] miss_paddr; - logic [ NumPorts-1:0][ 2:0] miss_size; - logic [ NumPorts-1:0][ CACHE_ID_WIDTH-1:0] miss_id; - logic [ NumPorts-1:0] miss_replay; - logic [ NumPorts-1:0] miss_rtrn_vld; - logic [ CACHE_ID_WIDTH-1:0] miss_rtrn_id; + logic [ NumPorts-1:0] miss_req; + logic [ NumPorts-1:0] miss_ack; + logic [ NumPorts-1:0] miss_nc; + logic [ NumPorts-1:0] miss_we; + logic [ NumPorts-1:0][ CVA6Cfg.XLEN-1:0] miss_wdata; + logic [ NumPorts-1:0][ CVA6Cfg.DCACHE_USER_WIDTH-1:0] miss_wuser; + logic [ NumPorts-1:0][ CVA6Cfg.PLEN-1:0] miss_paddr; + logic [ NumPorts-1:0][ 2:0] miss_size; + logic [ NumPorts-1:0][ CVA6Cfg.MEM_TID_WIDTH-1:0] miss_id; + logic [ NumPorts-1:0] miss_replay; + logic [ NumPorts-1:0] miss_rtrn_vld; + logic [ CVA6Cfg.MEM_TID_WIDTH-1:0] miss_rtrn_id; // memory <-> read controllers/miss unit - logic [ NumPorts-1:0] rd_prio; - logic [ NumPorts-1:0] rd_tag_only; - logic [ NumPorts-1:0] rd_req; - logic [ NumPorts-1:0] rd_ack; - logic [ NumPorts-1:0][ DCACHE_TAG_WIDTH-1:0] rd_tag; - logic [ NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx; - logic [ 
NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off; - riscv::xlen_t rd_data; - logic [ DCACHE_USER_WIDTH-1:0] rd_user; - logic [ DCACHE_SET_ASSOC-1:0] rd_vld_bits; - logic [ DCACHE_SET_ASSOC-1:0] rd_hit_oh; + logic [ NumPorts-1:0] rd_prio; + logic [ NumPorts-1:0] rd_tag_only; + logic [ NumPorts-1:0] rd_req; + logic [ NumPorts-1:0] rd_ack; + logic [ NumPorts-1:0][ CVA6Cfg.DCACHE_TAG_WIDTH-1:0] rd_tag; + logic [ NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx; + logic [ NumPorts-1:0][CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] rd_off; + logic [CVA6Cfg.XLEN-1:0] rd_data; + logic [ CVA6Cfg.DCACHE_USER_WIDTH-1:0] rd_user; + logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_vld_bits; + logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_hit_oh; // miss unit <-> wbuffer - logic [ DCACHE_MAX_TX-1:0][ riscv::PLEN-1:0] tx_paddr; - logic [ DCACHE_MAX_TX-1:0] tx_vld; + logic [ CVA6Cfg.DCACHE_MAX_TX-1:0][ CVA6Cfg.PLEN-1:0] tx_paddr; + logic [ CVA6Cfg.DCACHE_MAX_TX-1:0] tx_vld; // wbuffer <-> memory - wbuffer_t [ DCACHE_WBUF_DEPTH-1:0] wbuffer_data; + wbuffer_t [ DCACHE_WBUF_DEPTH-1:0] wbuffer_data; /////////////////////////////////////////////////////// @@ -115,7 +131,10 @@ module wt_dcache wt_dcache_missunit #( .CVA6Cfg (CVA6Cfg), .AmoTxId (RdAmoTxId), - .NumPorts(NumPorts) + .NumPorts(NumPorts), + .dcache_req_t(dcache_req_t), + .dcache_rtrn_t(dcache_rtrn_t), + .DCACHE_CL_IDX_WIDTH(DCACHE_CL_IDX_WIDTH) ) i_wt_dcache_missunit ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -175,6 +194,9 @@ module wt_dcache assign rd_prio[k] = 1'b1; wt_dcache_ctrl #( .CVA6Cfg(CVA6Cfg), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), + .DCACHE_CL_IDX_WIDTH(DCACHE_CL_IDX_WIDTH), .RdTxId (RdAmoTxId) ) i_wt_dcache_ctrl ( .clk_i (clk_i), @@ -215,16 +237,16 @@ module wt_dcache assign req_ports_o[k] = '0; assign miss_req[k] = 1'b0; assign miss_we[k] = 1'b0; - assign miss_wdata[k] = {{riscv::XLEN}{1'b0}}; - assign miss_wuser[k] = {{DCACHE_USER_WIDTH}{1'b0}}; - assign miss_vld_bits_o[k] = {{DCACHE_SET_ASSOC}{1'b0}}; - assign 
miss_paddr[k] = {{riscv::PLEN}{1'b0}}; + assign miss_wdata[k] = {{CVA6Cfg.XLEN}{1'b0}}; + assign miss_wuser[k] = {{CVA6Cfg.DCACHE_USER_WIDTH}{1'b0}}; + assign miss_vld_bits_o[k] = {{CVA6Cfg.DCACHE_SET_ASSOC}{1'b0}}; + assign miss_paddr[k] = {{CVA6Cfg.PLEN}{1'b0}}; assign miss_nc[k] = 1'b0; assign miss_size[k] = 3'b0; - assign miss_id[k] = {{CACHE_ID_WIDTH}{1'b0}}; - assign rd_tag[k] = {{DCACHE_TAG_WIDTH}{1'b0}}; + assign miss_id[k] = {{CVA6Cfg.MEM_TID_WIDTH}{1'b0}}; + assign rd_tag[k] = {{CVA6Cfg.DCACHE_TAG_WIDTH}{1'b0}}; assign rd_idx[k] = {{DCACHE_CL_IDX_WIDTH}{1'b0}}; - assign rd_off[k] = {{DCACHE_OFFSET_WIDTH}{1'b0}}; + assign rd_off[k] = {{CVA6Cfg.DCACHE_OFFSET_WIDTH}{1'b0}}; assign rd_req[k] = 1'b0; assign rd_tag_only[k] = 1'b0; end @@ -238,7 +260,11 @@ module wt_dcache assign rd_prio[NumPorts-1] = 1'b0; wt_dcache_wbuffer #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), + .wbuffer_t(wbuffer_t), + .DCACHE_CL_IDX_WIDTH(DCACHE_CL_IDX_WIDTH) ) i_wt_dcache_wbuffer ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -296,7 +322,9 @@ module wt_dcache wt_dcache_mem #( .CVA6Cfg (CVA6Cfg), - .NumPorts(NumPorts) + .NumPorts(NumPorts), + .wbuffer_t(wbuffer_t), + .DCACHE_CL_IDX_WIDTH(DCACHE_CL_IDX_WIDTH) ) i_wt_dcache_mem ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -351,7 +379,7 @@ module wt_dcache initial begin // assert wrong parameterizations - assert (DCACHE_INDEX_WIDTH <= 12) + assert (CVA6Cfg.DCACHE_INDEX_WIDTH <= 12) else $fatal(1, "[l1 dcache] cache index width can be maximum 12bit since VM uses 4kB pages"); end `endif diff --git a/core/cache_subsystem/wt_dcache_ctrl.sv b/core/cache_subsystem/wt_dcache_ctrl.sv index b5973df195..d0470fe58d 100644 --- a/core/cache_subsystem/wt_dcache_ctrl.sv +++ b/core/cache_subsystem/wt_dcache_ctrl.sv @@ -18,7 +18,10 @@ module wt_dcache_ctrl import wt_cache_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter logic [CACHE_ID_WIDTH-1:0] RdTxId = 
1 + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter int unsigned DCACHE_CL_IDX_WIDTH = 1, + parameter logic [CVA6Cfg.MEM_TID_WIDTH-1:0] RdTxId = 1 ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -30,28 +33,28 @@ module wt_dcache_ctrl output logic miss_req_o, input logic miss_ack_i, output logic miss_we_o, // unused (set to 0) - output riscv::xlen_t miss_wdata_o, // unused (set to 0) - output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0) - output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index - output logic [riscv::PLEN-1:0] miss_paddr_o, + output logic [CVA6Cfg.XLEN-1:0] miss_wdata_o, // unused (set to 0) + output logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] miss_wuser_o, // unused (set to 0) + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // valid bits at the missed index + output logic [CVA6Cfg.PLEN-1:0] miss_paddr_o, output logic miss_nc_o, // request to I/O space output logic [2:0] miss_size_o, // 00: 1byte, 01: 2byte, 10: 4byte, 11: 8byte, 111: cacheline - output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // set to constant ID + output logic [CVA6Cfg.MEM_TID_WIDTH-1:0] miss_id_o, // set to constant ID input logic miss_replay_i, // request collided with pending miss - have to replay the request input logic miss_rtrn_vld_i, // signals that the miss has been served, asserted in the same cycle as when the data returns from memory // used to detect readout mux collisions input logic wr_cl_vld_i, // cache memory interface - output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later + output logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o, - output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o, + output logic [CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] rd_off_o, output logic rd_req_o, // read the word at offset off_i[:3] in all ways output logic 
rd_tag_only_o, // set to zero here input logic rd_ack_i, - input riscv::xlen_t rd_data_i, - input logic [DCACHE_USER_WIDTH-1:0] rd_user_i, - input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, - input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i + input logic [CVA6Cfg.XLEN-1:0] rd_data_i, + input logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] rd_user_i, + input logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, + input logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_hit_oh_i ); // controller FSM @@ -67,11 +70,11 @@ module wt_dcache_ctrl } state_e; state_e state_d, state_q; - logic [DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q; + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] address_tag_d, address_tag_q; logic [DCACHE_CL_IDX_WIDTH-1:0] address_idx_d, address_idx_q; - logic [DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q; + logic [CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] address_off_d, address_off_q; logic [DCACHE_TID_WIDTH-1:0] id_d, id_q; - logic [DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q; + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] vld_data_d, vld_data_q; logic save_tag, rd_req_d, rd_req_q, rd_ack_d, rd_ack_q; logic [1:0] data_size_d, data_size_q; @@ -82,8 +85,8 @@ module wt_dcache_ctrl // map address to tag/idx/offset and save assign vld_data_d = (rd_req_q) ? rd_vld_bits_i : vld_data_q; assign address_tag_d = (save_tag) ? req_port_i.address_tag : address_tag_q; - assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : address_idx_q; - assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[DCACHE_OFFSET_WIDTH-1:0] : address_off_q; + assign address_idx_d = (req_port_o.data_gnt) ? req_port_i.address_index[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH] : address_idx_q; + assign address_off_d = (req_port_o.data_gnt) ? req_port_i.address_index[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] : address_off_q; assign id_d = (req_port_o.data_gnt) ? req_port_i.data_id : id_q; assign data_size_d = (req_port_o.data_gnt) ? 
req_port_i.data_size : data_size_q; assign rd_tag_o = address_tag_d; @@ -102,7 +105,7 @@ module wt_dcache_ctrl // noncacheable if request goes to I/O space, or if cache is disabled assign miss_nc_o = (~cache_en_i) | (~config_pkg::is_inside_cacheable_regions( CVA6Cfg, - {{{64-DCACHE_TAG_WIDTH-DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {DCACHE_INDEX_WIDTH{1'b0}}} + {{{64-CVA6Cfg.DCACHE_TAG_WIDTH-CVA6Cfg.DCACHE_INDEX_WIDTH}{1'b0}}, address_tag_q, {CVA6Cfg.DCACHE_INDEX_WIDTH{1'b0}}} )); @@ -289,7 +292,7 @@ module wt_dcache_ctrl initial begin // assert wrong parameterizations - assert (DCACHE_INDEX_WIDTH <= 12) + assert (CVA6Cfg.DCACHE_INDEX_WIDTH <= 12) else $fatal(1, "[l1 dcache ctrl] cache index width can be maximum 12bit since VM uses 4kB pages"); end diff --git a/core/cache_subsystem/wt_dcache_mem.sv b/core/cache_subsystem/wt_dcache_mem.sv index b2b41c3c73..ec9cf5a69c 100644 --- a/core/cache_subsystem/wt_dcache_mem.sv +++ b/core/cache_subsystem/wt_dcache_mem.sv @@ -31,49 +31,55 @@ module wt_dcache_mem import wt_cache_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int unsigned NumPorts = 3 + parameter int unsigned NumPorts = 3, + parameter type wbuffer_t = logic, + parameter int unsigned DCACHE_CL_IDX_WIDTH = 1 ) ( input logic clk_i, input logic rst_ni, // ports - input logic [NumPorts-1:0][DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later + input logic [NumPorts-1:0][CVA6Cfg.DCACHE_TAG_WIDTH-1:0] rd_tag_i, // tag in - comes one cycle later input logic [NumPorts-1:0][DCACHE_CL_IDX_WIDTH-1:0] rd_idx_i, - input logic [NumPorts-1:0][DCACHE_OFFSET_WIDTH-1:0] rd_off_i, + input logic [NumPorts-1:0][CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] rd_off_i, input logic [NumPorts-1:0] rd_req_i, // read the word at offset off_i[:3] in all ways input logic [NumPorts-1:0] rd_tag_only_i, // only do a tag/valid lookup, no access to data arrays input logic [NumPorts-1:0] rd_prio_i, // 0: low prio, 1: high prio output logic 
[NumPorts-1:0] rd_ack_o, - output logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_o, - output logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_o, - output riscv::xlen_t rd_data_o, - output logic [DCACHE_USER_WIDTH-1:0] rd_user_o, + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_vld_bits_o, + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_hit_oh_o, + output logic [CVA6Cfg.XLEN-1:0] rd_data_o, + output logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] rd_user_o, // only available on port 0, uses address signals of port 0 input logic wr_cl_vld_i, input logic wr_cl_nc_i, // noncacheable access - input logic [ DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // writes a full cacheline - input logic [ DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i, + input logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_cl_we_i, // writes a full cacheline + input logic [ CVA6Cfg.DCACHE_TAG_WIDTH-1:0] wr_cl_tag_i, input logic [ DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i, - input logic [ DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i, - input logic [ DCACHE_LINE_WIDTH-1:0] wr_cl_data_i, - input logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i, - input logic [ DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i, - input logic [ DCACHE_SET_ASSOC-1:0] wr_vld_bits_i, + input logic [ CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_i, + input logic [ CVA6Cfg.DCACHE_LINE_WIDTH-1:0] wr_cl_data_i, + input logic [CVA6Cfg.DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_i, + input logic [ CVA6Cfg.DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_i, + input logic [ CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_vld_bits_i, // separate port for single word write, no tag access - input logic [DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3] + input logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_req_i, // write a single word to offset off_i[:3] output logic wr_ack_o, input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_i, - input logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_i, - input riscv::xlen_t wr_data_i, - input logic [DCACHE_USER_WIDTH-1:0] wr_user_i, - input logic [(riscv::XLEN/8)-1:0] wr_data_be_i, + input logic 
[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] wr_off_i, + input logic [CVA6Cfg.XLEN-1:0] wr_data_i, + input logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] wr_user_i, + input logic [(CVA6Cfg.XLEN/8)-1:0] wr_data_be_i, // forwarded wbuffer input wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_i ); + // Calculated parameter + localparam DCACHE_NUM_BANKS = CVA6Cfg.DCACHE_LINE_WIDTH / CVA6Cfg.XLEN; + localparam DCACHE_NUM_BANKS_WIDTH = $clog2(DCACHE_NUM_BANKS); + // functions function automatic logic [DCACHE_NUM_BANKS-1:0] dcache_cl_bin2oh( input logic [DCACHE_NUM_BANKS_WIDTH-1:0] in); @@ -83,9 +89,9 @@ module wt_dcache_mem return out; endfunction - // number of bits needed to address AXI data. If AxiDataWidth equals XLEN this parameter + // number of bits needed to address AXI data. If AxiDataWidth equals CVA6Cfg.XLEN this parameter // is not needed. Therefore, increment it by one to avoid reverse range select during elaboration. - localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == riscv::XLEN ? $clog2( + localparam AXI_OFFSET_WIDTH = CVA6Cfg.AxiDataWidth == CVA6Cfg.XLEN ? 
$clog2( CVA6Cfg.AxiDataWidth / 8 ) + 1 : $clog2( CVA6Cfg.AxiDataWidth / 8 @@ -93,32 +99,32 @@ module wt_dcache_mem logic [DCACHE_NUM_BANKS-1:0] bank_req; logic [DCACHE_NUM_BANKS-1:0] bank_we; - logic [DCACHE_NUM_BANKS-1:0][ DCACHE_SET_ASSOC-1:0][(riscv::XLEN/8)-1:0] bank_be; + logic [DCACHE_NUM_BANKS-1:0][ CVA6Cfg.DCACHE_SET_ASSOC-1:0][(CVA6Cfg.XLEN/8)-1:0] bank_be; logic [DCACHE_NUM_BANKS-1:0][DCACHE_CL_IDX_WIDTH-1:0] bank_idx; logic [DCACHE_CL_IDX_WIDTH-1:0] bank_idx_d, bank_idx_q; - logic [DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q; + logic [CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] bank_off_d, bank_off_q; - logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_wdata; // - logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] bank_rdata; // - logic [DCACHE_SET_ASSOC-1:0][riscv::XLEN-1:0] rdata_cl; // selected word from each cacheline - logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_wuser; // - logic [DCACHE_NUM_BANKS-1:0][DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] bank_ruser; // - logic [DCACHE_SET_ASSOC-1:0][DCACHE_USER_WIDTH-1:0] ruser_cl; // selected word from each cacheline + logic [DCACHE_NUM_BANKS-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0][CVA6Cfg.XLEN-1:0] bank_wdata; // + logic [DCACHE_NUM_BANKS-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0][CVA6Cfg.XLEN-1:0] bank_rdata; // + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0][CVA6Cfg.XLEN-1:0] rdata_cl; // selected word from each cacheline + logic [DCACHE_NUM_BANKS-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0][CVA6Cfg.DCACHE_USER_WIDTH-1:0] bank_wuser; // + logic [DCACHE_NUM_BANKS-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0][CVA6Cfg.DCACHE_USER_WIDTH-1:0] bank_ruser; // + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0][CVA6Cfg.DCACHE_USER_WIDTH-1:0] ruser_cl; // selected word from each cacheline - logic [DCACHE_TAG_WIDTH-1:0] rd_tag; - logic [DCACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] rd_tag; + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] vld_req; // bit 
enable for valid regs logic vld_we; // valid bits write enable - logic [DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write - logic [DCACHE_SET_ASSOC-1:0][DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0][CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag_rdata; // these are the tags coming from the tagmem logic [DCACHE_CL_IDX_WIDTH-1:0] vld_addr; // valid bit logic [$clog2(NumPorts)-1:0] vld_sel_d, vld_sel_q; logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh; - logic [ (riscv::XLEN/8)-1:0] wbuffer_be; - riscv::xlen_t wbuffer_rdata, rdata; - logic [DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser; - logic [riscv::PLEN-1:0] wbuffer_cmp_addr; + logic [ (CVA6Cfg.XLEN/8)-1:0] wbuffer_be; + logic [CVA6Cfg.XLEN-1:0] wbuffer_rdata, rdata; + logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] wbuffer_ruser, ruser; + logic [CVA6Cfg.PLEN-1:0] wbuffer_cmp_addr; logic cmp_en_d, cmp_en_q; logic rd_acked; @@ -137,13 +143,13 @@ module wt_dcache_mem // byte enable mapping for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_bank - for (genvar j = 0; j < DCACHE_SET_ASSOC; j++) begin : gen_bank_way - assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(riscv::XLEN/8) +: (riscv::XLEN/8)] : + for (genvar j = 0; j < CVA6Cfg.DCACHE_SET_ASSOC; j++) begin : gen_bank_way + assign bank_be[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_be_i[k*(CVA6Cfg.XLEN/8) +: (CVA6Cfg.XLEN/8)] : (wr_req_i[j] & wr_ack_o) ? wr_data_be_i : '0; - assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_i[k*riscv::XLEN +: riscv::XLEN] : + assign bank_wdata[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_data_i[k*CVA6Cfg.XLEN +: CVA6Cfg.XLEN] : wr_data_i; - assign bank_wuser[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? wr_cl_user_i[k*DCACHE_USER_WIDTH +: DCACHE_USER_WIDTH] : + assign bank_wuser[k][j] = (wr_cl_we_i[j] & wr_cl_vld_i) ? 
wr_cl_user_i[k*CVA6Cfg.DCACHE_USER_WIDTH +: CVA6Cfg.DCACHE_USER_WIDTH] : wr_user_i; end end @@ -189,7 +195,7 @@ module wt_dcache_mem bank_idx = '{default: wr_idx_i}; for (int k = 0; k < NumPorts; k++) begin - bank_collision[k] = rd_off_i[k][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES] == wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]; + bank_collision[k] = rd_off_i[k][CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES] == wr_off_i[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]; end if (wr_cl_vld_i & |wr_cl_we_i) begin @@ -200,16 +206,16 @@ module wt_dcache_mem if (rd_acked) begin if (!rd_tag_only_i[vld_sel_d]) begin bank_req = - dcache_cl_bin2oh(rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); - bank_idx[rd_off_i[vld_sel_d][DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d]; + dcache_cl_bin2oh(rd_off_i[vld_sel_d][CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]); + bank_idx[rd_off_i[vld_sel_d][CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]] = rd_idx_i[vld_sel_d]; end end if (|wr_req_i) begin if (rd_tag_only_i[vld_sel_d] || !(rd_ack_o[vld_sel_d] && bank_collision[vld_sel_d])) begin wr_ack_o = 1'b1; - bank_req |= dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); - bank_we = dcache_cl_bin2oh(wr_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]); + bank_req |= dcache_cl_bin2oh(wr_off_i[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]); + bank_we = dcache_cl_bin2oh(wr_off_i[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]); end end end @@ -219,10 +225,10 @@ module wt_dcache_mem // tag comparison, hit generatio, readoud muxes /////////////////////////////////////////////////////// - logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_off; - logic [DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off; + logic [CVA6Cfg.DCACHE_OFFSET_WIDTH-CVA6Cfg.XLEN_ALIGN_BYTES-1:0] wr_cl_off; + logic 
[CVA6Cfg.DCACHE_OFFSET_WIDTH-CVA6Cfg.XLEN_ALIGN_BYTES-1:0] wr_cl_nc_off; logic [ $clog2(DCACHE_WBUF_DEPTH)-1:0] wbuffer_hit_idx; - logic [ $clog2(DCACHE_SET_ASSOC)-1:0] rd_hit_idx; + logic [ CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] rd_hit_idx; assign cmp_en_d = (|vld_req) & ~vld_we; @@ -230,16 +236,16 @@ module wt_dcache_mem assign wbuffer_cmp_addr = (wr_cl_vld_i) ? {wr_cl_tag_i, wr_cl_idx_i, wr_cl_off_i} : {rd_tag, bank_idx_q, bank_off_q}; // hit generation - for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel + for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel // tag comparison of ways >0 assign rd_hit_oh_o[i] = (rd_tag == tag_rdata[i]) & rd_vld_bits_o[i] & cmp_en_q; // byte offset mux of ways >0 - assign rdata_cl[i] = bank_rdata[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i]; - assign ruser_cl[i] = bank_ruser[bank_off_q[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]][i]; + assign rdata_cl[i] = bank_rdata[bank_off_q[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]][i]; + assign ruser_cl[i] = bank_ruser[bank_off_q[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]][i]; end for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_wbuffer_hit - assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & ({{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_data_i[k].wtag} == (wbuffer_cmp_addr >> riscv::XLEN_ALIGN_BYTES)); + assign wbuffer_hit_oh[k] = (|wbuffer_data_i[k].valid) & ({{CVA6Cfg.XLEN_ALIGN_BYTES{1'b0}}, wbuffer_data_i[k].wtag} == (wbuffer_cmp_addr >> CVA6Cfg.XLEN_ALIGN_BYTES)); end lzc #( @@ -251,7 +257,7 @@ module wt_dcache_mem ); lzc #( - .WIDTH(DCACHE_SET_ASSOC) + .WIDTH(CVA6Cfg.DCACHE_SET_ASSOC) ) i_lzc_rd_hit ( .in_i (rd_hit_oh_o), .cnt_o (rd_hit_idx), @@ -263,18 +269,18 @@ module wt_dcache_mem assign wbuffer_be = (|wbuffer_hit_oh) ? 
wbuffer_data_i[wbuffer_hit_idx].valid : '0; if (CVA6Cfg.NOCType == config_pkg::NOC_TYPE_AXI4_ATOP) begin : gen_axi_offset - // In case of an uncached read, return the desired XLEN-bit segment of the most recent AXI read - assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == riscv::XLEN) ? '0 : - {{DCACHE_OFFSET_WIDTH-AXI_OFFSET_WIDTH{1'b0}}, wr_cl_off_i[AXI_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]} : - wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:riscv::XLEN_ALIGN_BYTES]; + // In case of an uncached read, return the desired CVA6Cfg.XLEN-bit segment of the most recent AXI read + assign wr_cl_off = (wr_cl_nc_i) ? (CVA6Cfg.AxiDataWidth == CVA6Cfg.XLEN) ? '0 : + {{CVA6Cfg.DCACHE_OFFSET_WIDTH-AXI_OFFSET_WIDTH{1'b0}}, wr_cl_off_i[AXI_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]} : + wr_cl_off_i[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]; end else begin : gen_piton_offset - assign wr_cl_off = wr_cl_off_i[DCACHE_OFFSET_WIDTH-1:3]; + assign wr_cl_off = wr_cl_off_i[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:3]; end always_comb begin if (wr_cl_vld_i) begin - rdata = wr_cl_data_i[wr_cl_off*riscv::XLEN+:riscv::XLEN]; - ruser = wr_cl_user_i[wr_cl_off*DCACHE_USER_WIDTH+:DCACHE_USER_WIDTH]; + rdata = wr_cl_data_i[wr_cl_off*CVA6Cfg.XLEN+:CVA6Cfg.XLEN]; + ruser = wr_cl_user_i[wr_cl_off*CVA6Cfg.DCACHE_USER_WIDTH+:CVA6Cfg.DCACHE_USER_WIDTH]; end else begin rdata = rdata_cl[rd_hit_idx]; ruser = ruser_cl[rd_hit_idx]; @@ -282,10 +288,10 @@ module wt_dcache_mem end // overlay bytes that hit in the write buffer - for (genvar k = 0; k < (riscv::XLEN / 8); k++) begin : gen_rd_data + for (genvar k = 0; k < (CVA6Cfg.XLEN / 8); k++) begin : gen_rd_data assign rd_data_o[8*k+:8] = (wbuffer_be[k]) ? wbuffer_rdata[8*k+:8] : rdata[8*k+:8]; end - for (genvar k = 0; k < DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user + for (genvar k = 0; k < CVA6Cfg.DCACHE_USER_WIDTH / 8; k++) begin : gen_rd_user assign rd_user_o[8*k+:8] = (wbuffer_be[k]) ? 
wbuffer_ruser[8*k+:8] : ruser[8*k+:8]; end @@ -293,15 +299,15 @@ module wt_dcache_mem // memory arrays and regs /////////////////////////////////////////////////////// - logic [DCACHE_TAG_WIDTH:0] vld_tag_rdata[DCACHE_SET_ASSOC-1:0]; + logic [CVA6Cfg.DCACHE_TAG_WIDTH:0] vld_tag_rdata[CVA6Cfg.DCACHE_SET_ASSOC-1:0]; for (genvar k = 0; k < DCACHE_NUM_BANKS; k++) begin : gen_data_banks // Data RAM sram #( - .USER_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * DATA_USER_WIDTH), - .DATA_WIDTH(ariane_pkg::DCACHE_SET_ASSOC * riscv::XLEN), - .USER_EN (ariane_pkg::DATA_USER_EN), - .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS) + .USER_WIDTH(CVA6Cfg.DCACHE_SET_ASSOC * CVA6Cfg.DATA_USER_WIDTH), + .DATA_WIDTH(CVA6Cfg.DCACHE_SET_ASSOC * CVA6Cfg.XLEN), + .USER_EN (CVA6Cfg.DATA_USER_EN), + .NUM_WORDS (CVA6Cfg.DCACHE_NUM_WORDS) ) i_data_sram ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -316,16 +322,16 @@ module wt_dcache_mem ); end - for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_srams + for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin : gen_tag_srams - assign tag_rdata[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH-1:0]; - assign rd_vld_bits_o[i] = vld_tag_rdata[i][DCACHE_TAG_WIDTH]; + assign tag_rdata[i] = vld_tag_rdata[i][CVA6Cfg.DCACHE_TAG_WIDTH-1:0]; + assign rd_vld_bits_o[i] = vld_tag_rdata[i][CVA6Cfg.DCACHE_TAG_WIDTH]; // Tag RAM sram #( // tag + valid bit - .DATA_WIDTH(ariane_pkg::DCACHE_TAG_WIDTH + 1), - .NUM_WORDS (wt_cache_pkg::DCACHE_NUM_WORDS) + .DATA_WIDTH(CVA6Cfg.DCACHE_TAG_WIDTH + 1), + .NUM_WORDS (CVA6Cfg.DCACHE_NUM_WORDS) ) i_tag_sram ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -362,20 +368,20 @@ module wt_dcache_mem `ifndef VERILATOR initial begin cach_line_width_axi : - assert (DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth) + assert (CVA6Cfg.DCACHE_LINE_WIDTH >= CVA6Cfg.AxiDataWidth) else $fatal(1, "[l1 dcache] cache line size needs to be greater or equal AXI data width"); end initial begin axi_xlen : - assert (CVA6Cfg.AxiDataWidth >= riscv::XLEN) - else $fatal(1, "[l1 
dcache] AXI data width needs to be greater or equal XLEN"); + assert (CVA6Cfg.AxiDataWidth >= CVA6Cfg.XLEN) + else $fatal(1, "[l1 dcache] AXI data width needs to be greater or equal CVA6Cfg.XLEN"); end initial begin cach_line_width_xlen : - assert (DCACHE_LINE_WIDTH > riscv::XLEN) - else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than XLEN"); + assert (CVA6Cfg.DCACHE_LINE_WIDTH > CVA6Cfg.XLEN) + else $fatal(1, "[l1 dcache] cache_line_size needs to be greater than CVA6Cfg.XLEN"); end hit_hot1 : @@ -395,16 +401,16 @@ module wt_dcache_mem else $fatal(1, "[l1 dcache] wbuffer_hit_oh signal must be hot1"); // this is only used for verification! - logic vld_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0]; - logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag_mirror[wt_cache_pkg::DCACHE_NUM_WORDS-1:0][ariane_pkg::DCACHE_SET_ASSOC-1:0]; - logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test; + logic vld_mirror[CVA6Cfg.DCACHE_NUM_WORDS-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0]; + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] tag_mirror[CVA6Cfg.DCACHE_NUM_WORDS-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0]; + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] tag_write_duplicate_test; always_ff @(posedge clk_i or negedge rst_ni) begin : p_mirror if (!rst_ni) begin vld_mirror <= '{default: '0}; tag_mirror <= '{default: '0}; end else begin - for (int i = 0; i < DCACHE_SET_ASSOC; i++) begin + for (int i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin if (vld_req[i] & vld_we) begin vld_mirror[vld_addr][i] <= vld_wdata[i]; tag_mirror[vld_addr][i] <= wr_cl_tag_i; @@ -413,7 +419,7 @@ module wt_dcache_mem end end - for (genvar i = 0; i < DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test + for (genvar i = 0; i < CVA6Cfg.DCACHE_SET_ASSOC; i++) begin : gen_tag_dubl_test assign tag_write_duplicate_test[i] = (tag_mirror[vld_addr][i] == wr_cl_tag_i) & vld_mirror[vld_addr][i] & (|vld_wdata); end diff --git a/core/cache_subsystem/wt_dcache_missunit.sv 
b/core/cache_subsystem/wt_dcache_missunit.sv index 6cedc28c6a..21738fe17c 100644 --- a/core/cache_subsystem/wt_dcache_missunit.sv +++ b/core/cache_subsystem/wt_dcache_missunit.sv @@ -19,8 +19,11 @@ module wt_dcache_missunit import wt_cache_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter logic [CACHE_ID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs - parameter int unsigned NumPorts = 4 // number of miss ports + parameter logic [CVA6Cfg.MEM_TID_WIDTH-1:0] AmoTxId = 1, // TX id to be used for AMOs + parameter int unsigned NumPorts = 4, // number of miss ports + parameter type dcache_req_t = logic, + parameter type dcache_rtrn_t = logic, + parameter int unsigned DCACHE_CL_IDX_WIDTH = 1 ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -40,31 +43,31 @@ module wt_dcache_missunit output logic [NumPorts-1:0] miss_ack_o, input logic [NumPorts-1:0] miss_nc_i, input logic [NumPorts-1:0] miss_we_i, - input logic [NumPorts-1:0][riscv::XLEN-1:0] miss_wdata_i, - input logic [NumPorts-1:0][DCACHE_USER_WIDTH-1:0] miss_wuser_i, - input logic [NumPorts-1:0][riscv::PLEN-1:0] miss_paddr_i, - input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits_i, + input logic [NumPorts-1:0][CVA6Cfg.XLEN-1:0] miss_wdata_i, + input logic [NumPorts-1:0][CVA6Cfg.DCACHE_USER_WIDTH-1:0] miss_wuser_i, + input logic [NumPorts-1:0][CVA6Cfg.PLEN-1:0] miss_paddr_i, + input logic [NumPorts-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0] miss_vld_bits_i, input logic [NumPorts-1:0][2:0] miss_size_i, - input logic [NumPorts-1:0][CACHE_ID_WIDTH-1:0] miss_id_i, // used as transaction ID + input logic [NumPorts-1:0][CVA6Cfg.MEM_TID_WIDTH-1:0] miss_id_i, // used as transaction ID // signals that the request collided with a pending read output logic [NumPorts-1:0] miss_replay_o, // signals response from memory output logic [NumPorts-1:0] miss_rtrn_vld_o, - output logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set 
to zero fro reads + output logic [CVA6Cfg.MEM_TID_WIDTH-1:0] miss_rtrn_id_o, // only used for writes, set to zero fro reads // from writebuffer - input logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations - input logic [DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations + input logic [CVA6Cfg.DCACHE_MAX_TX-1:0][CVA6Cfg.PLEN-1:0] tx_paddr_i, // used to check for address collisions with read operations + input logic [CVA6Cfg.DCACHE_MAX_TX-1:0] tx_vld_i, // used to check for address collisions with read operations // write interface to cache memory output logic wr_cl_vld_o, // writes a full cacheline output logic wr_cl_nc_o, // writes a full cacheline - output logic [DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline - output logic [DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o, + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_cl_we_o, // writes a full cacheline + output logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] wr_cl_tag_o, output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_o, - output logic [DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o, - output logic [DCACHE_LINE_WIDTH-1:0] wr_cl_data_o, - output logic [DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o, - output logic [DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o, - output logic [DCACHE_SET_ASSOC-1:0] wr_vld_bits_o, + output logic [CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] wr_cl_off_o, + output logic [CVA6Cfg.DCACHE_LINE_WIDTH-1:0] wr_cl_data_o, + output logic [CVA6Cfg.DCACHE_USER_LINE_WIDTH-1:0] wr_cl_user_o, + output logic [CVA6Cfg.DCACHE_LINE_WIDTH/8-1:0] wr_cl_data_be_o, + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_vld_bits_o, // memory interface input logic mem_rtrn_vld_i, input dcache_rtrn_t mem_rtrn_i, @@ -74,9 +77,9 @@ module wt_dcache_missunit ); // functions - function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh( - input logic [L1D_WAY_WIDTH-1:0] in); - logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] out; + function 
automatic logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] dcache_way_bin2oh( + input logic [CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] in); + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] out; out = '0; out[in] = 1'b1; return out; @@ -88,15 +91,15 @@ module wt_dcache_missunit // 010: word // 011: dword // 111: DCACHE line - function automatic logic [riscv::PLEN-1:0] paddrSizeAlign(input logic [riscv::PLEN-1:0] paddr, + function automatic logic [CVA6Cfg.PLEN-1:0] paddrSizeAlign(input logic [CVA6Cfg.PLEN-1:0] paddr, input logic [2:0] size); - logic [riscv::PLEN-1:0] out; + logic [CVA6Cfg.PLEN-1:0] out; out = paddr; unique case (size) 3'b001: out[0:0] = '0; 3'b010: out[1:0] = '0; 3'b011: out[2:0] = '0; - 3'b111: out[DCACHE_OFFSET_WIDTH-1:0] = '0; + 3'b111: out[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] = '0; default: ; endcase return out; @@ -116,17 +119,17 @@ module wt_dcache_missunit // MSHR for reads typedef struct packed { - logic [riscv::PLEN-1:0] paddr; + logic [CVA6Cfg.PLEN-1:0] paddr; logic [2:0] size; - logic [DCACHE_SET_ASSOC-1:0] vld_bits; - logic [CACHE_ID_WIDTH-1:0] id; + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] vld_bits; + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] id; logic nc; - logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way; + logic [CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] repl_way; logic [$clog2(NumPorts)-1:0] miss_port_idx; } mshr_t; mshr_t mshr_d, mshr_q; - logic [$clog2(DCACHE_SET_ASSOC)-1:0] repl_way, inv_way, rnd_way; + logic [CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] repl_way, inv_way, rnd_way; logic mshr_vld_d, mshr_vld_q, mshr_vld_q1; logic mshr_allocate; logic update_lfsr, all_ways_valid; @@ -138,9 +141,9 @@ module wt_dcache_missunit logic amo_sel, miss_is_write; logic amo_req_d, amo_req_q; logic [63:0] amo_rtrn_mux; - riscv::xlen_t amo_data, amo_data_a, amo_data_b; - riscv::xlen_t amo_user; //DCACHE USER ? DATA_USER_WIDTH - logic [riscv::PLEN-1:0] tmp_paddr; + logic [CVA6Cfg.XLEN-1:0] amo_data, amo_data_a, amo_data_b; + logic [CVA6Cfg.XLEN-1:0] amo_user; //DCACHE USER ? 
CVA6Cfg.DATA_USER_WIDTH + logic [CVA6Cfg.PLEN-1:0] tmp_paddr; logic [$clog2(NumPorts)-1:0] miss_port_idx; logic [DCACHE_CL_IDX_WIDTH-1:0] cnt_d, cnt_q; logic [NumPorts-1:0] miss_req_masked_d, miss_req_masked_q; @@ -158,7 +161,7 @@ module wt_dcache_missunit assign cache_en_o = enable_q; assign cnt_d = (flush_en) ? cnt_q + 1 : '0; - assign flush_done = (cnt_q == wt_cache_pkg::DCACHE_NUM_WORDS - 1); + assign flush_done = (cnt_q == CVA6Cfg.DCACHE_NUM_WORDS - 1); assign miss_req_masked_d = (lock_reqs) ? miss_req_masked_q : (mask_reads) ? miss_we_i & miss_req_i : miss_req_i; @@ -186,7 +189,7 @@ module wt_dcache_missunit // find invalid cache line lzc #( - .WIDTH(ariane_pkg::DCACHE_SET_ASSOC) + .WIDTH(CVA6Cfg.DCACHE_SET_ASSOC) ) i_lzc_inv ( .in_i (~miss_vld_bits_i[miss_port_idx]), .cnt_o (inv_way), @@ -196,7 +199,7 @@ module wt_dcache_missunit // generate random cacheline index lfsr #( .LfsrWidth(8), - .OutWidth ($clog2(ariane_pkg::DCACHE_SET_ASSOC)) + .OutWidth (CVA6Cfg.DCACHE_SET_ASSOC_WIDTH) ) i_lfsr_inv ( .clk_i (clk_i), .rst_ni(rst_ni), @@ -221,19 +224,19 @@ module wt_dcache_missunit for (genvar k = 0; k < NumPorts; k++) begin : gen_rdrd_collision - assign mshr_rdrd_collision[k] = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1); + assign mshr_rdrd_collision[k] = (mshr_q.paddr[CVA6Cfg.PLEN-1:CVA6Cfg.DCACHE_OFFSET_WIDTH] == miss_paddr_i[k][CVA6Cfg.PLEN-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]) && (mshr_vld_q | mshr_vld_q1); assign mshr_rdrd_collision_d[k] = (!miss_req_i[k]) ? 
1'b0 : mshr_rdrd_collision_q[k] | mshr_rdrd_collision[k]; end // read/write collision, stalls the corresponding request // write port[NumPorts-1] collides with MSHR_Q - assign mshr_rdwr_collision = (mshr_q.paddr[riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && mshr_vld_q; + assign mshr_rdwr_collision = (mshr_q.paddr[CVA6Cfg.PLEN-1:CVA6Cfg.DCACHE_OFFSET_WIDTH] == miss_paddr_i[NumPorts-1][CVA6Cfg.PLEN-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]) && mshr_vld_q; // read collides with inflight TX always_comb begin : p_tx_coll tx_rdwr_collision = 1'b0; - for (int k = 0; k < DCACHE_MAX_TX; k++) begin - tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][riscv::PLEN-1:DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][riscv::PLEN-1:DCACHE_OFFSET_WIDTH]) && tx_vld_i[k]; + for (int k = 0; k < CVA6Cfg.DCACHE_MAX_TX; k++) begin + tx_rdwr_collision |= (miss_paddr_i[miss_port_idx][CVA6Cfg.PLEN-1:CVA6Cfg.DCACHE_OFFSET_WIDTH] == tx_paddr_i[k][CVA6Cfg.PLEN-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]) && tx_vld_i[k]; end end @@ -244,7 +247,7 @@ module wt_dcache_missunit // if size = 32bit word, select appropriate offset, replicate for openpiton... 
if (CVA6Cfg.RVA) begin - if (riscv::IS_XLEN64) begin : gen_amo_64b_data + if (CVA6Cfg.IS_XLEN64) begin : gen_amo_64b_data assign amo_data_a = {amo_req_i.operand_b[0 +: 32], amo_req_i.operand_b[0 +: 32]}; assign amo_data_b = amo_req_i.operand_b; end else begin : gen_amo_32b_data @@ -254,7 +257,7 @@ module wt_dcache_missunit always_comb begin if (CVA6Cfg.RVA) begin - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin if (amo_req_i.size == 2'b10) begin amo_data = amo_data_a; end else begin @@ -263,7 +266,7 @@ module wt_dcache_missunit end else begin amo_data = amo_data_a; end - if (ariane_pkg::DATA_USER_EN) begin + if (CVA6Cfg.DATA_USER_EN) begin amo_user = amo_data; end else begin amo_user = '0; @@ -282,7 +285,7 @@ module wt_dcache_missunit assign amo_rtrn_mux = mem_rtrn_i.data[0+:64]; end end else begin : gen_piton_rtrn_mux - assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[DCACHE_OFFSET_WIDTH-1:3]*64+:64]; + assign amo_rtrn_mux = mem_rtrn_i.data[amo_req_i.operand_a[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:3]*64+:64]; end // always sign extend 32bit values @@ -300,7 +303,7 @@ module wt_dcache_missunit assign mem_data_o.size = (CVA6Cfg.RVA && amo_sel) ? {1'b0, amo_req_i.size} : miss_size_i [miss_port_idx]; assign mem_data_o.amo_op = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.amo_op : AMO_NONE; - assign tmp_paddr = (CVA6Cfg.RVA && amo_sel) ? amo_req_i.operand_a[riscv::PLEN-1:0] : miss_paddr_i[miss_port_idx]; + assign tmp_paddr = (CVA6Cfg.RVA && amo_sel) ? 
amo_req_i.operand_a[CVA6Cfg.PLEN-1:0] : miss_paddr_i[miss_port_idx]; assign mem_data_o.paddr = paddrSizeAlign(tmp_paddr, mem_data_o.size); /////////////////////////////////////////////////////// @@ -325,7 +328,7 @@ module wt_dcache_missunit // keep track of pending stores logic store_sent; - logic [$clog2(wt_cache_pkg::DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q; + logic [$clog2(CVA6Cfg.DCACHE_MAX_TX + 1)-1:0] stores_inflight_d, stores_inflight_q; assign store_sent = mem_data_req_o & mem_data_ack_i & (mem_data_o.rtype == DCACHE_STORE_REQ); assign stores_inflight_d = (store_ack && store_sent) ? stores_inflight_q : @@ -408,11 +411,11 @@ module wt_dcache_missunit ) : '0; assign wr_cl_idx_o = (flush_en) ? cnt_q : - (inv_vld) ? mem_rtrn_i.inv.idx[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH] : - mshr_q.paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; + (inv_vld) ? mem_rtrn_i.inv.idx[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH] : + mshr_q.paddr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]; - assign wr_cl_tag_o = mshr_q.paddr[DCACHE_TAG_WIDTH+DCACHE_INDEX_WIDTH-1:DCACHE_INDEX_WIDTH]; - assign wr_cl_off_o = mshr_q.paddr[DCACHE_OFFSET_WIDTH-1:0]; + assign wr_cl_tag_o = mshr_q.paddr[CVA6Cfg.DCACHE_TAG_WIDTH+CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_INDEX_WIDTH]; + assign wr_cl_off_o = mshr_q.paddr[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0]; assign wr_cl_data_o = mem_rtrn_i.data; assign wr_cl_user_o = mem_rtrn_i.user; assign wr_cl_data_be_o = (cl_write_en) ? 
'1 : '0;// we only write complete cachelines into the memory diff --git a/core/cache_subsystem/wt_dcache_wbuffer.sv b/core/cache_subsystem/wt_dcache_wbuffer.sv index 706db4d638..3743100144 100644 --- a/core/cache_subsystem/wt_dcache_wbuffer.sv +++ b/core/cache_subsystem/wt_dcache_wbuffer.sv @@ -53,7 +53,11 @@ module wt_dcache_wbuffer import ariane_pkg::*; import wt_cache_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter type wbuffer_t = logic, + parameter int unsigned DCACHE_CL_IDX_WIDTH = 1 ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -66,63 +70,69 @@ module wt_dcache_wbuffer output dcache_req_o_t req_port_o, // interface to miss handler input logic miss_ack_i, - output logic [riscv::PLEN-1:0] miss_paddr_o, + output logic [CVA6Cfg.PLEN-1:0] miss_paddr_o, output logic miss_req_o, output logic miss_we_o, // always 1 here - output riscv::xlen_t miss_wdata_o, - output logic [DCACHE_USER_WIDTH-1:0] miss_wuser_o, - output logic [DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0) + output logic [CVA6Cfg.XLEN-1:0] miss_wdata_o, + output logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] miss_wuser_o, + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] miss_vld_bits_o, // unused here (set to 0) output logic miss_nc_o, // request to I/O space output logic [2:0] miss_size_o, // - output logic [CACHE_ID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1) + output logic [CVA6Cfg.MEM_TID_WIDTH-1:0] miss_id_o, // ID of this transaction (wbuffer uses all IDs from 0 to DCACHE_MAX_TX-1) // write responses from memory input logic miss_rtrn_vld_i, - input logic [CACHE_ID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to clear + input logic [CVA6Cfg.MEM_TID_WIDTH-1:0] miss_rtrn_id_i, // transaction ID to 
clear // cache read interface - output logic [DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later + output logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] rd_tag_o, // tag in - comes one cycle later output logic [DCACHE_CL_IDX_WIDTH-1:0] rd_idx_o, - output logic [DCACHE_OFFSET_WIDTH-1:0] rd_off_o, + output logic [CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] rd_off_o, output logic rd_req_o, // read the word at offset off_i[:3] in all ways output logic rd_tag_only_o, // set to 1 here as we do not have to read the data arrays input logic rd_ack_i, - input riscv::xlen_t rd_data_i, // unused - input logic [DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused - input logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_i, + input logic [CVA6Cfg.XLEN-1:0] rd_data_i, // unused + input logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_vld_bits_i, // unused + input logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_hit_oh_i, // cacheline writes input logic wr_cl_vld_i, input logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_i, // cache word write interface - output logic [DCACHE_SET_ASSOC-1:0] wr_req_o, + output logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] wr_req_o, input logic wr_ack_i, output logic [DCACHE_CL_IDX_WIDTH-1:0] wr_idx_o, - output logic [DCACHE_OFFSET_WIDTH-1:0] wr_off_o, - output riscv::xlen_t wr_data_o, - output logic [(riscv::XLEN/8)-1:0] wr_data_be_o, - output logic [DCACHE_USER_WIDTH-1:0] wr_user_o, + output logic [CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0] wr_off_o, + output logic [CVA6Cfg.XLEN-1:0] wr_data_o, + output logic [(CVA6Cfg.XLEN/8)-1:0] wr_data_be_o, + output logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] wr_user_o, // to forwarding logic and miss unit output wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_data_o, - output logic [DCACHE_MAX_TX-1:0][riscv::PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations - output logic [DCACHE_MAX_TX-1:0] tx_vld_o + output logic [CVA6Cfg.DCACHE_MAX_TX-1:0][CVA6Cfg.PLEN-1:0] tx_paddr_o, // used to check for address collisions with read operations + output logic 
[CVA6Cfg.DCACHE_MAX_TX-1:0] tx_vld_o ); - tx_stat_t [DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q; + typedef struct packed { + logic vld; + logic [(CVA6Cfg.XLEN/8)-1:0] be; + logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] ptr; + } tx_stat_t; + + tx_stat_t [CVA6Cfg.DCACHE_MAX_TX-1:0] tx_stat_d, tx_stat_q; wbuffer_t [DCACHE_WBUF_DEPTH-1:0] wbuffer_d, wbuffer_q; logic [DCACHE_WBUF_DEPTH-1:0] valid; logic [DCACHE_WBUF_DEPTH-1:0] dirty; logic [DCACHE_WBUF_DEPTH-1:0] tocheck; logic [DCACHE_WBUF_DEPTH-1:0] wbuffer_hit_oh, inval_hit; //logic [DCACHE_WBUF_DEPTH-1:0][7:0] bdirty; - logic [DCACHE_WBUF_DEPTH-1:0][(riscv::XLEN/8)-1:0] bdirty; + logic [DCACHE_WBUF_DEPTH-1:0][(CVA6Cfg.XLEN/8)-1:0] bdirty; logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] next_ptr, dirty_ptr, hit_ptr, wr_ptr, check_ptr_d, check_ptr_q, check_ptr_q1, rtrn_ptr; - logic [CACHE_ID_WIDTH-1:0] tx_id, rtrn_id; + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] tx_id, rtrn_id; - logic [riscv::XLEN_ALIGN_BYTES-1:0] bdirty_off; - logic [(riscv::XLEN/8)-1:0] tx_be; - logic [riscv::PLEN-1:0] wr_paddr, rd_paddr, extract_tag; - logic [DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q; - logic [DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q; + logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] bdirty_off; + logic [(CVA6Cfg.XLEN/8)-1:0] tx_be; + logic [CVA6Cfg.PLEN-1:0] wr_paddr, rd_paddr, extract_tag; + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] rd_tag_d, rd_tag_q; + logic [CVA6Cfg.DCACHE_SET_ASSOC-1:0] rd_hit_oh_d, rd_hit_oh_q; logic check_en_d, check_en_q, check_en_q1; logic full, dirty_rd_en, rdy; logic rtrn_empty, evict; @@ -133,21 +143,21 @@ module wt_dcache_wbuffer logic wr_cl_vld_q, wr_cl_vld_d; logic [DCACHE_CL_IDX_WIDTH-1:0] wr_cl_idx_q, wr_cl_idx_d; - logic [riscv::PLEN-1:0] debug_paddr[DCACHE_WBUF_DEPTH-1:0]; + logic [CVA6Cfg.PLEN-1:0] debug_paddr[DCACHE_WBUF_DEPTH-1:0]; wbuffer_t wbuffer_check_mux, wbuffer_dirty_mux; /////////////////////////////////////////////////////// // misc /////////////////////////////////////////////////////// - logic 
[ariane_pkg::DCACHE_TAG_WIDTH-1:0] miss_tag; + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] miss_tag; logic is_nc_miss; logic is_ni; - assign miss_tag = miss_paddr_o[ariane_pkg::DCACHE_INDEX_WIDTH+:ariane_pkg::DCACHE_TAG_WIDTH]; + assign miss_tag = miss_paddr_o[CVA6Cfg.DCACHE_INDEX_WIDTH+:CVA6Cfg.DCACHE_TAG_WIDTH]; assign is_nc_miss = !config_pkg::is_inside_cacheable_regions( CVA6Cfg, { - {64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {DCACHE_INDEX_WIDTH{1'b0}} + {64 - CVA6Cfg.DCACHE_TAG_WIDTH - CVA6Cfg.DCACHE_INDEX_WIDTH{1'b0}}, miss_tag, {CVA6Cfg.DCACHE_INDEX_WIDTH{1'b0}} } ); assign miss_nc_o = !cache_en_i || is_nc_miss; @@ -155,9 +165,9 @@ module wt_dcache_wbuffer assign is_ni = config_pkg::is_inside_nonidempotent_regions( CVA6Cfg, { - {64 - DCACHE_TAG_WIDTH - DCACHE_INDEX_WIDTH{1'b0}}, + {64 - CVA6Cfg.DCACHE_TAG_WIDTH - CVA6Cfg.DCACHE_INDEX_WIDTH{1'b0}}, req_port_i.address_tag, - {DCACHE_INDEX_WIDTH{1'b0}} + {CVA6Cfg.DCACHE_INDEX_WIDTH{1'b0}} } ); @@ -165,9 +175,9 @@ module wt_dcache_wbuffer assign miss_vld_bits_o = '0; assign wbuffer_data_o = wbuffer_q; - for (genvar k = 0; k < DCACHE_MAX_TX; k++) begin : gen_tx_vld + for (genvar k = 0; k < CVA6Cfg.DCACHE_MAX_TX; k++) begin : gen_tx_vld assign tx_vld_o[k] = tx_stat_q[k].vld; - assign tx_paddr_o[k] = {{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[tx_stat_q[k].ptr].wtag << riscv::XLEN_ALIGN_BYTES}; + assign tx_paddr_o[k] = {{CVA6Cfg.XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[tx_stat_q[k].ptr].wtag << CVA6Cfg.XLEN_ALIGN_BYTES}; end /////////////////////////////////////////////////////// @@ -183,7 +193,7 @@ module wt_dcache_wbuffer // get byte offset lzc #( - .WIDTH(riscv::XLEN / 8) + .WIDTH(CVA6Cfg.XLEN / 8) ) i_vld_bdirty ( .in_i (bdirty[dirty_ptr]), .cnt_o (bdirty_off), @@ -201,20 +211,44 @@ module wt_dcache_wbuffer // note: openpiton can only handle aligned offsets + size, and hence // we have to split unaligned data into multiple transfers (see toSize64) // e.g. 
if we have the following valid bytes: 0011_1001 -> TX0: 0000_0001, TX1: 0000_1000, TX2: 0011_0000 - if(riscv::IS_XLEN64) begin : gen_size_64b + if(CVA6Cfg.IS_XLEN64) begin : gen_size_64b assign miss_size_o = {1'b0, toSize64(bdirty[dirty_ptr])}; end else begin : gen_size_32b assign miss_size_o = {1'b0, toSize32(bdirty[dirty_ptr])}; end + // openpiton requires the data to be replicated in case of smaller sizes than dwords + function automatic logic [CVA6Cfg.XLEN-1:0] repData64(input logic [CVA6Cfg.XLEN-1:0] data, input logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] offset, + input logic [1:0] size); + logic [CVA6Cfg.XLEN-1:0] out; + unique case (size) + 2'b00: for (int k = 0; k < 8; k++) out[k*8+:8] = data[offset*8+:8]; // byte + 2'b01: for (int k = 0; k < 4; k++) out[k*16+:16] = data[offset*8+:16]; // hword + 2'b10: for (int k = 0; k < 2; k++) out[k*32+:32] = data[offset*8+:32]; // word + default: out = data; // dword + endcase // size + return out; + endfunction : repData64 + + function automatic logic [CVA6Cfg.XLEN-1:0] repData32(input logic [CVA6Cfg.XLEN-1:0] data, input logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] offset, + input logic [1:0] size); + logic [CVA6Cfg.XLEN-1:0] out; + unique case (size) + 2'b00: for (int k = 0; k < 4; k++) out[k*8+:8] = data[offset*8+:8]; // byte + 2'b01: for (int k = 0; k < 2; k++) out[k*16+:16] = data[offset*8+:16]; // hword + default: out = data; // word + endcase // size + return out; + endfunction : repData32 + // replicate transfers shorter than a dword - assign miss_wdata_o = riscv::IS_XLEN64 ? repData64( + assign miss_wdata_o = CVA6Cfg.IS_XLEN64 ? repData64( wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0] ) : repData32( wbuffer_dirty_mux.data, bdirty_off, miss_size_o[1:0] ); - if (ariane_pkg::DATA_USER_EN) begin - assign miss_wuser_o = riscv::IS_XLEN64 ? repData64( + if (CVA6Cfg.DATA_USER_EN) begin + assign miss_wuser_o = CVA6Cfg.IS_XLEN64 ? 
repData64( wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0] ) : repData32( wbuffer_dirty_mux.user, bdirty_off, miss_size_o[1:0] @@ -223,7 +257,30 @@ module wt_dcache_wbuffer assign miss_wuser_o = '0; end - assign tx_be = riscv::IS_XLEN64 ? to_byte_enable8( + function automatic logic [(CVA6Cfg.XLEN/8)-1:0] to_byte_enable8(input logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); + logic [(CVA6Cfg.XLEN/8)-1:0] be; + be = '0; + unique case (size) + 2'b00: be[offset] = '1; // byte + 2'b01: be[offset+:2] = '1; // hword + 2'b10: be[offset+:4] = '1; // word + default: be = '1; // dword + endcase // size + return be; + endfunction : to_byte_enable8 + + function automatic logic [(CVA6Cfg.XLEN/8)-1:0] to_byte_enable4(input logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); + logic [3:0] be; + be = '0; + unique case (size) + 2'b00: be[offset] = '1; // byte + 2'b01: be[offset+:2] = '1; // hword + default: be = '1; // word + endcase // size + return be; + endfunction : to_byte_enable4 + + assign tx_be = CVA6Cfg.IS_XLEN64 ? 
to_byte_enable8( bdirty_off, miss_size_o[1:0] ) : to_byte_enable4( bdirty_off, miss_size_o[1:0] @@ -236,8 +293,9 @@ module wt_dcache_wbuffer // TODO: todo: make this fall through if timing permits it fifo_v3 #( .FALL_THROUGH(1'b0), - .DATA_WIDTH ($clog2(DCACHE_MAX_TX)), - .DEPTH (DCACHE_MAX_TX) + .DATA_WIDTH ($clog2(CVA6Cfg.DCACHE_MAX_TX)), + .DEPTH (CVA6Cfg.DCACHE_MAX_TX), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_rtrn_id_fifo ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -285,7 +343,7 @@ module wt_dcache_wbuffer // next word to lookup in the cache rr_arb_tree #( - .NumIn (DCACHE_MAX_TX), + .NumIn (CVA6Cfg.DCACHE_MAX_TX), .LockIn (1'b1), .DataWidth(1) ) i_tx_id_rr ( @@ -306,16 +364,16 @@ module wt_dcache_wbuffer // cache readout & update /////////////////////////////////////////////////////// - assign extract_tag = rd_paddr >> DCACHE_INDEX_WIDTH; - assign rd_tag_d = extract_tag[DCACHE_TAG_WIDTH-1:0]; + assign extract_tag = rd_paddr >> CVA6Cfg.DCACHE_INDEX_WIDTH; + assign rd_tag_d = extract_tag[CVA6Cfg.DCACHE_TAG_WIDTH-1:0]; // trigger TAG readout in cache assign rd_tag_only_o = 1'b1; - assign rd_paddr = {{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_check_mux.wtag << riscv::XLEN_ALIGN_BYTES}; + assign rd_paddr = {{CVA6Cfg.XLEN_ALIGN_BYTES{1'b0}}, wbuffer_check_mux.wtag << CVA6Cfg.XLEN_ALIGN_BYTES}; assign rd_req_o = |tocheck; assign rd_tag_o = rd_tag_q; //delay by one cycle - assign rd_idx_o = rd_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; - assign rd_off_o = rd_paddr[DCACHE_OFFSET_WIDTH-1:0]; + assign rd_idx_o = rd_paddr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]; + assign rd_off_o = rd_paddr[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0]; assign check_en_d = rd_req_o & rd_ack_i; // cache update port @@ -323,9 +381,9 @@ module wt_dcache_wbuffer // if we wrote into a word while it was in-flight, we cannot write the dirty bytes to the cache // when the TX returns assign wr_data_be_o = tx_stat_q[rtrn_id].be & (~wbuffer_q[rtrn_ptr].dirty); - assign wr_paddr = 
{{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[rtrn_ptr].wtag << riscv::XLEN_ALIGN_BYTES}; - assign wr_idx_o = wr_paddr[DCACHE_INDEX_WIDTH-1:DCACHE_OFFSET_WIDTH]; - assign wr_off_o = wr_paddr[DCACHE_OFFSET_WIDTH-1:0]; + assign wr_paddr = {{CVA6Cfg.XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[rtrn_ptr].wtag << CVA6Cfg.XLEN_ALIGN_BYTES}; + assign wr_idx_o = wr_paddr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.DCACHE_OFFSET_WIDTH]; + assign wr_off_o = wr_paddr[CVA6Cfg.DCACHE_OFFSET_WIDTH-1:0]; assign wr_data_o = wbuffer_q[rtrn_ptr].data; assign wr_user_o = wbuffer_q[rtrn_ptr].user; @@ -342,7 +400,7 @@ module wt_dcache_wbuffer for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_flags // only for debug, will be pruned if(CVA6Cfg.DebugEn) begin - assign debug_paddr[k] = {{riscv::XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[k].wtag << riscv::XLEN_ALIGN_BYTES}; + assign debug_paddr[k] = {{CVA6Cfg.XLEN_ALIGN_BYTES{1'b0}}, wbuffer_q[k].wtag << CVA6Cfg.XLEN_ALIGN_BYTES}; end // dirty bytes that are ready for transmission. @@ -353,12 +411,12 @@ module wt_dcache_wbuffer assign dirty[k] = |bdirty[k]; assign valid[k] = |wbuffer_q[k].valid; - assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES]}); + assign wbuffer_hit_oh[k] = valid[k] & (wbuffer_q[k].wtag == {req_port_i.address_tag, req_port_i.address_index[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES]}); // checks if an invalidation/cache refill hits a particular word // note: an invalidation can hit multiple words! // need to respect previous cycle, too, since we add a cycle of latency to the rd_hit_oh_i signal... 
- assign wtag_comp[k] = wbuffer_q[k].wtag[DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES-1:DCACHE_OFFSET_WIDTH-riscv::XLEN_ALIGN_BYTES]; + assign wtag_comp[k] = wbuffer_q[k].wtag[CVA6Cfg.DCACHE_INDEX_WIDTH-CVA6Cfg.XLEN_ALIGN_BYTES-1:CVA6Cfg.DCACHE_OFFSET_WIDTH-CVA6Cfg.XLEN_ALIGN_BYTES]; assign inval_hit[k] = (wr_cl_vld_d & valid[k] & (wtag_comp[k] == wr_cl_idx_d)) | (wr_cl_vld_q & valid[k] & (wtag_comp[k] == wr_cl_idx_q)); @@ -468,7 +526,7 @@ module wt_dcache_wbuffer // once TX write response came back, we can clear the TX block. if it was not dirty, we // can completely evict it - otherwise we have to leave it there for retransmission if (evict) begin - for (int k = 0; k < (riscv::XLEN / 8); k++) begin + for (int k = 0; k < (CVA6Cfg.XLEN / 8); k++) begin if (tx_stat_q[rtrn_id].be[k]) begin wbuffer_d[rtrn_ptr].txblock[k] = 1'b0; if (!wbuffer_q[rtrn_ptr].dirty[k]) begin @@ -490,7 +548,7 @@ module wt_dcache_wbuffer // mark bytes sent out to the memory system if (miss_req_o && miss_ack_i) begin dirty_rd_en = 1'b1; - for (int k = 0; k < (riscv::XLEN / 8); k++) begin + for (int k = 0; k < (CVA6Cfg.XLEN / 8); k++) begin if (tx_be[k]) begin wbuffer_d[dirty_ptr].dirty[k] = 1'b0; wbuffer_d[dirty_ptr].txblock[k] = 1'b1; @@ -511,16 +569,16 @@ module wt_dcache_wbuffer wbuffer_d[wr_ptr].checked = 1'b0; wbuffer_d[wr_ptr].wtag = { req_port_i.address_tag, - req_port_i.address_index[DCACHE_INDEX_WIDTH-1:riscv::XLEN_ALIGN_BYTES] + req_port_i.address_index[CVA6Cfg.DCACHE_INDEX_WIDTH-1:CVA6Cfg.XLEN_ALIGN_BYTES] }; // mark bytes as dirty - for (int k = 0; k < (riscv::XLEN / 8); k++) begin + for (int k = 0; k < (CVA6Cfg.XLEN / 8); k++) begin if (req_port_i.data_be[k]) begin wbuffer_d[wr_ptr].valid[k] = 1'b1; wbuffer_d[wr_ptr].dirty[k] = 1'b1; wbuffer_d[wr_ptr].data[k*8+:8] = req_port_i.data_wdata[k*8+:8]; - if (ariane_pkg::DATA_USER_EN) begin + if (CVA6Cfg.DATA_USER_EN) begin wbuffer_d[wr_ptr].user[k*8+:8] = req_port_i.data_wuser[k*8+:8]; end else begin wbuffer_d[wr_ptr].user[k*8+:8] = '0; 
@@ -605,7 +663,7 @@ module wt_dcache_wbuffer else $fatal(1, "[l1 dcache wbuffer] req_port_i.kill_req should not be asserted"); for (genvar k = 0; k < DCACHE_WBUF_DEPTH; k++) begin : gen_assert1 - for (genvar j = 0; j < (riscv::XLEN / 8); j++) begin : gen_assert2 + for (genvar j = 0; j < (CVA6Cfg.XLEN / 8); j++) begin : gen_assert2 byteStates : assert property ( @(posedge clk_i) disable iff (!rst_ni) {wbuffer_q[k].valid[j], wbuffer_q[k].dirty[j], wbuffer_q[k].txblock[j]} inside {3'b000, 3'b110, 3'b101, 3'b111} ) diff --git a/core/cache_subsystem/wt_l15_adapter.sv b/core/cache_subsystem/wt_l15_adapter.sv index 7634bf302a..1768689c61 100644 --- a/core/cache_subsystem/wt_l15_adapter.sv +++ b/core/cache_subsystem/wt_l15_adapter.sv @@ -53,7 +53,56 @@ module wt_l15_adapter import ariane_pkg::*; import wt_cache_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type icache_req_t = logic, + parameter type icache_rtrn_t = logic, + parameter type dcache_req_t = logic, + parameter type dcache_rtrn_t = logic, + + localparam type l15_req_t = struct packed { + logic l15_val; // valid signal, asserted with request + logic l15_req_ack; // ack for response + l15_reqtypes_t l15_rqtype; // see below for encoding + logic l15_nc; // non-cacheable bit + logic [2:0] l15_size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] l15_threadid; // currently 0 or 1 + logic l15_prefetch; // unused in openpiton + logic l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment + logic l15_blockstore; // unused in openpiton + logic l15_blockinitstore; // unused in openpiton + logic [CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] l15_l1rplway; // way to replace + logic [39:0] l15_address; // physical address + logic [63:0] l15_data; // word to write + logic [63:0] l15_data_next_entry; // unused in 
Ariane (only used for CAS atomic requests) + logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data; // unused in Ariane + logic [3:0] l15_amo_op; // atomic operation type + }, + localparam type l15_rtrn_t = struct packed { + logic l15_ack; // ack for request struct + logic l15_header_ack; // ack for request struct + logic l15_val; // valid signal for return struct + l15_rtrntypes_t l15_returntype; // see below for encoding + logic l15_l2miss; // unused in Ariane + logic [1:0] l15_error; // unused in openpiton + logic l15_noncacheable; // non-cacheable bit + logic l15_atomic; // asserted in load return and store ack packets of atomic tx + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] l15_threadid; // used as transaction ID + logic l15_prefetch; // unused in openpiton + logic l15_f4b; // 4byte instruction fill from I/O space (nc). + logic [63:0] l15_data_0; // used for both caches + logic [63:0] l15_data_1; // used for both caches + logic [63:0] l15_data_2; // currently only used for I$ + logic [63:0] l15_data_3; // currently only used for I$ + logic l15_inval_icache_all_way; // invalidate all ways + logic l15_inval_dcache_all_way; // unused in openpiton + logic [15:4] l15_inval_address_15_4; // invalidate selected cacheline + logic l15_cross_invalidate; // unused in openpiton + logic [CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] l15_cross_invalidate_way; // unused in openpiton + logic l15_inval_dcache_inval; // invalidate selected cacheline and way + logic l15_inval_icache_inval; // unused in openpiton + logic [CVA6Cfg.DCACHE_SET_ASSOC_WIDTH-1:0] l15_inval_way; // way to invalidate + logic l15_blockinitstore; // unused in openpiton + } ) ( input logic clk_i, input logic rst_ni, @@ -389,19 +438,19 @@ module wt_l15_adapter initial begin // assert wrong parameterizations - assert (L15_SET_ASSOC >= ICACHE_SET_ASSOC) + assert (CVA6Cfg.DCACHE_SET_ASSOC >= CVA6Cfg.ICACHE_SET_ASSOC) else $fatal( 1, "[l15_adapter] number of icache ways must be smaller or equal the number of L15 ways" ); // assert wrong 
parameterizations - assert (L15_SET_ASSOC >= DCACHE_SET_ASSOC) + assert (CVA6Cfg.DCACHE_SET_ASSOC >= CVA6Cfg.DCACHE_SET_ASSOC) else $fatal( 1, "[l15_adapter] number of dcache ways must be smaller or equal the number of L15 ways" ); // invalidation address returned by L1.5 is 16 bit - assert (16 >= DCACHE_INDEX_WIDTH && 16 >= ICACHE_INDEX_WIDTH) + assert (16 >= CVA6Cfg.DCACHE_INDEX_WIDTH && 16 >= CVA6Cfg.ICACHE_INDEX_WIDTH) else $fatal(1, "[l15_adapter] maximum number of index bits supported by L1.5 is 16"); end `endif diff --git a/core/commit_stage.sv b/core/commit_stage.sv index 37b69af04f..59284f254a 100644 --- a/core/commit_stage.sv +++ b/core/commit_stage.sv @@ -16,7 +16,9 @@ module commit_stage import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type scoreboard_entry_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -30,23 +32,23 @@ module commit_stage output logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_o, // acknowledge that we are indeed committing // to register file output logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_o, // register file write address - output logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_o, // register file write data + output logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_o, // register file write data output logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_o, // register file write enable output logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_o, // floating point register enable // Atomic memory operations input amo_resp_t amo_resp_i, // result of AMO operation // to CSR file and PC Gen (because on certain CSR instructions we'll need to flush the whole pipeline) - output logic [riscv::VLEN-1:0] pc_o, + output logic [CVA6Cfg.VLEN-1:0] pc_o, // to/from CSR file output fu_op csr_op_o, // decoded CSR operation - output riscv::xlen_t csr_wdata_o, // data to write 
to CSR - input riscv::xlen_t csr_rdata_i, // data to read from CSR + output logic [CVA6Cfg.XLEN-1:0] csr_wdata_o, // data to write to CSR + input logic [CVA6Cfg.XLEN-1:0] csr_rdata_i, // data to read from CSR input exception_t csr_exception_i, // exception or interrupt occurred in CSR stage (the same as commit) output logic csr_write_fflags_o, // write the fflags CSR // commit signals to ex output logic commit_lsu_o, // commit the pending store input logic commit_lsu_ready_i, // commit buffer of LSU is ready - output logic [TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] commit_tran_id_o, // transaction id of first commit port output logic amo_valid_commit_o, // valid AMO in commit stage input logic no_st_pending_i, // there is no store pending output logic commit_csr_o, // commit the pending CSR instruction @@ -106,9 +108,9 @@ module commit_stage commit_lsu_o = 1'b0; commit_csr_o = 1'b0; // amos will commit on port 0 - wdata_o[0] = (CVA6Cfg.RVA && amo_resp_i.ack) ? amo_resp_i.result[riscv::XLEN-1:0] : commit_instr_i[0].result; + wdata_o[0] = (CVA6Cfg.RVA && amo_resp_i.ack) ? 
amo_resp_i.result[CVA6Cfg.XLEN-1:0] : commit_instr_i[0].result; csr_op_o = ADD; // this corresponds to a CSR NOP - csr_wdata_o = {riscv::XLEN{1'b0}}; + csr_wdata_o = {CVA6Cfg.XLEN{1'b0}}; fence_i_o = 1'b0; fence_o = 1'b0; sfence_vma_o = 1'b0; @@ -143,7 +145,7 @@ module commit_stage if (CVA6Cfg.FpPresent) begin if (commit_instr_i[0].fu inside {FPU, FPU_VEC}) begin // write the CSR with potential exception flags from retiring floating point instruction - csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]}; + csr_wdata_o = {{CVA6Cfg.XLEN - 5{1'b0}}, commit_instr_i[0].ex.cause[4:0]}; csr_write_fflags_o = 1'b1; commit_ack_o[0] = 1'b1; end @@ -245,10 +247,10 @@ module commit_stage if (CVA6Cfg.FpPresent && commit_instr_i[1].fu inside {FPU, FPU_VEC}) begin if (csr_write_fflags_o) csr_wdata_o = { - {riscv::XLEN - 5{1'b0}}, + {CVA6Cfg.XLEN - 5{1'b0}}, (commit_instr_i[0].ex.cause[4:0] | commit_instr_i[1].ex.cause[4:0]) }; - else csr_wdata_o = {{riscv::XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]}; + else csr_wdata_o = {{CVA6Cfg.XLEN - 5{1'b0}}, commit_instr_i[1].ex.cause[4:0]}; csr_write_fflags_o = 1'b1; end diff --git a/core/compressed_decoder.sv b/core/compressed_decoder.sv index e816972275..8423628292 100644 --- a/core/compressed_decoder.sv +++ b/core/compressed_decoder.sv @@ -104,7 +104,7 @@ module compressed_decoder #( // c.ld -> ld rd', imm(rs1') // RV32 // c.flw -> flw fprd', imm(rs1') - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin // CLD: | funct3 | imm[5:3] | rs1' | imm[7:6] | rd' | C0 | instr_o = { 4'b0, @@ -275,7 +275,7 @@ module compressed_decoder #( // c.sd -> sd rs2', imm(rs1') // RV32 // c.fsw -> fsw fprs2', imm(rs1') - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin instr_o = { 4'b0, instr_i[6:5], @@ -336,7 +336,7 @@ module compressed_decoder #( riscv::OpcodeC1Addiw: begin // or riscv::OpcodeC1Jal for RV32IC - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin // c.addiw -> addiw rd, rd, nzimm for 
RV64IC if (instr_i[11:7] != 5'h0) begin // only valid if the destination is not r0 instr_o = { @@ -509,7 +509,7 @@ module compressed_decoder #( end 3'b100: begin - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin // c.subw -> subw rd', rd', rs2' instr_o = { 2'b01, @@ -529,7 +529,7 @@ module compressed_decoder #( end 3'b101: begin - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin // c.addw -> addw rd', rd', rs2' instr_o = { 2'b00, @@ -588,7 +588,7 @@ module compressed_decoder #( end 3'b001: begin - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin // c.sext.b -> sext.b rd', rd' instr_o = { 7'h30, @@ -604,9 +604,9 @@ module compressed_decoder #( end 3'b010: begin - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin // c.zext.h -> zext.h rd', rd' - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin instr_o = { 7'h4, 5'h0, @@ -633,7 +633,7 @@ module compressed_decoder #( end 3'b011: begin - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin // c.sext.h -> sext.h rd', rd' instr_o = { 7'h30, @@ -649,9 +649,9 @@ module compressed_decoder #( end 3'b100: begin - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin // c.zext.w -> add.uw - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin instr_o = { 7'h4, 5'h0, @@ -788,7 +788,7 @@ module compressed_decoder #( // c.ldsp -> ld rd, imm(x2) // RV32 // c.flwsp -> flw fprd, imm(x2) - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin instr_o = { 3'b0, instr_i[4:2], @@ -884,7 +884,7 @@ module compressed_decoder #( // c.sdsp -> sd rs2, imm(x2) // RV32 // c.fswsp -> fsw fprs2, imm(x2) - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin instr_o = { 3'b0, instr_i[9:7], diff --git a/core/controller.sv b/core/controller.sv index eec20cb092..cffa6b2d8e 100644 --- a/core/controller.sv +++ b/core/controller.sv @@ -16,7 +16,8 @@ module controller import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + 
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type bp_resolve_t = logic ) ( input logic clk_i, input logic rst_ni, diff --git a/core/csr_buffer.sv b/core/csr_buffer.sv index 57be04dda4..814a9a5e2b 100644 --- a/core/csr_buffer.sv +++ b/core/csr_buffer.sv @@ -17,7 +17,8 @@ module csr_buffer import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type fu_data_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -27,7 +28,7 @@ module csr_buffer output logic csr_ready_o, // FU is ready e.g. not busy input logic csr_valid_i, // Input is valid - output riscv::xlen_t csr_result_o, + output logic [CVA6Cfg.XLEN-1:0] csr_result_o, input logic csr_commit_i, // commit the pending CSR OP // to CSR file output logic [11:0] csr_addr_o // CSR address to commit stage diff --git a/core/csr_regfile.sv b/core/csr_regfile.sv index 0c43864e40..2c55914fd0 100644 --- a/core/csr_regfile.sv +++ b/core/csr_regfile.sv @@ -17,6 +17,9 @@ module csr_regfile import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type irq_ctrl_t = logic, + parameter type scoreboard_entry_t = logic, parameter int AsidWidth = 1, parameter int unsigned MHPMCounterNum = 6 ) ( @@ -30,26 +33,26 @@ module csr_regfile input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_i, // the instruction we want to commit input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // Commit acknowledged a instruction -> increase instret CSR // Core and Cluster ID - input logic[riscv::VLEN-1:0] boot_addr_i, // Address from which to start booting, mtvec is set to the same address - input logic[riscv::XLEN-1:0] hart_id_i, // Hart id in a multicore environment (reflected in a CSR) + input logic[CVA6Cfg.VLEN-1:0] boot_addr_i, // Address 
from which to start booting, mtvec is set to the same address + input logic[CVA6Cfg.XLEN-1:0] hart_id_i, // Hart id in a multicore environment (reflected in a CSR) // we are taking an exception input exception_t ex_i, // We've got an exception from the commit stage, take it input fu_op csr_op_i, // Operation to perform on the CSR file input logic [11:0] csr_addr_i, // Address of the register to read/write - input logic [riscv::XLEN-1:0] csr_wdata_i, // Write data in - output logic [riscv::XLEN-1:0] csr_rdata_o, // Read data out + input logic [CVA6Cfg.XLEN-1:0] csr_wdata_i, // Write data in + output logic [CVA6Cfg.XLEN-1:0] csr_rdata_o, // Read data out input logic dirty_fp_state_i, // Mark the FP sate as dirty input logic csr_write_fflags_i, // Write fflags register e.g.: we are retiring a floating point instruction input logic dirty_v_state_i, // Mark the V state as dirty - input logic [riscv::VLEN-1:0] pc_i, // PC of instruction accessing the CSR + input logic [CVA6Cfg.VLEN-1:0] pc_i, // PC of instruction accessing the CSR output exception_t csr_exception_o, // attempts to access a CSR without appropriate privilege // level or to write a read-only register also // raises illegal instruction exceptions. 
// Interrupts/Exceptions - output logic [riscv::VLEN-1:0] epc_o, // Output the exception PC to PC Gen, the correct CSR (mepc, sepc) is set accordingly + output logic [CVA6Cfg.VLEN-1:0] epc_o, // Output the exception PC to PC Gen, the correct CSR (mepc, sepc) is set accordingly output logic eret_o, // Return from exception, set the PC of epc_o - output logic [riscv::VLEN-1:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec) + output logic [CVA6Cfg.VLEN-1:0] trap_vector_base_o, // Output base of exception vector, correct CSR is output (mtvec, stvec) output riscv::priv_lvl_t priv_lvl_o, // Current privilege level the CPU is in // FP Imprecise exceptions input logic [4:0] acc_fflags_ex_i, // Imprecise FP exception from the accelerator (fcsr.fflags format) @@ -69,7 +72,7 @@ module csr_regfile output riscv::priv_lvl_t ld_st_priv_lvl_o, // Privilege level at which load and stores should happen output logic sum_o, output logic mxr_o, - output logic [riscv::PPNW-1:0] satp_ppn_o, + output logic [CVA6Cfg.PPNW-1:0] satp_ppn_o, output logic [AsidWidth-1:0] asid_o, // external interrupts input logic [1:0] irq_i, // external interrupt in @@ -89,18 +92,58 @@ module csr_regfile output logic acc_cons_en_o, // Accelerator memory consistent mode // Performance Counter output logic [11:0] perf_addr_o, // read/write address to performance counter module - output logic [riscv::XLEN-1:0] perf_data_o, // write data to performance counter module - input logic [riscv::XLEN-1:0] perf_data_i, // read data from performance counter module + output logic [CVA6Cfg.XLEN-1:0] perf_data_o, // write data to performance counter module + input logic [CVA6Cfg.XLEN-1:0] perf_data_i, // read data from performance counter module output logic perf_we_o, // PMPs output riscv::pmpcfg_t [15:0] pmpcfg_o, // PMP configuration containing pmpcfg for max 16 PMPs - output logic [15:0][riscv::PLEN-3:0] pmpaddr_o, // PMP addresses + output logic [15:0][CVA6Cfg.PLEN-3:0] 
pmpaddr_o, // PMP addresses output logic [31:0] mcountinhibit_o ); + + typedef struct packed { + logic [CVA6Cfg.ModeW-1:0] mode; + logic [CVA6Cfg.ASIDW-1:0] asid; + logic [CVA6Cfg.PPNW-1:0] ppn; + } satp_t; + + localparam logic [63:0] SSTATUS_UIE = 'h00000001; + localparam logic [63:0] SSTATUS_SIE = 'h00000002; + localparam logic [63:0] SSTATUS_SPIE = 'h00000020; + localparam logic [63:0] SSTATUS_SPP = 'h00000100; + localparam logic [63:0] SSTATUS_FS = 'h00006000; + localparam logic [63:0] SSTATUS_XS = 'h00018000; + localparam logic [63:0] SSTATUS_SUM = 'h00040000; + localparam logic [63:0] SSTATUS_MXR = 'h00080000; + localparam logic [63:0] SSTATUS_UPIE = 'h00000010; + localparam logic [63:0] SSTATUS_UXL = 64'h0000000300000000; + localparam logic [63:0] SSTATUS_SD = {CVA6Cfg.IS_XLEN64, 31'h00000000, ~CVA6Cfg.IS_XLEN64, 31'h00000000}; + + // read mask for SSTATUS over MMSTATUS + localparam logic [63:0] SMODE_STATUS_READ_MASK = SSTATUS_UIE + | SSTATUS_SIE + | SSTATUS_SPIE + | SSTATUS_SPP + | SSTATUS_FS + | SSTATUS_XS + | SSTATUS_SUM + | SSTATUS_MXR + | SSTATUS_UPIE + | SSTATUS_SPIE + | SSTATUS_UXL + | SSTATUS_SD; + + localparam logic [63:0] SMODE_STATUS_WRITE_MASK = SSTATUS_SIE + | SSTATUS_SPIE + | SSTATUS_SPP + | SSTATUS_FS + | SSTATUS_SUM + | SSTATUS_MXR; + // internal signal to keep track of access exceptions logic read_access_exception, update_access_exception, privilege_violation; logic csr_we, csr_read; - riscv::xlen_t csr_wdata, csr_rdata; + logic [CVA6Cfg.XLEN-1:0] csr_wdata, csr_rdata; riscv::priv_lvl_t trap_to_priv_lvl; // register for enabling load store address translation, this is critical, hence the register logic en_ld_st_translation_d, en_ld_st_translation_q; @@ -111,8 +154,8 @@ module csr_regfile // CSR write causes us to mark the FPU state as dirty logic dirty_fp_state_csr; riscv::mstatus_rv_t mstatus_q, mstatus_d; - riscv::xlen_t mstatus_extended; - riscv::satp_t satp_q, satp_d; + logic [CVA6Cfg.XLEN-1:0] mstatus_extended; + satp_t satp_q, satp_d; 
riscv::dcsr_t dcsr_q, dcsr_d; riscv::csr_t csr_addr; // privilege level register @@ -121,29 +164,29 @@ module csr_regfile logic debug_mode_q, debug_mode_d; logic mtvec_rst_load_q; // used to determine whether we came out of reset - riscv::xlen_t dpc_q, dpc_d; - riscv::xlen_t dscratch0_q, dscratch0_d; - riscv::xlen_t dscratch1_q, dscratch1_d; - riscv::xlen_t mtvec_q, mtvec_d; - riscv::xlen_t medeleg_q, medeleg_d; - riscv::xlen_t mideleg_q, mideleg_d; - riscv::xlen_t mip_q, mip_d; - riscv::xlen_t mie_q, mie_d; - riscv::xlen_t mcounteren_q, mcounteren_d; - riscv::xlen_t mscratch_q, mscratch_d; - riscv::xlen_t mepc_q, mepc_d; - riscv::xlen_t mcause_q, mcause_d; - riscv::xlen_t mtval_q, mtval_d; - - riscv::xlen_t stvec_q, stvec_d; - riscv::xlen_t scounteren_q, scounteren_d; - riscv::xlen_t sscratch_q, sscratch_d; - riscv::xlen_t sepc_q, sepc_d; - riscv::xlen_t scause_q, scause_d; - riscv::xlen_t stval_q, stval_d; - riscv::xlen_t dcache_q, dcache_d; - riscv::xlen_t icache_q, icache_d; - riscv::xlen_t acc_cons_q, acc_cons_d; + logic [CVA6Cfg.XLEN-1:0] dpc_q, dpc_d; + logic [CVA6Cfg.XLEN-1:0] dscratch0_q, dscratch0_d; + logic [CVA6Cfg.XLEN-1:0] dscratch1_q, dscratch1_d; + logic [CVA6Cfg.XLEN-1:0] mtvec_q, mtvec_d; + logic [CVA6Cfg.XLEN-1:0] medeleg_q, medeleg_d; + logic [CVA6Cfg.XLEN-1:0] mideleg_q, mideleg_d; + logic [CVA6Cfg.XLEN-1:0] mip_q, mip_d; + logic [CVA6Cfg.XLEN-1:0] mie_q, mie_d; + logic [CVA6Cfg.XLEN-1:0] mcounteren_q, mcounteren_d; + logic [CVA6Cfg.XLEN-1:0] mscratch_q, mscratch_d; + logic [CVA6Cfg.XLEN-1:0] mepc_q, mepc_d; + logic [CVA6Cfg.XLEN-1:0] mcause_q, mcause_d; + logic [CVA6Cfg.XLEN-1:0] mtval_q, mtval_d; + + logic [CVA6Cfg.XLEN-1:0] stvec_q, stvec_d; + logic [CVA6Cfg.XLEN-1:0] scounteren_q, scounteren_d; + logic [CVA6Cfg.XLEN-1:0] sscratch_q, sscratch_d; + logic [CVA6Cfg.XLEN-1:0] sepc_q, sepc_d; + logic [CVA6Cfg.XLEN-1:0] scause_q, scause_d; + logic [CVA6Cfg.XLEN-1:0] stval_q, stval_d; + logic [CVA6Cfg.XLEN-1:0] dcache_q, dcache_d; + logic 
[CVA6Cfg.XLEN-1:0] icache_q, icache_d; + logic [CVA6Cfg.XLEN-1:0] acc_cons_q, acc_cons_d; logic wfi_d, wfi_q; @@ -151,22 +194,22 @@ module csr_regfile logic [63:0] instret_q, instret_d; riscv::pmpcfg_t [15:0] pmpcfg_q, pmpcfg_d; - logic [15:0][riscv::PLEN-3:0] pmpaddr_q, pmpaddr_d; + logic [15:0][CVA6Cfg.PLEN-3:0] pmpaddr_q, pmpaddr_d; logic [MHPMCounterNum+3-1:0] mcountinhibit_d, mcountinhibit_q; logic [3:0] index; - localparam riscv::xlen_t IsaCode = (riscv::XLEN'(CVA6Cfg.RVA) << 0) // A - Atomic Instructions extension - | (riscv::XLEN'(CVA6Cfg.RVC) << 2) // C - Compressed extension - | (riscv::XLEN'(CVA6Cfg.RVD) << 3) // D - Double precsision floating-point extension - | (riscv::XLEN'(CVA6Cfg.RVF) << 5) // F - Single precsision floating-point extension - | (riscv::XLEN'(1) << 8) // I - RV32I/64I/128I base ISA - | (riscv::XLEN'(1) << 12) // M - Integer Multiply/Divide extension - | (riscv::XLEN'(0) << 13) // N - User level interrupts supported - | (riscv::XLEN'(CVA6Cfg.RVS) << 18) // S - Supervisor mode implemented - | (riscv::XLEN'(CVA6Cfg.RVU) << 20) // U - User mode implemented - | (riscv::XLEN'(CVA6Cfg.RVV) << 21) // V - Vector extension - | (riscv::XLEN'(CVA6Cfg.NSX) << 23) // X - Non-standard extensions present - | ((riscv::XLEN == 64 ? 
2 : 1) << riscv::XLEN - 2); // MXL + localparam logic [CVA6Cfg.XLEN-1:0] IsaCode = (CVA6Cfg.XLEN'(CVA6Cfg.RVA) << 0) // A - Atomic Instructions extension + | (CVA6Cfg.XLEN'(CVA6Cfg.RVC) << 2) // C - Compressed extension + | (CVA6Cfg.XLEN'(CVA6Cfg.RVD) << 3) // D - Double precision floating-point extension + | (CVA6Cfg.XLEN'(CVA6Cfg.RVF) << 5) // F - Single precision floating-point extension + | (CVA6Cfg.XLEN'(1) << 8) // I - RV32I/64I/128I base ISA + | (CVA6Cfg.XLEN'(1) << 12) // M - Integer Multiply/Divide extension + | (CVA6Cfg.XLEN'(0) << 13) // N - User level interrupts supported + | (CVA6Cfg.XLEN'(CVA6Cfg.RVS) << 18) // S - Supervisor mode implemented + | (CVA6Cfg.XLEN'(CVA6Cfg.RVU) << 20) // U - User mode implemented + | (CVA6Cfg.XLEN'(CVA6Cfg.RVV) << 21) // V - Vector extension + | (CVA6Cfg.XLEN'(CVA6Cfg.NSX) << 23) // X - Non-standard extensions present + | ((CVA6Cfg.XLEN == 64 ? 2 : 1) << CVA6Cfg.XLEN - 2); // MXL assign pmpcfg_o = pmpcfg_q[15:0]; assign pmpaddr_o = pmpaddr_q; @@ -181,7 +224,7 @@ module csr_regfile // ---------------- // CSR Read logic // ---------------- - assign mstatus_extended = riscv::IS_XLEN64 ? mstatus_q[riscv::XLEN-1:0] : + assign mstatus_extended = CVA6Cfg.IS_XLEN64 ?
mstatus_q[CVA6Cfg.XLEN-1:0] : {mstatus_q.sd, mstatus_q.wpri3[7:0], mstatus_q[22:0]}; @@ -196,21 +239,21 @@ module csr_regfile unique case (csr_addr.address) riscv::CSR_FFLAGS: begin if (CVA6Cfg.FpPresent) begin - csr_rdata = {{riscv::XLEN - 5{1'b0}}, fcsr_q.fflags}; + csr_rdata = {{CVA6Cfg.XLEN - 5{1'b0}}, fcsr_q.fflags}; end else begin read_access_exception = 1'b1; end end riscv::CSR_FRM: begin if (CVA6Cfg.FpPresent) begin - csr_rdata = {{riscv::XLEN - 3{1'b0}}, fcsr_q.frm}; + csr_rdata = {{CVA6Cfg.XLEN - 3{1'b0}}, fcsr_q.frm}; end else begin read_access_exception = 1'b1; end end riscv::CSR_FCSR: begin if (CVA6Cfg.FpPresent) begin - csr_rdata = {{riscv::XLEN - 8{1'b0}}, fcsr_q.frm, fcsr_q.fflags}; + csr_rdata = {{CVA6Cfg.XLEN - 8{1'b0}}, fcsr_q.frm, fcsr_q.fflags}; end else begin read_access_exception = 1'b1; end @@ -218,14 +261,14 @@ module csr_regfile // non-standard extension riscv::CSR_FTRAN: begin if (CVA6Cfg.FpPresent) begin - csr_rdata = {{riscv::XLEN - 7{1'b0}}, fcsr_q.fprec}; + csr_rdata = {{CVA6Cfg.XLEN - 7{1'b0}}, fcsr_q.fprec}; end else begin read_access_exception = 1'b1; end end // debug registers riscv::CSR_DCSR: - if (CVA6Cfg.DebugEn) csr_rdata = {{riscv::XLEN - 32{1'b0}}, dcsr_q}; + if (CVA6Cfg.DebugEn) csr_rdata = {{CVA6Cfg.XLEN - 32{1'b0}}, dcsr_q}; else read_access_exception = 1'b1; riscv::CSR_DPC: if (CVA6Cfg.DebugEn) csr_rdata = dpc_q; @@ -244,7 +287,7 @@ module csr_regfile // supervisor registers riscv::CSR_SSTATUS: begin if (CVA6Cfg.RVS) - csr_rdata = mstatus_extended & ariane_pkg::SMODE_STATUS_READ_MASK[riscv::XLEN-1:0]; + csr_rdata = mstatus_extended & SMODE_STATUS_READ_MASK[CVA6Cfg.XLEN-1:0]; else read_access_exception = 1'b1; end riscv::CSR_SIE: @@ -286,7 +329,7 @@ module csr_regfile // machine mode registers riscv::CSR_MSTATUS: csr_rdata = mstatus_extended; riscv::CSR_MSTATUSH: - if (riscv::XLEN == 32) csr_rdata = '0; + if (CVA6Cfg.XLEN == 32) csr_rdata = '0; else read_access_exception = 1'b1; riscv::CSR_MISA: csr_rdata = IsaCode; 
riscv::CSR_MEDELEG: @@ -303,29 +346,29 @@ module csr_regfile riscv::CSR_MCAUSE: csr_rdata = mcause_q; riscv::CSR_MTVAL: csr_rdata = mtval_q; riscv::CSR_MIP: csr_rdata = mip_q; - riscv::CSR_MVENDORID: csr_rdata = OPENHWGROUP_MVENDORID; - riscv::CSR_MARCHID: csr_rdata = ARIANE_MARCHID; + riscv::CSR_MVENDORID: csr_rdata = {{CVA6Cfg.XLEN - 32{1'b0}}, OPENHWGROUP_MVENDORID}; + riscv::CSR_MARCHID: csr_rdata = {{CVA6Cfg.XLEN - 32{1'b0}}, ARIANE_MARCHID}; riscv::CSR_MIMPID: csr_rdata = '0; // not implemented riscv::CSR_MHARTID: csr_rdata = hart_id_i; riscv::CSR_MCONFIGPTR: csr_rdata = '0; // not implemented riscv::CSR_MCOUNTINHIBIT: - csr_rdata = {{(riscv::XLEN - (MHPMCounterNum + 3)) {1'b0}}, mcountinhibit_q}; + csr_rdata = {{(CVA6Cfg.XLEN - (MHPMCounterNum + 3)) {1'b0}}, mcountinhibit_q}; // Counters and Timers - riscv::CSR_MCYCLE: csr_rdata = cycle_q[riscv::XLEN-1:0]; + riscv::CSR_MCYCLE: csr_rdata = cycle_q[CVA6Cfg.XLEN-1:0]; riscv::CSR_MCYCLEH: - if (riscv::XLEN == 32) csr_rdata = cycle_q[63:32]; + if (CVA6Cfg.XLEN == 32) csr_rdata = cycle_q[63:32]; else read_access_exception = 1'b1; - riscv::CSR_MINSTRET: csr_rdata = instret_q[riscv::XLEN-1:0]; + riscv::CSR_MINSTRET: csr_rdata = instret_q[CVA6Cfg.XLEN-1:0]; riscv::CSR_MINSTRETH: - if (riscv::XLEN == 32) csr_rdata = instret_q[63:32]; + if (CVA6Cfg.XLEN == 32) csr_rdata = instret_q[63:32]; else read_access_exception = 1'b1; - riscv::CSR_CYCLE: csr_rdata = cycle_q[riscv::XLEN-1:0]; + riscv::CSR_CYCLE: csr_rdata = cycle_q[CVA6Cfg.XLEN-1:0]; riscv::CSR_CYCLEH: - if (riscv::XLEN == 32) csr_rdata = cycle_q[63:32]; + if (CVA6Cfg.XLEN == 32) csr_rdata = cycle_q[63:32]; else read_access_exception = 1'b1; - riscv::CSR_INSTRET: csr_rdata = instret_q[riscv::XLEN-1:0]; + riscv::CSR_INSTRET: csr_rdata = instret_q[CVA6Cfg.XLEN-1:0]; riscv::CSR_INSTRETH: - if (riscv::XLEN == 32) csr_rdata = instret_q[63:32]; + if (CVA6Cfg.XLEN == 32) csr_rdata = instret_q[63:32]; else read_access_exception = 1'b1; //Event Selector 
riscv::CSR_MHPM_EVENT_3, @@ -419,7 +462,7 @@ module csr_regfile riscv::CSR_MHPM_COUNTER_29H, riscv::CSR_MHPM_COUNTER_30H, riscv::CSR_MHPM_COUNTER_31H : - if (riscv::XLEN == 32) csr_rdata = perf_data_i; + if (CVA6Cfg.XLEN == 32) csr_rdata = perf_data_i; else read_access_exception = 1'b1; // Performance counters (User Mode - R/O Shadows) @@ -483,7 +526,7 @@ module csr_regfile riscv::CSR_HPM_COUNTER_29H, riscv::CSR_HPM_COUNTER_30H, riscv::CSR_HPM_COUNTER_31H : - if (riscv::XLEN == 32) csr_rdata = perf_data_i; + if (CVA6Cfg.XLEN == 32) csr_rdata = perf_data_i; else read_access_exception = 1'b1; // custom (non RISC-V) cache control @@ -498,13 +541,13 @@ module csr_regfile end end // PMPs - riscv::CSR_PMPCFG0: csr_rdata = pmpcfg_q[riscv::XLEN/8-1:0]; + riscv::CSR_PMPCFG0: csr_rdata = pmpcfg_q[CVA6Cfg.XLEN/8-1:0]; riscv::CSR_PMPCFG1: - if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[7:4]; + if (CVA6Cfg.XLEN == 32) csr_rdata = pmpcfg_q[7:4]; else read_access_exception = 1'b1; - riscv::CSR_PMPCFG2: csr_rdata = pmpcfg_q[8+:riscv::XLEN/8]; + riscv::CSR_PMPCFG2: csr_rdata = pmpcfg_q[8+:CVA6Cfg.XLEN/8]; riscv::CSR_PMPCFG3: - if (riscv::XLEN == 32) csr_rdata = pmpcfg_q[15:12]; + if (CVA6Cfg.XLEN == 32) csr_rdata = pmpcfg_q[15:12]; else read_access_exception = 1'b1; // PMPADDR riscv::CSR_PMPADDR0, @@ -529,8 +572,8 @@ module csr_regfile // -> last bit of pmpaddr must be set 0/1 based on the mode: // NA4, NAPOT: 1 // TOR, OFF: 0 - if (pmpcfg_q[index].addr_mode[1] == 1'b1) csr_rdata = pmpaddr_q[index][riscv::PLEN-3:0]; - else csr_rdata = {pmpaddr_q[index][riscv::PLEN-3:1], 1'b0}; + if (pmpcfg_q[index].addr_mode[1] == 1'b1) csr_rdata = pmpaddr_q[index][CVA6Cfg.PLEN-3:0]; + else csr_rdata = {pmpaddr_q[index][CVA6Cfg.PLEN-3:1], 1'b0}; end default: read_access_exception = 1'b1; endcase @@ -539,9 +582,9 @@ module csr_regfile // --------------------------- // CSR Write and update logic // --------------------------- - riscv::xlen_t mask; + logic [CVA6Cfg.XLEN-1:0] mask; always_comb begin : 
csr_update - automatic riscv::satp_t satp; + automatic satp_t satp; automatic logic [63:0] instret; @@ -592,7 +635,7 @@ module csr_regfile // boot_addr_i will be assigned a constant // on the top-level. if (mtvec_rst_load_q) begin - mtvec_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, boot_addr_i} + 'h40; + mtvec_d = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, boot_addr_i} + 'h40; end else begin mtvec_d = mtvec_q; end @@ -699,8 +742,8 @@ module csr_regfile // sstatus is a subset of mstatus - mask it accordingly riscv::CSR_SSTATUS: begin if (CVA6Cfg.RVS) begin - mask = ariane_pkg::SMODE_STATUS_WRITE_MASK[riscv::XLEN-1:0]; - mstatus_d = (mstatus_q & ~{{64-riscv::XLEN{1'b0}}, mask}) | {{64-riscv::XLEN{1'b0}}, (csr_wdata & mask)}; + mask = SMODE_STATUS_WRITE_MASK[CVA6Cfg.XLEN-1:0]; + mstatus_d = (mstatus_q & ~{{64-CVA6Cfg.XLEN{1'b0}}, mask}) | {{64-CVA6Cfg.XLEN{1'b0}}, (csr_wdata & mask)}; // hardwire to zero if floating point extension is not present if (!CVA6Cfg.FpPresent) begin mstatus_d.fs = riscv::Off; @@ -737,16 +780,16 @@ module csr_regfile end riscv::CSR_STVEC: - if (CVA6Cfg.RVS) stvec_d = {csr_wdata[riscv::XLEN-1:2], 1'b0, csr_wdata[0]}; + if (CVA6Cfg.RVS) stvec_d = {csr_wdata[CVA6Cfg.XLEN-1:2], 1'b0, csr_wdata[0]}; else update_access_exception = 1'b1; riscv::CSR_SCOUNTEREN: - if (CVA6Cfg.RVS) scounteren_d = {{riscv::XLEN - 32{1'b0}}, csr_wdata[31:0]}; + if (CVA6Cfg.RVS) scounteren_d = {{CVA6Cfg.XLEN - 32{1'b0}}, csr_wdata[31:0]}; else update_access_exception = 1'b1; riscv::CSR_SSCRATCH: if (CVA6Cfg.RVS) sscratch_d = csr_wdata; else update_access_exception = 1'b1; riscv::CSR_SEPC: - if (CVA6Cfg.RVS) sepc_d = {csr_wdata[riscv::XLEN-1:1], 1'b0}; + if (CVA6Cfg.RVS) sepc_d = {csr_wdata[CVA6Cfg.XLEN-1:1], 1'b0}; else update_access_exception = 1'b1; riscv::CSR_SCAUSE: if (CVA6Cfg.RVS) scause_d = csr_wdata; @@ -760,12 +803,12 @@ module csr_regfile // intercept SATP writes if in S-Mode and TVM is enabled if (priv_lvl_o == riscv::PRIV_LVL_S && mstatus_q.tvm) update_access_exception 
= 1'b1; else begin - satp = riscv::satp_t'(csr_wdata); + satp = satp_t'(csr_wdata); // only make ASID_LEN - 1 bit stick, that way software can figure out how many ASID bits are supported - satp.asid = satp.asid & {{(riscv::ASIDW - AsidWidth) {1'b0}}, {AsidWidth{1'b1}}}; + satp.asid = satp.asid & {{(CVA6Cfg.ASIDW - AsidWidth) {1'b0}}, {AsidWidth{1'b1}}}; // only update if we actually support this mode if (riscv::vm_mode_t'(satp.mode) == riscv::ModeOff || - riscv::vm_mode_t'(satp.mode) == riscv::MODE_SV) + riscv::vm_mode_t'(satp.mode) == CVA6Cfg.MODE_SV) satp_d = satp; end // changing the mode can have side-effects on address translation (e.g.: other instructions), re-fetch @@ -777,7 +820,7 @@ module csr_regfile end riscv::CSR_MSTATUS: begin - mstatus_d = {{64 - riscv::XLEN{1'b0}}, csr_wdata}; + mstatus_d = {{64 - CVA6Cfg.XLEN{1'b0}}, csr_wdata}; mstatus_d.xs = riscv::Off; if (!CVA6Cfg.FpPresent) begin mstatus_d.fs = riscv::Off; @@ -792,7 +835,7 @@ module csr_regfile // this register has side-effects on other registers, flush the pipeline flush_o = 1'b1; end - riscv::CSR_MSTATUSH: if (riscv::XLEN != 32) update_access_exception = 1'b1; + riscv::CSR_MSTATUSH: if (CVA6Cfg.XLEN != 32) update_access_exception = 1'b1; // MISA is WARL (Write Any Value, Reads Legal Value) riscv::CSR_MISA: ; // machine exception delegation register @@ -827,15 +870,15 @@ module csr_regfile end riscv::CSR_MTVEC: begin - mtvec_d = {csr_wdata[riscv::XLEN-1:2], 1'b0, csr_wdata[0]}; + mtvec_d = {csr_wdata[CVA6Cfg.XLEN-1:2], 1'b0, csr_wdata[0]}; // we are in vector mode, this implementation requires the additional // alignment constraint of 64 * 4 bytes - if (csr_wdata[0]) mtvec_d = {csr_wdata[riscv::XLEN-1:8], 7'b0, csr_wdata[0]}; + if (csr_wdata[0]) mtvec_d = {csr_wdata[CVA6Cfg.XLEN-1:8], 7'b0, csr_wdata[0]}; end - riscv::CSR_MCOUNTEREN: mcounteren_d = {{riscv::XLEN - 32{1'b0}}, csr_wdata[31:0]}; + riscv::CSR_MCOUNTEREN: mcounteren_d = {{CVA6Cfg.XLEN - 32{1'b0}}, csr_wdata[31:0]}; 
riscv::CSR_MSCRATCH: mscratch_d = csr_wdata; - riscv::CSR_MEPC: mepc_d = {csr_wdata[riscv::XLEN-1:1], 1'b0}; + riscv::CSR_MEPC: mepc_d = {csr_wdata[CVA6Cfg.XLEN-1:1], 1'b0}; riscv::CSR_MCAUSE: mcause_d = csr_wdata; riscv::CSR_MTVAL: mtval_d = csr_wdata; riscv::CSR_MIP: begin @@ -845,13 +888,13 @@ module csr_regfile riscv::CSR_MCOUNTINHIBIT: mcountinhibit_d = {csr_wdata[MHPMCounterNum+2:2], 1'b0, csr_wdata[0]}; // performance counters - riscv::CSR_MCYCLE: cycle_d[riscv::XLEN-1:0] = csr_wdata; + riscv::CSR_MCYCLE: cycle_d[CVA6Cfg.XLEN-1:0] = csr_wdata; riscv::CSR_MCYCLEH: - if (riscv::XLEN == 32) cycle_d[63:32] = csr_wdata; + if (CVA6Cfg.XLEN == 32) cycle_d[63:32] = csr_wdata; else update_access_exception = 1'b1; - riscv::CSR_MINSTRET: instret_d[riscv::XLEN-1:0] = csr_wdata; + riscv::CSR_MINSTRET: instret_d[CVA6Cfg.XLEN-1:0] = csr_wdata; riscv::CSR_MINSTRETH: - if (riscv::XLEN == 32) instret_d[63:32] = csr_wdata; + if (CVA6Cfg.XLEN == 32) instret_d[63:32] = csr_wdata; else update_access_exception = 1'b1; //Event Selector riscv::CSR_MHPM_EVENT_3, @@ -950,15 +993,15 @@ module csr_regfile riscv::CSR_MHPM_COUNTER_30H, riscv::CSR_MHPM_COUNTER_31H : begin perf_we_o = 1'b1; - if (riscv::XLEN == 32) perf_data_o = csr_wdata; + if (CVA6Cfg.XLEN == 32) perf_data_o = csr_wdata; else update_access_exception = 1'b1; end - riscv::CSR_DCACHE: dcache_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit - riscv::CSR_ICACHE: icache_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + riscv::CSR_DCACHE: dcache_d = {{CVA6Cfg.XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + riscv::CSR_ICACHE: icache_d = {{CVA6Cfg.XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit riscv::CSR_ACC_CONS: begin if (CVA6Cfg.EnableAccelerator) begin - acc_cons_d = {{riscv::XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit + acc_cons_d = {{CVA6Cfg.XLEN - 1{1'b0}}, csr_wdata[0]}; // enable bit end else begin update_access_exception = 1'b1; end @@ -968,10 +1011,10 @@ module csr_regfile // 2. 
also refuse to update the entry below a locked TOR entry // Note that writes to pmpcfg below a locked TOR entry are valid riscv::CSR_PMPCFG0: - for (int i = 0; i < (riscv::XLEN / 8); i++) + for (int i = 0; i < (CVA6Cfg.XLEN / 8); i++) if (!pmpcfg_q[i].locked) pmpcfg_d[i] = csr_wdata[i*8+:8]; riscv::CSR_PMPCFG1: begin - if (riscv::XLEN == 32) begin + if (CVA6Cfg.XLEN == 32) begin for (int i = 0; i < 4; i++) if (!pmpcfg_q[i+4].locked) pmpcfg_d[i+4] = csr_wdata[i*8+:8]; end else begin @@ -979,10 +1022,10 @@ module csr_regfile end end riscv::CSR_PMPCFG2: - for (int i = 0; i < (riscv::XLEN / 8); i++) + for (int i = 0; i < (CVA6Cfg.XLEN / 8); i++) if (!pmpcfg_q[i+8].locked) pmpcfg_d[i+8] = csr_wdata[i*8+:8]; riscv::CSR_PMPCFG3: begin - if (riscv::XLEN == 32) begin + if (CVA6Cfg.XLEN == 32) begin for (int i = 0; i < 4; i++) if (!pmpcfg_q[i+12].locked) pmpcfg_d[i+12] = csr_wdata[i*8+:8]; end else begin @@ -1009,7 +1052,7 @@ module csr_regfile automatic logic [3:0] index = csr_addr.csr_decode.address[3:0]; // check if the entry or the entry above is locked if (!pmpcfg_q[index].locked && !(pmpcfg_q[index+1].locked && pmpcfg_q[index].addr_mode == riscv::TOR)) begin - pmpaddr_d[index] = csr_wdata[riscv::PLEN-3:0]; + pmpaddr_d[index] = csr_wdata[CVA6Cfg.PLEN-3:0]; end end default: update_access_exception = 1'b1; @@ -1071,12 +1114,12 @@ module csr_regfile flush_o = 1'b0; // figure out where to trap to // a m-mode trap might be delegated if we are taking it in S mode - // first figure out if this was an exception or an interrupt e.g.: look at bit (XLEN-1) - // the cause register can only be $clog2(riscv::XLEN) bits long (as we only support XLEN exceptions) - if (CVA6Cfg.RVS && ((ex_i.cause[riscv::XLEN-1] && mideleg_q[ex_i.cause[$clog2( - riscv::XLEN - )-1:0]]) || (~ex_i.cause[riscv::XLEN-1] && medeleg_q[ex_i.cause[$clog2( - riscv::XLEN + // first figure out if this was an exception or an interrupt e.g.: look at bit (CVA6Cfg.XLEN-1) + // the cause register can only be 
$clog2(CVA6Cfg.XLEN) bits long (as we only support CVA6Cfg.XLEN exceptions) + if (CVA6Cfg.RVS && ((ex_i.cause[CVA6Cfg.XLEN-1] && mideleg_q[ex_i.cause[$clog2( + CVA6Cfg.XLEN + )-1:0]]) || (~ex_i.cause[CVA6Cfg.XLEN-1] && medeleg_q[ex_i.cause[$clog2( + CVA6Cfg.XLEN )-1:0]]))) begin // traps never transition from a more-privileged mode to a less privileged mode // so if we are already in M mode, stay there @@ -1093,7 +1136,7 @@ module csr_regfile // set cause scause_d = ex_i.cause; // set epc - sepc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + sepc_d = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{pc_i[CVA6Cfg.VLEN-1]}}, pc_i}; // set mtval or stval stval_d = (ariane_pkg::ZERO_TVAL && (ex_i.cause inside { @@ -1102,7 +1145,7 @@ module csr_regfile riscv::ENV_CALL_UMODE, riscv::ENV_CALL_SMODE, riscv::ENV_CALL_MMODE - } || ex_i.cause[riscv::XLEN-1])) ? '0 : ex_i.tval; + } || ex_i.cause[CVA6Cfg.XLEN-1])) ? '0 : ex_i.tval; // trap to machine mode end else begin // update mstatus @@ -1112,7 +1155,7 @@ module csr_regfile mstatus_d.mpp = priv_lvl_q; mcause_d = ex_i.cause; // set epc - mepc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + mepc_d = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{pc_i[CVA6Cfg.VLEN-1]}}, pc_i}; // set mtval or stval mtval_d = (ariane_pkg::ZERO_TVAL && (ex_i.cause inside { @@ -1121,7 +1164,7 @@ module csr_regfile riscv::ENV_CALL_UMODE, riscv::ENV_CALL_SMODE, riscv::ENV_CALL_MMODE - } || ex_i.cause[riscv::XLEN-1])) ? '0 : ex_i.tval; + } || ex_i.cause[CVA6Cfg.XLEN-1])) ? 
'0 : ex_i.tval; end priv_lvl_d = trap_to_priv_lvl; @@ -1165,7 +1208,7 @@ module csr_regfile default: ; endcase // save PC of next this instruction e.g.: the next one to be executed - dpc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + dpc_d = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{pc_i[CVA6Cfg.VLEN-1]}}, pc_i}; dcsr_d.cause = ariane_pkg::CauseBreakpoint; end @@ -1173,7 +1216,7 @@ module csr_regfile if (CVA6Cfg.DebugEn && ex_i.valid && ex_i.cause == riscv::DEBUG_REQUEST) begin dcsr_d.prv = priv_lvl_o; // save the PC - dpc_d = {{riscv::XLEN - riscv::VLEN{pc_i[riscv::VLEN-1]}}, pc_i}; + dpc_d = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{pc_i[CVA6Cfg.VLEN-1]}}, pc_i}; // enter debug mode debug_mode_d = 1'b1; // jump to the base address @@ -1189,19 +1232,19 @@ module csr_regfile if (commit_instr_i[0].fu == CTRL_FLOW) begin // we saved the correct target address during execute dpc_d = { - {riscv::XLEN - riscv::VLEN{commit_instr_i[0].bp.predict_address[riscv::VLEN-1]}}, + {CVA6Cfg.XLEN - CVA6Cfg.VLEN{commit_instr_i[0].bp.predict_address[CVA6Cfg.VLEN-1]}}, commit_instr_i[0].bp.predict_address }; // exception valid end else if (ex_i.valid) begin - dpc_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, trap_vector_base_o}; + dpc_d = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, trap_vector_base_o}; // return from environment end else if (eret_o) begin - dpc_d = {{riscv::XLEN - riscv::VLEN{1'b0}}, epc_o}; + dpc_d = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, epc_o}; // consecutive PC end else begin dpc_d = { - {riscv::XLEN - riscv::VLEN{commit_instr_i[0].pc[riscv::VLEN-1]}}, + {CVA6Cfg.XLEN - CVA6Cfg.VLEN{commit_instr_i[0].pc[CVA6Cfg.VLEN-1]}}, commit_instr_i[0].pc + (commit_instr_i[0].is_compressed ? 
'h2 : 'h4) }; end @@ -1220,7 +1263,7 @@ module csr_regfile // ------------------------------ // Set the address translation at which the load and stores should occur // we can use the previous values since changing the address translation will always involve a pipeline flush - if (ariane_pkg::MMU_PRESENT && mprv && CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == riscv::MODE_SV && (mstatus_q.mpp != riscv::PRIV_LVL_M)) + if (ariane_pkg::MMU_PRESENT && mprv && CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == CVA6Cfg.MODE_SV && (mstatus_q.mpp != riscv::PRIV_LVL_M)) en_ld_st_translation_d = 1'b1; else // otherwise we go with the regular settings en_ld_st_translation_d = en_translation_o; @@ -1362,7 +1405,7 @@ module csr_regfile // CSR Exception Control // ---------------------- always_comb begin : exception_ctrl - csr_exception_o = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0}; + csr_exception_o = {{CVA6Cfg.XLEN{1'b0}}, {CVA6Cfg.XLEN{1'b0}}, 1'b0}; // ---------------------------------- // Illegal Access (decode exception) // ---------------------------------- @@ -1400,15 +1443,15 @@ module csr_regfile // output assignments dependent on privilege mode always_comb begin : priv_output - trap_vector_base_o = {mtvec_q[riscv::VLEN-1:2], 2'b0}; + trap_vector_base_o = {mtvec_q[CVA6Cfg.VLEN-1:2], 2'b0}; // output user mode stvec if (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S) begin - trap_vector_base_o = {stvec_q[riscv::VLEN-1:2], 2'b0}; + trap_vector_base_o = {stvec_q[CVA6Cfg.VLEN-1:2], 2'b0}; end // if we are in debug mode jump to a specific address if (CVA6Cfg.DebugEn && debug_mode_q) begin - trap_vector_base_o = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.ExceptionAddress[riscv::VLEN-1:0]; + trap_vector_base_o = CVA6Cfg.DmBaseAddress[CVA6Cfg.VLEN-1:0] + CVA6Cfg.ExceptionAddress[CVA6Cfg.VLEN-1:0]; end // check if we are in vectored mode, if yes then do BASE + 4 * cause we @@ -1416,20 +1459,20 @@ module csr_regfile // we want to spare the costly addition. 
Furthermore check to which // privilege level we are jumping and whether the vectored mode is // activated for _that_ privilege level. - if (ex_i.cause[riscv::XLEN-1] && + if (ex_i.cause[CVA6Cfg.XLEN-1] && ((trap_to_priv_lvl == riscv::PRIV_LVL_M && mtvec_q[0]) || (CVA6Cfg.RVS && trap_to_priv_lvl == riscv::PRIV_LVL_S && stvec_q[0]))) begin trap_vector_base_o[7:2] = ex_i.cause[5:0]; end - epc_o = mepc_q[riscv::VLEN-1:0]; + epc_o = mepc_q[CVA6Cfg.VLEN-1:0]; // we are returning from supervisor mode, so take the sepc register if (CVA6Cfg.RVS && sret) begin - epc_o = sepc_q[riscv::VLEN-1:0]; + epc_o = sepc_q[CVA6Cfg.VLEN-1:0]; end // we are returning from debug mode, to take the dpc register if (CVA6Cfg.DebugEn && dret) begin - epc_o = dpc_q[riscv::VLEN-1:0]; + epc_o = dpc_q[CVA6Cfg.VLEN-1:0]; end end @@ -1444,12 +1487,12 @@ module csr_regfile unique case (csr_addr.address) riscv::CSR_MIP: - csr_rdata_o = csr_rdata | ({{riscv::XLEN - 1{1'b0}}, irq_i[1]} << riscv::IRQ_S_EXT); + csr_rdata_o = csr_rdata | ({{CVA6Cfg.XLEN - 1{1'b0}}, irq_i[1]} << riscv::IRQ_S_EXT); // in supervisor mode we also need to check whether we delegated this bit riscv::CSR_SIP: begin if (CVA6Cfg.RVS) begin csr_rdata_o = csr_rdata - | ({{riscv::XLEN-1{1'b0}}, (irq_i[1] & mideleg_q[riscv::IRQ_S_EXT])} << riscv::IRQ_S_EXT); + | ({{CVA6Cfg.XLEN-1{1'b0}}, (irq_i[1] & mideleg_q[riscv::IRQ_S_EXT])} << riscv::IRQ_S_EXT); end end default: ; @@ -1467,7 +1510,7 @@ module csr_regfile assign asid_o = satp_q.asid[AsidWidth-1:0]; assign sum_o = mstatus_q.sum; // we support bare memory addressing and SV39 - assign en_translation_o = ((CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == riscv::MODE_SV) && + assign en_translation_o = ((CVA6Cfg.RVS && riscv::vm_mode_t'(satp_q.mode) == CVA6Cfg.MODE_SV) && priv_lvl_o != riscv::PRIV_LVL_M) ? 
1'b1 : 1'b0; @@ -1503,36 +1546,36 @@ module csr_regfile dcsr_q.prv <= riscv::PRIV_LVL_M; dcsr_q.xdebugver <= 4'h4; dpc_q <= '0; - dscratch0_q <= {riscv::XLEN{1'b0}}; - dscratch1_q <= {riscv::XLEN{1'b0}}; + dscratch0_q <= {CVA6Cfg.XLEN{1'b0}}; + dscratch1_q <= {CVA6Cfg.XLEN{1'b0}}; end // machine mode registers mstatus_q <= 64'b0; // set to boot address + direct mode + 4 byte offset which is the initial trap mtvec_rst_load_q <= 1'b1; mtvec_q <= '0; - mip_q <= {riscv::XLEN{1'b0}}; - mie_q <= {riscv::XLEN{1'b0}}; - mepc_q <= {riscv::XLEN{1'b0}}; - mcause_q <= {riscv::XLEN{1'b0}}; - mcounteren_q <= {riscv::XLEN{1'b0}}; - mscratch_q <= {riscv::XLEN{1'b0}}; - mtval_q <= {riscv::XLEN{1'b0}}; - dcache_q <= {{riscv::XLEN - 1{1'b0}}, 1'b1}; - icache_q <= {{riscv::XLEN - 1{1'b0}}, 1'b1}; + mip_q <= {CVA6Cfg.XLEN{1'b0}}; + mie_q <= {CVA6Cfg.XLEN{1'b0}}; + mepc_q <= {CVA6Cfg.XLEN{1'b0}}; + mcause_q <= {CVA6Cfg.XLEN{1'b0}}; + mcounteren_q <= {CVA6Cfg.XLEN{1'b0}}; + mscratch_q <= {CVA6Cfg.XLEN{1'b0}}; + mtval_q <= {CVA6Cfg.XLEN{1'b0}}; + dcache_q <= {{CVA6Cfg.XLEN - 1{1'b0}}, 1'b1}; + icache_q <= {{CVA6Cfg.XLEN - 1{1'b0}}, 1'b1}; mcountinhibit_q <= '0; - acc_cons_q <= {{riscv::XLEN - 1{1'b0}}, CVA6Cfg.EnableAccelerator}; + acc_cons_q <= {{CVA6Cfg.XLEN - 1{1'b0}}, CVA6Cfg.EnableAccelerator}; // supervisor mode registers if (CVA6Cfg.RVS) begin - medeleg_q <= {riscv::XLEN{1'b0}}; - mideleg_q <= {riscv::XLEN{1'b0}}; - sepc_q <= {riscv::XLEN{1'b0}}; - scause_q <= {riscv::XLEN{1'b0}}; - stvec_q <= {riscv::XLEN{1'b0}}; - scounteren_q <= {riscv::XLEN{1'b0}}; - sscratch_q <= {riscv::XLEN{1'b0}}; - stval_q <= {riscv::XLEN{1'b0}}; - satp_q <= {riscv::XLEN{1'b0}}; + medeleg_q <= {CVA6Cfg.XLEN{1'b0}}; + mideleg_q <= {CVA6Cfg.XLEN{1'b0}}; + sepc_q <= {CVA6Cfg.XLEN{1'b0}}; + scause_q <= {CVA6Cfg.XLEN{1'b0}}; + stvec_q <= {CVA6Cfg.XLEN{1'b0}}; + scounteren_q <= {CVA6Cfg.XLEN{1'b0}}; + sscratch_q <= {CVA6Cfg.XLEN{1'b0}}; + stval_q <= {CVA6Cfg.XLEN{1'b0}}; + satp_q <= {CVA6Cfg.XLEN{1'b0}}; end // 
timer and counters cycle_q <= 64'b0; diff --git a/core/cva6.sv b/core/cva6.sv index c0b6d8d247..8441520c29 100644 --- a/core/cva6.sv +++ b/core/cva6.sv @@ -18,6 +18,168 @@ module cva6 #( // CVA6 config parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg, + + // branch-predict + // this is the struct we get back from ex stage and we will use it to update + // all the necessary data structures + // bp_resolve_t + parameter type bp_resolve_t = struct packed { + logic valid; // prediction with all its values is valid + logic [CVA6Cfg.VLEN-1:0] pc; // PC of predict or mis-predict + logic [CVA6Cfg.VLEN-1:0] target_address; // target address at which to jump, or not + logic is_mispredict; // set if this was a mis-predict + logic is_taken; // branch is taken + cf_t cf_type; // Type of control flow change + }, + + // branchpredict scoreboard entry + // this is the struct which we will inject into the pipeline to guide the various + // units towards the correct branch decision and resolve + parameter type branchpredict_sbe_t = struct packed { + cf_t cf; // type of control flow prediction + logic [CVA6Cfg.VLEN-1:0] predict_address; // target address at which to jump, or not + }, + + parameter type exception_t = struct packed { + logic [CVA6Cfg.XLEN-1:0] cause; // cause of exception + logic [CVA6Cfg.XLEN-1:0] tval; // additional information of causing exception (e.g.: instruction causing it), + // address of LD/ST fault + logic valid; + }, + + + // All information needed to determine whether we need to associate an interrupt + // with the corresponding instruction or not. 
+ parameter type irq_ctrl_t = struct packed { + logic [CVA6Cfg.XLEN-1:0] mie; + logic [CVA6Cfg.XLEN-1:0] mip; + logic [CVA6Cfg.XLEN-1:0] mideleg; + logic sie; + logic global_enable; + }, + + parameter type fu_data_t = struct packed { + fu_t fu; + fu_op operation; + logic [CVA6Cfg.XLEN-1:0] operand_a; + logic [CVA6Cfg.XLEN-1:0] operand_b; + logic [CVA6Cfg.XLEN-1:0] imm; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; + }, + + // IF/ID Stage + // store the decompressed instruction + parameter type fetch_entry_t = struct packed { + logic [CVA6Cfg.VLEN-1:0] address; // the address of the instructions from below + logic [31:0] instruction; // instruction word + branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path + exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions + }, + + // ID/EX/WB Stage + parameter type scoreboard_entry_t = struct packed { + logic [CVA6Cfg.VLEN-1:0] pc; // PC of instruction + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; // this can potentially be simplified, we could index the scoreboard entry + // with the transaction id in any case make the width more generic + fu_t fu; // functional unit to use + fu_op op; // operation to perform in each functional unit + logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1 + logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2 + logic [REG_ADDR_SIZE-1:0] rd; // register destination address + logic [CVA6Cfg.XLEN-1:0] result; // for unfinished instructions this field also holds the immediate, + // for unfinished floating-point that are partly encoded in rs2, this field also holds rs2 + // for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB) + // this field holds the address of the third operand from the floating-point register file + logic valid; // is the result valid + logic use_imm; // should we use the immediate as operand b? 
+ logic use_zimm; // use zimm as operand a + logic use_pc; // set if we need to use the PC as operand a, PC from exception + exception_t ex; // exception has occurred + branchpredict_sbe_t bp; // branch predict scoreboard data structure + logic is_compressed; // signals a compressed instructions, we need this information at the commit stage if + // we want jump accordingly e.g.: +4, +2 + logic [CVA6Cfg.XLEN-1:0] rs1_rdata; // information needed by RVFI + logic [CVA6Cfg.XLEN-1:0] rs2_rdata; // information needed by RVFI + logic [CVA6Cfg.VLEN-1:0] lsu_addr; // information needed by RVFI + logic [(CVA6Cfg.XLEN/8)-1:0] lsu_rmask; // information needed by RVFI + logic [(CVA6Cfg.XLEN/8)-1:0] lsu_wmask; // information needed by RVFI + logic [CVA6Cfg.XLEN-1:0] lsu_wdata; // information needed by RVFI + logic vfp; // is this a vector floating-point instruction? + }, + + // I$ data requests + parameter type icache_dreq_t = struct packed { + logic req; // we request a new word + logic kill_s1; // kill the current request + logic kill_s2; // kill the last request + logic spec; // request is speculative + logic [CVA6Cfg.VLEN-1:0] vaddr; // 1st cycle: 12 bit index is taken for lookup + }, + + parameter type icache_drsp_t = struct packed { + logic ready; // icache is ready + logic valid; // signals a valid read + logic [CVA6Cfg.FETCH_WIDTH-1:0] data; // 2+ cycle out: tag + logic [CVA6Cfg.FETCH_USER_WIDTH-1:0] user; // User bits + logic [CVA6Cfg.VLEN-1:0] vaddr; // virtual address out + exception_t ex; // we've encountered an exception + }, + + // cache request ports + // I$ address translation requests + parameter type icache_areq_t = struct packed { + logic fetch_valid; // address translation valid + logic [CVA6Cfg.PLEN-1:0] fetch_paddr; // physical address in + exception_t fetch_exception; // exception occurred during fetch + }, + + parameter type icache_arsp_t = struct packed { + logic fetch_req; // address translation request + logic [CVA6Cfg.VLEN-1:0] fetch_vaddr; // virtual 
address out + }, + + parameter type icache_req_t = struct packed { + logic [$clog2(CVA6Cfg.ICACHE_SET_ASSOC)-1:0] way; // way to replace + logic [CVA6Cfg.PLEN-1:0] paddr; // physical address + logic nc; // noncacheable + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) + }, + parameter type icache_rtrn_t = struct packed { + wt_cache_pkg::icache_in_t rtype; // see definitions above + logic [CVA6Cfg.ICACHE_LINE_WIDTH-1:0] data; // full cache line width + logic [CVA6Cfg.ICACHE_USER_LINE_WIDTH-1:0] user; // user bits + struct packed { + logic vld; // invalidate only affected way + logic all; // invalidate all ways + logic [CVA6Cfg.ICACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate + logic [CVA6Cfg.ICACHE_SET_ASSOC_WIDTH-1:0] way; // way to invalidate + } inv; // invalidation vector + logic [CVA6Cfg.MEM_TID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) + }, + + // D$ data requests + parameter type dcache_req_i_t = struct packed { + logic [CVA6Cfg.DCACHE_INDEX_WIDTH-1:0] address_index; + logic [CVA6Cfg.DCACHE_TAG_WIDTH-1:0] address_tag; + logic [CVA6Cfg.XLEN-1:0] data_wdata; + logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] data_wuser; + logic data_req; + logic data_we; + logic [(CVA6Cfg.XLEN/8)-1:0] data_be; + logic [1:0] data_size; + logic [DCACHE_TID_WIDTH-1:0] data_id; + logic kill_req; + logic tag_valid; + }, + + parameter type dcache_req_o_t = struct packed { + logic data_gnt; + logic data_rvalid; + logic [DCACHE_TID_WIDTH-1:0] data_rid; + logic [CVA6Cfg.XLEN-1:0] data_rdata; + logic [CVA6Cfg.DCACHE_USER_WIDTH-1:0] data_ruser; + }, + parameter bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace), // RVFI parameter type rvfi_instr_t = struct packed { @@ -25,25 +187,25 @@ module cva6 logic [config_pkg::NRET*64-1:0] order; logic [config_pkg::NRET*config_pkg::ILEN-1:0] insn; logic [config_pkg::NRET-1:0] trap; - logic [config_pkg::NRET*riscv::XLEN-1:0] cause; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] 
cause; logic [config_pkg::NRET-1:0] halt; logic [config_pkg::NRET-1:0] intr; logic [config_pkg::NRET*2-1:0] mode; logic [config_pkg::NRET*2-1:0] ixl; logic [config_pkg::NRET*5-1:0] rs1_addr; logic [config_pkg::NRET*5-1:0] rs2_addr; - logic [config_pkg::NRET*riscv::XLEN-1:0] rs1_rdata; - logic [config_pkg::NRET*riscv::XLEN-1:0] rs2_rdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] rs1_rdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] rs2_rdata; logic [config_pkg::NRET*5-1:0] rd_addr; - logic [config_pkg::NRET*riscv::XLEN-1:0] rd_wdata; - logic [config_pkg::NRET*riscv::XLEN-1:0] pc_rdata; - logic [config_pkg::NRET*riscv::XLEN-1:0] pc_wdata; - logic [config_pkg::NRET*riscv::VLEN-1:0] mem_addr; - logic [config_pkg::NRET*riscv::PLEN-1:0] mem_paddr; - logic [config_pkg::NRET*(riscv::XLEN/8)-1:0] mem_rmask; - logic [config_pkg::NRET*(riscv::XLEN/8)-1:0] mem_wmask; - logic [config_pkg::NRET*riscv::XLEN-1:0] mem_rdata; - logic [config_pkg::NRET*riscv::XLEN-1:0] mem_wdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] rd_wdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] pc_rdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] pc_wdata; + logic [config_pkg::NRET*CVA6Cfg.VLEN-1:0] mem_addr; + logic [config_pkg::NRET*CVA6Cfg.PLEN-1:0] mem_paddr; + logic [config_pkg::NRET*(CVA6Cfg.XLEN/8)-1:0] mem_rmask; + logic [config_pkg::NRET*(CVA6Cfg.XLEN/8)-1:0] mem_wmask; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] mem_rdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] mem_wdata; }, // AXI types parameter type axi_ar_chan_t = struct packed { @@ -119,8 +281,8 @@ module cva6 input logic clk_i, input logic rst_ni, // Core ID, Cluster ID and boot address are considered more or less static - input logic [riscv::VLEN-1:0] boot_addr_i, // reset boot address - input logic [riscv::XLEN-1:0] hart_id_i, // hart id in a multicore environment (reflected in a CSR) + input logic [CVA6Cfg.VLEN-1:0] boot_addr_i, // reset boot address + input logic [CVA6Cfg.XLEN-1:0] hart_id_i, // hart id in a multicore 
environment (reflected in a CSR) // Interrupt inputs input logic [1:0] irq_i, // level sensitive IR lines, mip & sip (async) input logic ipi_i, // inter-processor interrupts (async) @@ -137,87 +299,6 @@ module cva6 input noc_resp_t noc_resp_i ); - // ------------------------------------------ - // CVA6 configuration - // ------------------------------------------ - // Extended config - localparam bit RVF = (riscv::IS_XLEN64 | riscv::IS_XLEN32) & CVA6Cfg.FpuEn; - localparam bit RVD = (riscv::IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn; - localparam bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8; - localparam bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec; // Are non-standard extensions present? - localparam int unsigned FLen = RVD ? 64 : // D ext. - RVF ? 32 : // F ext. - CVA6Cfg.XF16 ? 16 : // Xf16 ext. - CVA6Cfg.XF16ALT ? 16 : // Xf16alt ext. - CVA6Cfg.XF8 ? 8 : // Xf8 ext. - 1; // Unused in case of no FP - - // Transprecision floating-point extensions configuration - localparam bit RVFVec = RVF & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled - localparam bit XF16Vec = CVA6Cfg.XF16 & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled - localparam bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled - localparam bit XF8Vec = CVA6Cfg.XF8 & CVA6Cfg.XFVec & FLen>8; // FP8 vectors available if vectors and larger fmt enabled - - localparam bit EnableAccelerator = CVA6Cfg.RVV; // Currently only used by V extension (Ara) - localparam int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 
5 : 4; - - localparam NrRgprPorts = 2; - - localparam config_pkg::cva6_cfg_t CVA6ExtendCfg = { - CVA6Cfg.NrCommitPorts, - CVA6Cfg.AxiAddrWidth, - CVA6Cfg.AxiDataWidth, - CVA6Cfg.AxiIdWidth, - CVA6Cfg.AxiUserWidth, - CVA6Cfg.NrLoadBufEntries, - CVA6Cfg.FpuEn, - CVA6Cfg.XF16, - CVA6Cfg.XF16ALT, - CVA6Cfg.XF8, - CVA6Cfg.RVA, - CVA6Cfg.RVV, - CVA6Cfg.RVC, - CVA6Cfg.RVZCB, - CVA6Cfg.XFVec, - CVA6Cfg.CvxifEn, - CVA6Cfg.ZiCondExtEn, - // Extended - bit'(RVF), - bit'(RVD), - bit'(FpPresent), - bit'(NSX), - unsigned'(FLen), - bit'(RVFVec), - bit'(XF16Vec), - bit'(XF16ALTVec), - bit'(XF8Vec), - unsigned'(NrRgprPorts), - unsigned'(NrWbPorts), - bit'(EnableAccelerator), - CVA6Cfg.RVS, - CVA6Cfg.RVU, - CVA6Cfg.HaltAddress, - CVA6Cfg.ExceptionAddress, - CVA6Cfg.RASDepth, - CVA6Cfg.BTBEntries, - CVA6Cfg.BHTEntries, - CVA6Cfg.DmBaseAddress, - CVA6Cfg.NrPMPEntries, - CVA6Cfg.NOCType, - CVA6Cfg.NrNonIdempotentRules, - CVA6Cfg.NonIdempotentAddrBase, - CVA6Cfg.NonIdempotentLength, - CVA6Cfg.NrExecuteRegionRules, - CVA6Cfg.ExecuteRegionAddrBase, - CVA6Cfg.ExecuteRegionLength, - CVA6Cfg.NrCachedRegionRules, - CVA6Cfg.CachedRegionAddrBase, - CVA6Cfg.CachedRegionLength, - CVA6Cfg.MaxOutstandingStores, - CVA6Cfg.DebugEn - }; - - // ------------------------------------------ // Global Signals // Signals connecting more than one module @@ -225,9 +306,9 @@ module cva6 riscv::priv_lvl_t priv_lvl; exception_t ex_commit; // exception from commit stage bp_resolve_t resolved_branch; - logic [ riscv::VLEN-1:0] pc_commit; + logic [ CVA6Cfg.VLEN-1:0] pc_commit; logic eret; - logic [CVA6ExtendCfg.NrCommitPorts-1:0] commit_ack; + logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack; localparam NumPorts = 4; cvxif_pkg::cvxif_req_t cvxif_req; @@ -236,8 +317,8 @@ module cva6 // -------------- // PCGEN <-> CSR // -------------- - logic [riscv::VLEN-1:0] trap_vector_base_commit_pcgen; - logic [riscv::VLEN-1:0] epc_commit_pcgen; + logic [CVA6Cfg.VLEN-1:0] trap_vector_base_commit_pcgen; + logic [CVA6Cfg.VLEN-1:0] 
epc_commit_pcgen; // -------------- // IF <-> ID // -------------- @@ -256,17 +337,17 @@ module cva6 // -------------- // ISSUE <-> EX // -------------- - logic [riscv::VLEN-1:0] rs1_forwarding_id_ex; // unregistered version of fu_data_o.operanda - logic [riscv::VLEN-1:0] rs2_forwarding_id_ex; // unregistered version of fu_data_o.operandb + logic [CVA6Cfg.VLEN-1:0] rs1_forwarding_id_ex; // unregistered version of fu_data_o.operanda + logic [CVA6Cfg.VLEN-1:0] rs2_forwarding_id_ex; // unregistered version of fu_data_o.operandb fu_data_t fu_data_id_ex; - logic [riscv::VLEN-1:0] pc_id_ex; + logic [CVA6Cfg.VLEN-1:0] pc_id_ex; logic is_compressed_instr_id_ex; // fixed latency units logic flu_ready_ex_id; - logic [TRANS_ID_BITS-1:0] flu_trans_id_ex_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] flu_trans_id_ex_id; logic flu_valid_ex_id; - riscv::xlen_t flu_result_ex_id; + logic [CVA6Cfg.XLEN-1:0] flu_result_ex_id; exception_t flu_exception_ex_id; // ALU logic alu_valid_id_ex; @@ -279,13 +360,13 @@ module cva6 logic lsu_valid_id_ex; logic lsu_ready_ex_id; - logic [TRANS_ID_BITS-1:0] load_trans_id_ex_id; - riscv::xlen_t load_result_ex_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] load_trans_id_ex_id; + logic [CVA6Cfg.XLEN-1:0] load_result_ex_id; logic load_valid_ex_id; exception_t load_exception_ex_id; - riscv::xlen_t store_result_ex_id; - logic [TRANS_ID_BITS-1:0] store_trans_id_ex_id; + logic [CVA6Cfg.XLEN-1:0] store_result_ex_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] store_trans_id_ex_id; logic store_valid_ex_id; exception_t store_exception_ex_id; // MULT @@ -295,16 +376,16 @@ module cva6 logic fpu_valid_id_ex; logic [1:0] fpu_fmt_id_ex; logic [2:0] fpu_rm_id_ex; - logic [TRANS_ID_BITS-1:0] fpu_trans_id_ex_id; - riscv::xlen_t fpu_result_ex_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_trans_id_ex_id; + logic [CVA6Cfg.XLEN-1:0] fpu_result_ex_id; logic fpu_valid_ex_id; exception_t fpu_exception_ex_id; // Accelerator logic stall_acc_id; scoreboard_entry_t issue_instr_id_acc; logic 
issue_instr_hs_id_acc; - logic [TRANS_ID_BITS-1:0] acc_trans_id_ex_id; - riscv::xlen_t acc_result_ex_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] acc_trans_id_ex_id; + logic [CVA6Cfg.XLEN-1:0] acc_result_ex_id; logic acc_valid_ex_id; exception_t acc_exception_ex_id; logic halt_acc_ctrl; @@ -313,8 +394,8 @@ module cva6 // CSR logic csr_valid_id_ex; // CVXIF - logic [TRANS_ID_BITS-1:0] x_trans_id_ex_id; - riscv::xlen_t x_result_ex_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_trans_id_ex_id; + logic [CVA6Cfg.XLEN-1:0] x_result_ex_id; logic x_valid_ex_id; exception_t x_exception_ex_id; logic x_we_ex_id; @@ -331,7 +412,7 @@ module cva6 // LSU Commit logic lsu_commit_commit_ex; logic lsu_commit_ready_ex_commit; - logic [TRANS_ID_BITS-1:0] lsu_commit_trans_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] lsu_commit_trans_id; logic stall_st_pending_ex; logic no_st_pending_ex; logic no_st_pending_commit; @@ -341,14 +422,14 @@ module cva6 // -------------- // ID <-> COMMIT // -------------- - scoreboard_entry_t [CVA6ExtendCfg.NrCommitPorts-1:0] commit_instr_id_commit; + scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_id_commit; // -------------- // COMMIT <-> ID // -------------- - logic [CVA6ExtendCfg.NrCommitPorts-1:0][4:0] waddr_commit_id; - logic [CVA6ExtendCfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_commit_id; - logic [CVA6ExtendCfg.NrCommitPorts-1:0] we_gpr_commit_id; - logic [CVA6ExtendCfg.NrCommitPorts-1:0] we_fpr_commit_id; + logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_commit_id; + logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_commit_id; + logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_commit_id; + logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_commit_id; // -------------- // CSR <-> * // -------------- @@ -362,12 +443,12 @@ module cva6 riscv::priv_lvl_t ld_st_priv_lvl_csr_ex; logic sum_csr_ex; logic mxr_csr_ex; - logic [riscv::PPNW-1:0] satp_ppn_csr_ex; - logic [ASID_WIDTH-1:0] asid_csr_ex; + logic [CVA6Cfg.PPNW-1:0] satp_ppn_csr_ex; + logic [CVA6Cfg.ASID_WIDTH-1:0] 
asid_csr_ex; logic [11:0] csr_addr_ex_csr; fu_op csr_op_commit_csr; - riscv::xlen_t csr_wdata_commit_csr; - riscv::xlen_t csr_rdata_csr_commit; + logic [CVA6Cfg.XLEN-1:0] csr_wdata_commit_csr; + logic [CVA6Cfg.XLEN-1:0] csr_rdata_csr_commit; exception_t csr_exception_csr_commit; logic tvm_csr_id; logic tw_csr_id; @@ -380,13 +461,13 @@ module cva6 logic debug_mode; logic single_step_csr_commit; riscv::pmpcfg_t [15:0] pmpcfg; - logic [15:0][riscv::PLEN-3:0] pmpaddr; + logic [15:0][CVA6Cfg.PLEN-3:0] pmpaddr; logic [31:0] mcountinhibit_csr_perf; // ---------------------------- // Performance Counters <-> * // ---------------------------- logic [11:0] addr_csr_perf; - riscv::xlen_t data_csr_perf, data_perf_csr; + logic [CVA6Cfg.XLEN-1:0] data_csr_perf, data_perf_csr; logic we_csr_perf; logic icache_flush_ctrl_cache; @@ -394,7 +475,7 @@ module cva6 logic dtlb_miss_ex_perf; logic dcache_miss_cache_perf; logic icache_miss_cache_perf; - logic [ NumPorts-1:0][DCACHE_SET_ASSOC-1:0] miss_vld_bits; + logic [ NumPorts-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0] miss_vld_bits; logic stall_issue; // -------------- // CTRL <-> * @@ -437,11 +518,11 @@ module cva6 logic dcache_commit_wbuffer_empty; logic dcache_commit_wbuffer_not_ni; - logic [ riscv::VLEN-1:0] lsu_addr; - logic [ riscv::PLEN-1:0] mem_paddr; - logic [ (riscv::XLEN/8)-1:0] lsu_rmask; - logic [ (riscv::XLEN/8)-1:0] lsu_wmask; - logic [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id; + logic [ CVA6Cfg.VLEN-1:0] lsu_addr; + logic [ CVA6Cfg.PLEN-1:0] mem_paddr; + logic [ (CVA6Cfg.XLEN/8)-1:0] lsu_rmask; + logic [ (CVA6Cfg.XLEN/8)-1:0] lsu_wmask; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] lsu_addr_trans_id; // Accelerator port logic [ 63:0] inval_addr; @@ -452,13 +533,17 @@ module cva6 // Frontend // -------------- frontend #( - .CVA6Cfg(CVA6ExtendCfg) + .CVA6Cfg(CVA6Cfg), + .bp_resolve_t(bp_resolve_t), + .fetch_entry_t(fetch_entry_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t) ) i_frontend ( .flush_i 
(flush_ctrl_if), // not entirely correct .flush_bp_i (1'b0), .halt_i (halt_ctrl), .debug_mode_i (debug_mode), - .boot_addr_i (boot_addr_i[riscv::VLEN-1:0]), + .boot_addr_i (boot_addr_i[CVA6Cfg.VLEN-1:0]), .icache_dreq_i (icache_dreq_cache_if), .icache_dreq_o (icache_dreq_if_cache), .resolved_branch_i (resolved_branch), @@ -479,7 +564,12 @@ module cva6 // ID // --------- id_stage #( - .CVA6Cfg(CVA6ExtendCfg) + .CVA6Cfg(CVA6Cfg), + .exception_t(exception_t), + .branchpredict_sbe_t(branchpredict_sbe_t), + .irq_ctrl_t(irq_ctrl_t), + .fetch_entry_t(fetch_entry_t), + .scoreboard_entry_t(scoreboard_entry_t) ) id_stage_i ( .clk_i, .rst_ni, @@ -507,12 +597,12 @@ module cva6 .tsr_i (tsr_csr_id) ); - logic [NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_ex_id; - logic [NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_ex_id; - exception_t [NrWbPorts-1:0] ex_ex_ex_id; // exception from execute, ex_stage to id_stage - logic [NrWbPorts-1:0] wt_valid_ex_id; + logic [CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_ex_id; + logic [CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] wbdata_ex_id; + exception_t [CVA6Cfg.NrWbPorts-1:0] ex_ex_ex_id; // exception from execute, ex_stage to id_stage + logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_ex_id; - if (CVA6ExtendCfg.CvxifEn) begin + if (CVA6Cfg.CvxifEn) begin assign trans_id_ex_id = { x_trans_id_ex_id, flu_trans_id_ex_id, @@ -533,7 +623,7 @@ module cva6 assign wt_valid_ex_id = { x_valid_ex_id, flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id }; - end else if (CVA6ExtendCfg.EnableAccelerator) begin + end else if (CVA6Cfg.EnableAccelerator) begin assign trans_id_ex_id = { flu_trans_id_ex_id, load_trans_id_ex_id, @@ -567,7 +657,7 @@ module cva6 assign wt_valid_ex_id = {flu_valid_ex_id, load_valid_ex_id, store_valid_ex_id, fpu_valid_ex_id}; end - if (CVA6ExtendCfg.CvxifEn && CVA6ExtendCfg.EnableAccelerator) begin : gen_err_xif_and_acc + if (CVA6Cfg.CvxifEn && CVA6Cfg.EnableAccelerator) begin : gen_err_xif_and_acc $error("X-interface and 
accelerator port cannot be enabled at the same time."); end @@ -575,9 +665,14 @@ module cva6 // Issue // --------- issue_stage #( - .CVA6Cfg (CVA6ExtendCfg), + .CVA6Cfg (CVA6Cfg), + .exception_t(exception_t), + .bp_resolve_t(bp_resolve_t), + .branchpredict_sbe_t(branchpredict_sbe_t), + .fu_data_t(fu_data_t), + .scoreboard_entry_t(scoreboard_entry_t), .IsRVFI (IsRVFI), - .NR_ENTRIES(NR_SB_ENTRIES) + .NR_ENTRIES(CVA6Cfg.NR_SB_ENTRIES) ) issue_stage_i ( .clk_i, .rst_ni, @@ -651,8 +746,17 @@ module cva6 // EX // --------- ex_stage #( - .CVA6Cfg (CVA6ExtendCfg), - .ASID_WIDTH(ASID_WIDTH) + .CVA6Cfg (CVA6Cfg), + .exception_t(exception_t), + .bp_resolve_t(bp_resolve_t), + .branchpredict_sbe_t(branchpredict_sbe_t), + .fu_data_t(fu_data_t), + .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t) ) ex_stage_i ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -768,7 +872,9 @@ module cva6 assign no_st_pending_commit = no_st_pending_ex & dcache_commit_wbuffer_empty; commit_stage #( - .CVA6Cfg(CVA6ExtendCfg) + .CVA6Cfg(CVA6Cfg), + .exception_t(exception_t), + .scoreboard_entry_t(scoreboard_entry_t) ) commit_stage_i ( .clk_i, .rst_ni, @@ -807,16 +913,19 @@ module cva6 // CSR // --------- csr_regfile #( - .CVA6Cfg (CVA6ExtendCfg), - .AsidWidth (ASID_WIDTH), + .CVA6Cfg (CVA6Cfg), + .exception_t(exception_t), + .irq_ctrl_t(irq_ctrl_t), + .scoreboard_entry_t(scoreboard_entry_t), + .AsidWidth (CVA6Cfg.ASID_WIDTH), .MHPMCounterNum(MHPMCounterNum) ) csr_regfile_i ( .flush_o (flush_csr_ctrl), .halt_csr_o (halt_csr_ctrl), .commit_instr_i (commit_instr_id_commit), .commit_ack_i (commit_ack), - .boot_addr_i (boot_addr_i[riscv::VLEN-1:0]), - .hart_id_i (hart_id_i[riscv::XLEN-1:0]), + .boot_addr_i (boot_addr_i[CVA6Cfg.VLEN-1:0]), + .hart_id_i (hart_id_i[CVA6Cfg.XLEN-1:0]), .ex_i (ex_commit), .csr_op_i (csr_op_commit_csr), .csr_write_fflags_i 
(csr_write_fflags_commit_cs), @@ -874,7 +983,13 @@ module cva6 // ------------------------ if (PERF_COUNTER_EN) begin : gen_perf_counter perf_counters #( - .CVA6Cfg (CVA6ExtendCfg), + .CVA6Cfg (CVA6Cfg), + .exception_t(exception_t), + .bp_resolve_t(bp_resolve_t), + .scoreboard_entry_t(scoreboard_entry_t), + .icache_dreq_t(icache_dreq_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .NumPorts(NumPorts) ) perf_counters_i ( .clk_i (clk_i), @@ -913,7 +1028,8 @@ module cva6 // Controller // ------------ controller #( - .CVA6Cfg(CVA6ExtendCfg) + .CVA6Cfg(CVA6Cfg), + .bp_resolve_t(bp_resolve_t) ) controller_i ( // flush ports .set_pc_commit_o (set_pc_ctrl_pcgen), @@ -977,7 +1093,15 @@ module cva6 if (DCACHE_TYPE == int'(config_pkg::WT)) begin : gen_cache_wt // this is a cache subsystem that is compatible with OpenPiton wt_cache_subsystem #( - .CVA6Cfg (CVA6ExtendCfg), + .CVA6Cfg (CVA6Cfg), + .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .NumPorts (NumPorts), .noc_req_t (noc_req_t), .noc_resp_t(noc_resp_t) @@ -1017,7 +1141,15 @@ module cva6 ); end else if (DCACHE_TYPE == int'(config_pkg::HPDCACHE)) begin : gen_cache_hpd cva6_hpdcache_subsystem #( - .CVA6Cfg (CVA6ExtendCfg), + .CVA6Cfg (CVA6Cfg), + .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .NumPorts (NumPorts), .noc_req_t (noc_req_t), .noc_resp_t(noc_resp_t), @@ -1072,7 +1204,15 @@ module cva6 // note: this only works with one cacheable region // not as important since this cache subsystem is about to be // deprecated - .CVA6Cfg (CVA6ExtendCfg), + .CVA6Cfg (CVA6Cfg), 
+ .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .icache_req_t(icache_req_t), + .icache_rtrn_t(icache_rtrn_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .NumPorts (NumPorts), .axi_ar_chan_t(axi_ar_chan_t), .axi_aw_chan_t(axi_aw_chan_t), @@ -1117,9 +1257,14 @@ module cva6 // Accelerator // ---------------- - if (CVA6ExtendCfg.EnableAccelerator) begin : gen_accelerator + if (CVA6Cfg.EnableAccelerator) begin : gen_accelerator acc_dispatcher #( - .CVA6Cfg (CVA6ExtendCfg), + .CVA6Cfg (CVA6Cfg), + .exception_t(exception_t), + .fu_data_t(fu_data_t), + .scoreboard_entry_t(scoreboard_entry_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .acc_cfg_t (acc_cfg_t), .AccCfg (AccCfg), .acc_req_t (cvxif_req_t), @@ -1204,14 +1349,15 @@ module cva6 localparam PC_QUEUE_DEPTH = 16; logic piton_pc_vld; - logic [ riscv::VLEN-1:0] piton_pc; - logic [CVA6ExtendCfg.NrCommitPorts-1:0][riscv::VLEN-1:0] pc_data; - logic [CVA6ExtendCfg.NrCommitPorts-1:0] pc_pop, pc_empty; + logic [ CVA6Cfg.VLEN-1:0] piton_pc; + logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.VLEN-1:0] pc_data; + logic [CVA6Cfg.NrCommitPorts-1:0] pc_pop, pc_empty; - for (genvar i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin : gen_pc_fifo + for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_pc_fifo fifo_v3 #( .DATA_WIDTH(64), - .DEPTH(PC_QUEUE_DEPTH) + .DEPTH(PC_QUEUE_DEPTH), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_pc_fifo ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -1228,7 +1374,7 @@ module cva6 end rr_arb_tree #( - .NumIn(CVA6ExtendCfg.NrCommitPorts), + .NumIn(CVA6Cfg.NrCommitPorts), .DataWidth(64) ) i_rr_arb_tree ( .clk_i (clk_i), @@ -1283,7 +1429,7 @@ module cva6 assign tracer_if.priv_lvl = priv_lvl; assign tracer_if.debug_mode = debug_mode; - instr_tracer instr_tracer_i ( + instr_tracer #(.CVA6Cfg(CVA6Cfg)) instr_tracer_i ( .tracer_if(tracer_if), .hart_id_i ); @@ -1314,7 +1460,7 @@ module 
cva6 default: ; // Do nothing endcase end - for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin if (commit_ack[i] && !commit_instr_id_commit[i].ex.valid) begin $fwrite(f, "%d 0x%0h %s (0x%h) DASM(%h)\n", cycles, commit_instr_id_commit[i].pc, mode, commit_instr_id_commit[i].ex.tval[31:0], commit_instr_id_commit[i].ex.tval[31:0]); @@ -1347,7 +1493,7 @@ module cva6 if (IsRVFI) begin always_comb begin - for (int i = 0; i < CVA6ExtendCfg.NrCommitPorts; i++) begin + for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin logic exception, mem_exception; exception = commit_instr_id_commit[i].valid && ex_commit.valid; mem_exception = exception && @@ -1371,12 +1517,12 @@ module cva6 rvfi_o[i].trap = mem_exception; rvfi_o[i].cause = ex_commit.cause; rvfi_o[i].mode = (CVA6Cfg.DebugEn && debug_mode) ? 2'b10 : priv_lvl; - rvfi_o[i].ixl = riscv::XLEN == 64 ? 2 : 1; + rvfi_o[i].ixl = CVA6Cfg.XLEN == 64 ? 2 : 1; rvfi_o[i].rs1_addr = commit_instr_id_commit[i].rs1[4:0]; rvfi_o[i].rs2_addr = commit_instr_id_commit[i].rs2[4:0]; rvfi_o[i].rd_addr = commit_instr_id_commit[i].rd[4:0]; rvfi_o[i].rd_wdata = - (CVA6ExtendCfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_id_commit[i].op)) ? + (CVA6Cfg.FpPresent && ariane_pkg::is_rd_fpr(commit_instr_id_commit[i].op)) ? commit_instr_id_commit[i].result : wdata_commit_id[i]; rvfi_o[i].pc_rdata = commit_instr_id_commit[i].pc; diff --git a/core/cva6_accel_first_pass_decoder_stub.sv b/core/cva6_accel_first_pass_decoder_stub.sv index 20ffb1e965..bb0fee085f 100644 --- a/core/cva6_accel_first_pass_decoder_stub.sv +++ b/core/cva6_accel_first_pass_decoder_stub.sv @@ -8,8 +8,9 @@ // first pass decoder. 
module cva6_accel_first_pass_decoder - import ariane_pkg::*; -( + import ariane_pkg::*; #( + parameter type scoreboard_entry_t = logic + ) ( input logic [31:0] instruction_i, // instruction from IF input riscv::xs_t fs_i, // floating point extension status input riscv::xs_t vs_i, // vector extension status diff --git a/core/cvxif_example/cvxif_example_coprocessor.sv b/core/cvxif_example/cvxif_example_coprocessor.sv index 08e801c334..a958678b81 100644 --- a/core/cvxif_example/cvxif_example_coprocessor.sv +++ b/core/cvxif_example/cvxif_example_coprocessor.sv @@ -112,7 +112,8 @@ module cvxif_example_coprocessor .FALL_THROUGH(1), //data_o ready and pop in the same cycle .DATA_WIDTH (64), .DEPTH (8), - .dtype (x_issue_t) + .dtype (x_issue_t), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) fifo_commit_i ( .clk_i (clk_i), .rst_ni (rst_ni), diff --git a/core/cvxif_fu.sv b/core/cvxif_fu.sv index 74be472a7b..8886999545 100644 --- a/core/cvxif_fu.sv +++ b/core/cvxif_fu.sv @@ -13,7 +13,9 @@ module cvxif_fu import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type fu_data_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -24,9 +26,9 @@ module cvxif_fu output logic x_ready_o, input logic [ 31:0] x_off_instr_i, //to writeback - output logic [TRANS_ID_BITS-1:0] x_trans_id_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_trans_id_o, output exception_t x_exception_o, - output riscv::xlen_t x_result_o, + output logic [CVA6Cfg.XLEN-1:0] x_result_o, output logic x_valid_o, output logic x_we_o, //to coprocessor @@ -36,7 +38,7 @@ module cvxif_fu localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS; logic illegal_n, illegal_q; - logic [TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] illegal_id_n, illegal_id_q; logic [31:0] illegal_instr_n, illegal_instr_q; logic [X_NUM_RS-1:0] rs_valid; @@ -79,7 +81,7 @@ 
module cvxif_fu x_valid_o = cvxif_resp_i.x_result_valid; //Read result only when CVXIF is enabled x_trans_id_o = x_valid_o ? cvxif_resp_i.x_result.id : '0; x_result_o = x_valid_o ? cvxif_resp_i.x_result.data : '0; - x_exception_o.cause = x_valid_o ? {{(riscv::XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0; + x_exception_o.cause = x_valid_o ? {{(CVA6Cfg.XLEN-6){1'b0}}, cvxif_resp_i.x_result.exccode} : '0; x_exception_o.valid = x_valid_o ? cvxif_resp_i.x_result.exc : '0; x_exception_o.tval = '0; x_we_o = x_valid_o ? cvxif_resp_i.x_result.we : '0; diff --git a/core/decoder.sv b/core/decoder.sv index e344c9ce74..5171bb8fd4 100644 --- a/core/decoder.sv +++ b/core/decoder.sv @@ -22,10 +22,14 @@ module decoder import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type branchpredict_sbe_t = logic, + parameter type irq_ctrl_t = logic, + parameter type scoreboard_entry_t = logic ) ( input logic debug_req_i, // external debug request - input logic [riscv::VLEN-1:0] pc_i, // PC from IF + input logic [CVA6Cfg.VLEN-1:0] pc_i, // PC from IF input logic is_compressed_i, // is a compressed instruction input logic [15:0] compressed_instr_i, // compressed form of instruction input logic is_illegal_i, // illegal compressed instruction @@ -46,6 +50,14 @@ module decoder output scoreboard_entry_t instruction_o, // scoreboard entry to scoreboard output logic is_control_flow_instr_o // this instruction will change the control flow ); + + localparam logic [CVA6Cfg.XLEN-1:0] S_SW_INTERRUPT = (1 << (CVA6Cfg.XLEN - 1)) | CVA6Cfg.XLEN'(riscv::IRQ_S_SOFT); + localparam logic [CVA6Cfg.XLEN-1:0] M_SW_INTERRUPT = (1 << (CVA6Cfg.XLEN - 1)) | CVA6Cfg.XLEN'(riscv::IRQ_M_SOFT); + localparam logic [CVA6Cfg.XLEN-1:0] S_TIMER_INTERRUPT = (1 << (CVA6Cfg.XLEN - 1)) | CVA6Cfg.XLEN'(riscv::IRQ_S_TIMER); + localparam logic [CVA6Cfg.XLEN-1:0] 
M_TIMER_INTERRUPT = (1 << (CVA6Cfg.XLEN - 1)) | CVA6Cfg.XLEN'(riscv::IRQ_M_TIMER); + localparam logic [CVA6Cfg.XLEN-1:0] S_EXT_INTERRUPT = (1 << (CVA6Cfg.XLEN - 1)) | CVA6Cfg.XLEN'(riscv::IRQ_S_EXT); + localparam logic [CVA6Cfg.XLEN-1:0] M_EXT_INTERRUPT = (1 << (CVA6Cfg.XLEN - 1)) | CVA6Cfg.XLEN'(riscv::IRQ_M_EXT); + logic illegal_instr; logic illegal_instr_bm; logic illegal_instr_zic; @@ -63,12 +75,12 @@ module decoder // -------------------- enum logic [3:0] {NOIMM, IIMM, SIMM, SBIMM, UIMM, JIMM, RS3} imm_select; - riscv::xlen_t imm_i_type; - riscv::xlen_t imm_s_type; - riscv::xlen_t imm_sb_type; - riscv::xlen_t imm_u_type; - riscv::xlen_t imm_uj_type; - riscv::xlen_t imm_bi_type; + logic [CVA6Cfg.XLEN-1:0] imm_i_type; + logic [CVA6Cfg.XLEN-1:0] imm_s_type; + logic [CVA6Cfg.XLEN-1:0] imm_sb_type; + logic [CVA6Cfg.XLEN-1:0] imm_u_type; + logic [CVA6Cfg.XLEN-1:0] imm_uj_type; + logic [CVA6Cfg.XLEN-1:0] imm_bi_type; // --------------------------------------- // Accelerator instructions' first-pass decoder @@ -82,7 +94,9 @@ module decoder // This module is responsible for a light-weight decoding of accelerator instructions, // identifying them, but also whether they read/write scalar registers. // Accelerators are supposed to define this module. - cva6_accel_first_pass_decoder i_accel_decoder ( + cva6_accel_first_pass_decoder #( + .scoreboard_entry_t(scoreboard_entry_t) + ) i_accel_decoder ( .instruction_i(instruction_i), .fs_i(fs_i), .vs_i(vs_i), @@ -530,7 +544,7 @@ module decoder // Integer Reg-Reg Operations // --------------------------- end else begin - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001 || ((instr.rtype.funct7 == 7'b000_0101) && !(instr.rtype.funct3[14]))) ? MULT : ALU; end else begin instruction_o.fu = (instr.rtype.funct7 == 7'b000_0001) ? 
MULT : ALU; @@ -568,7 +582,7 @@ module decoder illegal_instr_non_bm = 1'b1; end endcase - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin unique case ({ instr.rtype.funct7, instr.rtype.funct3 }) @@ -618,7 +632,7 @@ module decoder end //VCS coverage on unique case ({ - ariane_pkg::BITMANIP, CVA6Cfg.ZiCondExtEn + CVA6Cfg.BITMANIP, CVA6Cfg.ZiCondExtEn }) 2'b00: illegal_instr = illegal_instr_non_bm; 2'b01: illegal_instr = illegal_instr_non_bm & illegal_instr_zic; @@ -636,7 +650,7 @@ module decoder instruction_o.rs1[4:0] = instr.rtype.rs1; instruction_o.rs2[4:0] = instr.rtype.rs2; instruction_o.rd[4:0] = instr.rtype.rd; - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin unique case ({ instr.rtype.funct7, instr.rtype.funct3 }) @@ -653,7 +667,7 @@ module decoder {7'b000_0001, 3'b111} : instruction_o.op = ariane_pkg::REMUW; default: illegal_instr_non_bm = 1'b1; endcase - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin unique case ({ instr.rtype.funct7, instr.rtype.funct3 }) @@ -694,7 +708,7 @@ module decoder 3'b001: begin instruction_o.op = ariane_pkg::SLL; // Shift Left Logical by Immediate if (instr.instr[31:26] != 6'b0) illegal_instr_non_bm = 1'b1; - if (instr.instr[25] != 1'b0 && riscv::XLEN == 32) illegal_instr_non_bm = 1'b1; + if (instr.instr[25] != 1'b0 && CVA6Cfg.XLEN == 32) illegal_instr_non_bm = 1'b1; end 3'b101: begin @@ -703,10 +717,10 @@ module decoder else if (instr.instr[31:26] == 6'b010_000) instruction_o.op = ariane_pkg::SRA; // Shift Right Arithmetically by Immediate else illegal_instr_non_bm = 1'b1; - if (instr.instr[25] != 1'b0 && riscv::XLEN == 32) illegal_instr_non_bm = 1'b1; + if (instr.instr[25] != 1'b0 && CVA6Cfg.XLEN == 32) illegal_instr_non_bm = 1'b1; end endcase - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin unique case (instr.itype.funct3) 3'b001: begin if (instr.instr[31:25] == 7'b0110000) begin @@ -744,7 +758,7 @@ module decoder imm_select = IIMM; instruction_o.rs1[4:0] = instr.itype.rs1; 
instruction_o.rd[4:0] = instr.itype.rd; - if (riscv::IS_XLEN64) begin + if (CVA6Cfg.IS_XLEN64) begin unique case (instr.itype.funct3) 3'b000: instruction_o.op = ariane_pkg::ADDW; // Add Immediate 3'b001: begin @@ -760,7 +774,7 @@ module decoder end default: illegal_instr_non_bm = 1'b1; endcase - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin unique case (instr.itype.funct3) 3'b001: begin if (instr.instr[31:25] == 7'b0110000) begin @@ -799,7 +813,7 @@ module decoder 3'b001: instruction_o.op = ariane_pkg::SH; 3'b010: instruction_o.op = ariane_pkg::SW; 3'b011: - if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::SD; + if (CVA6Cfg.XLEN == 64) instruction_o.op = ariane_pkg::SD; else illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase @@ -818,10 +832,10 @@ module decoder 3'b100: instruction_o.op = ariane_pkg::LBU; 3'b101: instruction_o.op = ariane_pkg::LHU; 3'b110: - if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::LWU; + if (CVA6Cfg.XLEN == 64) instruction_o.op = ariane_pkg::LWU; else illegal_instr = 1'b1; 3'b011: - if (riscv::XLEN == 64) instruction_o.op = ariane_pkg::LD; + if (CVA6Cfg.XLEN == 64) instruction_o.op = ariane_pkg::LD; else illegal_instr = 1'b1; default: illegal_instr = 1'b1; endcase @@ -1116,7 +1130,7 @@ module decoder default: illegal_instr = 1'b1; endcase // double words - end else if (riscv::IS_XLEN64 && CVA6Cfg.RVA && instr.stype.funct3 == 3'h3) begin + end else if (CVA6Cfg.IS_XLEN64 && CVA6Cfg.RVA && instr.stype.funct3 == 3'h3) begin unique case (instr.instr[31:27]) 5'h0: instruction_o.op = ariane_pkg::AMO_ADDD; 5'h1: instruction_o.op = ariane_pkg::AMO_SWAPD; @@ -1229,10 +1243,10 @@ module decoder // Sign extend immediate // -------------------------------- always_comb begin : sign_extend - imm_i_type = {{riscv::XLEN - 12{instruction_i[31]}}, instruction_i[31:20]}; - imm_s_type = {{riscv::XLEN - 12{instruction_i[31]}}, instruction_i[31:25], instruction_i[11:7]}; + imm_i_type = {{CVA6Cfg.XLEN - 12{instruction_i[31]}}, 
instruction_i[31:20]}; + imm_s_type = {{CVA6Cfg.XLEN - 12{instruction_i[31]}}, instruction_i[31:25], instruction_i[11:7]}; imm_sb_type = { - {riscv::XLEN - 13{instruction_i[31]}}, + {CVA6Cfg.XLEN - 13{instruction_i[31]}}, instruction_i[31], instruction_i[7], instruction_i[30:25], @@ -1240,16 +1254,16 @@ module decoder 1'b0 }; imm_u_type = { - {riscv::XLEN - 32{instruction_i[31]}}, instruction_i[31:12], 12'b0 + {CVA6Cfg.XLEN - 32{instruction_i[31]}}, instruction_i[31:12], 12'b0 }; // JAL, AUIPC, sign extended to 64 bit imm_uj_type = { - {riscv::XLEN - 20{instruction_i[31]}}, + {CVA6Cfg.XLEN - 20{instruction_i[31]}}, instruction_i[19:12], instruction_i[20], instruction_i[30:21], 1'b0 }; - imm_bi_type = {{riscv::XLEN - 5{instruction_i[24]}}, instruction_i[24:20]}; + imm_bi_type = {{CVA6Cfg.XLEN - 5{instruction_i[24]}}, instruction_i[24:20]}; // NOIMM, IIMM, SIMM, BIMM, UIMM, JIMM, RS3 // select immediate @@ -1276,11 +1290,11 @@ module decoder end RS3: begin // result holds address of fp operand rs3 - instruction_o.result = {{riscv::XLEN - 5{1'b0}}, instr.r4type.rs3}; + instruction_o.result = {{CVA6Cfg.XLEN - 5{1'b0}}, instr.r4type.rs3}; instruction_o.use_imm = 1'b0; end default: begin - instruction_o.result = {riscv::XLEN{1'b0}}; + instruction_o.result = {CVA6Cfg.XLEN{1'b0}}; instruction_o.use_imm = 1'b0; end endcase @@ -1296,7 +1310,7 @@ module decoder // --------------------- // Exception handling // --------------------- - riscv::xlen_t interrupt_cause; + logic [CVA6Cfg.XLEN-1:0] interrupt_cause; // this instruction has already executed if the exception is valid assign instruction_o.valid = instruction_o.ex.valid; @@ -1309,7 +1323,7 @@ module decoder if (~ex_i.valid) begin // if we didn't already get an exception save the instruction here as we may need it // in the commit stage if we got a access exception to one of the CSR registers - instruction_o.ex.tval = (is_compressed_i) ? 
{{riscv::XLEN-16{1'b0}}, compressed_instr_i} : {{riscv::XLEN-32{1'b0}}, instruction_i}; + instruction_o.ex.tval = (is_compressed_i) ? {{CVA6Cfg.XLEN-16{1'b0}}, compressed_instr_i} : {{CVA6Cfg.XLEN-32{1'b0}}, instruction_i}; // instructions which will throw an exception are marked as valid // e.g.: they can be committed anytime and do not need to wait for any functional unit // check here if we decoded an invalid instruction or if the compressed decoder already decoded @@ -1343,61 +1357,61 @@ module decoder // we have three interrupt sources: external interrupts, software interrupts, timer interrupts (order of precedence) // for two privilege levels: Supervisor and Machine Mode // Supervisor Timer Interrupt - if (irq_ctrl_i.mie[riscv::S_TIMER_INTERRUPT[$clog2( - riscv::XLEN - )-1:0]] && irq_ctrl_i.mip[riscv::S_TIMER_INTERRUPT[$clog2( - riscv::XLEN + if (irq_ctrl_i.mie[S_TIMER_INTERRUPT[$clog2( + CVA6Cfg.XLEN + )-1:0]] && irq_ctrl_i.mip[S_TIMER_INTERRUPT[$clog2( + CVA6Cfg.XLEN )-1:0]]) begin - interrupt_cause = riscv::S_TIMER_INTERRUPT; + interrupt_cause = S_TIMER_INTERRUPT; end // Supervisor Software Interrupt - if (irq_ctrl_i.mie[riscv::S_SW_INTERRUPT[$clog2( - riscv::XLEN - )-1:0]] && irq_ctrl_i.mip[riscv::S_SW_INTERRUPT[$clog2( - riscv::XLEN + if (irq_ctrl_i.mie[S_SW_INTERRUPT[$clog2( + CVA6Cfg.XLEN + )-1:0]] && irq_ctrl_i.mip[S_SW_INTERRUPT[$clog2( + CVA6Cfg.XLEN )-1:0]]) begin - interrupt_cause = riscv::S_SW_INTERRUPT; + interrupt_cause = S_SW_INTERRUPT; end // Supervisor External Interrupt // The logical-OR of the software-writable bit and the signal from the external interrupt controller is // used to generate external interrupts to the supervisor - if (irq_ctrl_i.mie[riscv::S_EXT_INTERRUPT[$clog2( - riscv::XLEN - )-1:0]] && (irq_ctrl_i.mip[riscv::S_EXT_INTERRUPT[$clog2( - riscv::XLEN + if (irq_ctrl_i.mie[S_EXT_INTERRUPT[$clog2( + CVA6Cfg.XLEN + )-1:0]] && (irq_ctrl_i.mip[S_EXT_INTERRUPT[$clog2( + CVA6Cfg.XLEN )-1:0]] | irq_i[ariane_pkg::SupervisorIrq])) 
begin - interrupt_cause = riscv::S_EXT_INTERRUPT; + interrupt_cause = S_EXT_INTERRUPT; end // Machine Timer Interrupt - if (irq_ctrl_i.mip[riscv::M_TIMER_INTERRUPT[$clog2( - riscv::XLEN - )-1:0]] && irq_ctrl_i.mie[riscv::M_TIMER_INTERRUPT[$clog2( - riscv::XLEN + if (irq_ctrl_i.mip[M_TIMER_INTERRUPT[$clog2( + CVA6Cfg.XLEN + )-1:0]] && irq_ctrl_i.mie[M_TIMER_INTERRUPT[$clog2( + CVA6Cfg.XLEN )-1:0]]) begin - interrupt_cause = riscv::M_TIMER_INTERRUPT; + interrupt_cause = M_TIMER_INTERRUPT; end // Machine Mode Software Interrupt - if (irq_ctrl_i.mip[riscv::M_SW_INTERRUPT[$clog2( - riscv::XLEN - )-1:0]] && irq_ctrl_i.mie[riscv::M_SW_INTERRUPT[$clog2( - riscv::XLEN + if (irq_ctrl_i.mip[M_SW_INTERRUPT[$clog2( + CVA6Cfg.XLEN + )-1:0]] && irq_ctrl_i.mie[M_SW_INTERRUPT[$clog2( + CVA6Cfg.XLEN )-1:0]]) begin - interrupt_cause = riscv::M_SW_INTERRUPT; + interrupt_cause = M_SW_INTERRUPT; end // Machine Mode External Interrupt - if (irq_ctrl_i.mip[riscv::M_EXT_INTERRUPT[$clog2( - riscv::XLEN - )-1:0]] && irq_ctrl_i.mie[riscv::M_EXT_INTERRUPT[$clog2( - riscv::XLEN + if (irq_ctrl_i.mip[M_EXT_INTERRUPT[$clog2( + CVA6Cfg.XLEN + )-1:0]] && irq_ctrl_i.mie[M_EXT_INTERRUPT[$clog2( + CVA6Cfg.XLEN )-1:0]]) begin - interrupt_cause = riscv::M_EXT_INTERRUPT; + interrupt_cause = M_EXT_INTERRUPT; end - if (interrupt_cause[riscv::XLEN-1] && irq_ctrl_i.global_enable) begin + if (interrupt_cause[CVA6Cfg.XLEN-1] && irq_ctrl_i.global_enable) begin // However, if bit i in mideleg is set, interrupts are considered to be globally enabled if the hart’s current privilege // mode equals the delegated privilege mode (S or U) and that mode’s interrupt enable bit // (SIE or UIE in mstatus) is set, or if the current privilege mode is less than the delegated privilege mode. 
- if (irq_ctrl_i.mideleg[interrupt_cause[$clog2(riscv::XLEN)-1:0]]) begin + if (irq_ctrl_i.mideleg[interrupt_cause[$clog2(CVA6Cfg.XLEN)-1:0]]) begin if ((CVA6Cfg.RVS && irq_ctrl_i.sie && priv_lvl_i == riscv::PRIV_LVL_S) || (CVA6Cfg.RVU && priv_lvl_i == riscv::PRIV_LVL_U)) begin instruction_o.ex.valid = 1'b1; instruction_o.ex.cause = interrupt_cause; diff --git a/core/ex_stage.sv b/core/ex_stage.sv index 92f871c8e2..0855513557 100644 --- a/core/ex_stage.sv +++ b/core/ex_stage.sv @@ -18,22 +18,31 @@ module ex_stage import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int unsigned ASID_WIDTH = 1 + parameter type exception_t = logic, + parameter type bp_resolve_t = logic, + parameter type branchpredict_sbe_t = logic, + parameter type fu_data_t = logic, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low input logic flush_i, input logic debug_mode_i, - input logic [riscv::VLEN-1:0] rs1_forwarding_i, - input logic [riscv::VLEN-1:0] rs2_forwarding_i, + input logic [CVA6Cfg.VLEN-1:0] rs1_forwarding_i, + input logic [CVA6Cfg.VLEN-1:0] rs2_forwarding_i, input fu_data_t fu_data_i, - input logic [riscv::VLEN-1:0] pc_i, // PC of current instruction + input logic [CVA6Cfg.VLEN-1:0] pc_i, // PC of current instruction input logic is_compressed_instr_i, // we need to know if this was a compressed instruction // in order to calculate the next PC on a mis-predict // Fixed latency unit(s) - output riscv::xlen_t flu_result_o, - output logic [TRANS_ID_BITS-1:0] flu_trans_id_o, // ID of scoreboard entry at which to write back + output logic [CVA6Cfg.XLEN-1:0] flu_result_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] flu_trans_id_o, // ID of scoreboard entry at which 
to write back output exception_t flu_exception_o, output logic flu_ready_o, // FLU is ready output logic flu_valid_o, // FLU result is valid @@ -56,17 +65,17 @@ module ex_stage input logic lsu_valid_i, // Input is valid output logic load_valid_o, - output riscv::xlen_t load_result_o, - output logic [TRANS_ID_BITS-1:0] load_trans_id_o, + output logic [CVA6Cfg.XLEN-1:0] load_result_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] load_trans_id_o, output exception_t load_exception_o, output logic store_valid_o, - output riscv::xlen_t store_result_o, - output logic [TRANS_ID_BITS-1:0] store_trans_id_o, + output logic [CVA6Cfg.XLEN-1:0] store_result_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] store_trans_id_o, output exception_t store_exception_o, input logic lsu_commit_i, output logic lsu_commit_ready_o, // commit queue is ready to accept another commit request - input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, + input logic [CVA6Cfg.TRANS_ID_BITS-1:0] commit_tran_id_i, input logic stall_st_pending_i, output logic no_st_pending_o, input logic amo_valid_commit_i, @@ -77,17 +86,17 @@ module ex_stage input logic [2:0] fpu_rm_i, // FP rm input logic [2:0] fpu_frm_i, // FP frm csr input logic [6:0] fpu_prec_i, // FP precision control - output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, - output riscv::xlen_t fpu_result_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_trans_id_o, + output logic [CVA6Cfg.XLEN-1:0] fpu_result_o, output logic fpu_valid_o, output exception_t fpu_exception_o, // CoreV-X-Interface input logic x_valid_i, output logic x_ready_o, input logic [31:0] x_off_instr_i, - output logic [TRANS_ID_BITS-1:0] x_trans_id_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] x_trans_id_o, output exception_t x_exception_o, - output riscv::xlen_t x_result_o, + output logic [CVA6Cfg.XLEN-1:0] x_result_o, output logic x_valid_o, output logic x_we_o, output cvxif_pkg::cvxif_req_t cvxif_req_o, @@ -102,8 +111,8 @@ module ex_stage input riscv::priv_lvl_t ld_st_priv_lvl_i, input 
logic sum_i, input logic mxr_i, - input logic [riscv::PPNW-1:0] satp_ppn_i, - input logic [ ASID_WIDTH-1:0] asid_i, + input logic [CVA6Cfg.PPNW-1:0] satp_ppn_i, + input logic [ CVA6Cfg.ASID_WIDTH-1:0] asid_i, // icache translation requests input icache_arsp_t icache_areq_i, output icache_areq_t icache_areq_o, @@ -120,14 +129,14 @@ module ex_stage output logic dtlb_miss_o, // PMPs input riscv::pmpcfg_t [15:0] pmpcfg_i, - input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + input logic [15:0][CVA6Cfg.PLEN-3:0] pmpaddr_i, // RVFI - output [ riscv::VLEN-1:0] lsu_addr_o, - output [ riscv::PLEN-1:0] mem_paddr_o, - output [ (riscv::XLEN/8)-1:0] lsu_rmask_o, - output [ (riscv::XLEN/8)-1:0] lsu_wmask_o, - output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o + output [ CVA6Cfg.VLEN-1:0] lsu_addr_o, + output [ CVA6Cfg.PLEN-1:0] mem_paddr_o, + output [ (CVA6Cfg.XLEN/8)-1:0] lsu_rmask_o, + output [ (CVA6Cfg.XLEN/8)-1:0] lsu_wmask_o, + output [CVA6Cfg.TRANS_ID_BITS-1:0] lsu_addr_trans_id_o ); // ------------------------- @@ -154,15 +163,15 @@ module ex_stage logic current_instruction_is_sfence_vma; // These two register store the rs1 and rs2 parameters in case of `SFENCE_VMA` // instruction to be used for TLB flush in the next clock cycle. - logic [ASID_WIDTH-1:0] asid_to_be_flushed; - logic [riscv::VLEN-1:0] vaddr_to_be_flushed; + logic [CVA6Cfg.ASID_WIDTH-1:0] asid_to_be_flushed; + logic [CVA6Cfg.VLEN-1:0] vaddr_to_be_flushed; // from ALU to branch unit logic alu_branch_res; // branch comparison result - riscv::xlen_t alu_result, csr_result, mult_result; - logic [riscv::VLEN-1:0] branch_result; + logic [CVA6Cfg.XLEN-1:0] alu_result, csr_result, mult_result; + logic [CVA6Cfg.VLEN-1:0] branch_result; logic csr_ready, mult_ready; - logic [TRANS_ID_BITS-1:0] mult_trans_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] mult_trans_id; logic mult_valid; // 1. ALU (combinatorial) @@ -171,7 +180,8 @@ module ex_stage assign alu_data = (alu_valid_i | branch_valid_i) ? 
fu_data_i : '0; alu #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .fu_data_t(fu_data_t) ) alu_i ( .clk_i, .rst_ni, @@ -184,7 +194,11 @@ module ex_stage // we don't silence the branch unit as this is already critical and we do // not want to add another layer of logic branch_unit #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .exception_t(exception_t), + .bp_resolve_t(bp_resolve_t), + .branchpredict_sbe_t(branchpredict_sbe_t), + .fu_data_t(fu_data_t) ) branch_unit_i ( .clk_i, .rst_ni, @@ -205,7 +219,8 @@ module ex_stage // 3. CSR (sequential) csr_buffer #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .fu_data_t(fu_data_t) ) csr_buffer_i ( .clk_i, .rst_ni, @@ -223,7 +238,7 @@ module ex_stage // result MUX always_comb begin // Branch result as default case - flu_result_o = {{riscv::XLEN - riscv::VLEN{1'b0}}, branch_result}; + flu_result_o = {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, branch_result}; flu_trans_id_o = fu_data_i.trans_id; // ALU result if (alu_valid_i) begin @@ -248,7 +263,8 @@ module ex_stage assign mult_data = mult_valid_i ? fu_data_i : '0; mult #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .fu_data_t(fu_data_t) ) i_mult ( .clk_i, .rst_ni, @@ -270,7 +286,9 @@ module ex_stage assign fpu_data = fpu_valid_i ? fu_data_i : '0; fpu_wrap #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .exception_t(exception_t), + .fu_data_t(fu_data_t) ) fpu_i ( .clk_i, .rst_ni, @@ -305,7 +323,14 @@ module ex_stage load_store_unit #( .CVA6Cfg (CVA6Cfg), - .ASID_WIDTH(ASID_WIDTH) + .exception_t(exception_t), + .fu_data_t(fu_data_t), + .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t) ) lsu_i ( .clk_i, .rst_ni, @@ -361,7 +386,9 @@ module ex_stage fu_data_t cvxif_data; assign cvxif_data = x_valid_i ? 
fu_data_i : '0; cvxif_fu #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .exception_t(exception_t), + .fu_data_t(fu_data_t) ) cvxif_fu_i ( .clk_i, .rst_ni, @@ -407,7 +434,7 @@ module ex_stage // if the current instruction in EX_STAGE is a sfence.vma, in the next cycle no writes will happen end else if ((~current_instruction_is_sfence_vma) && (~((fu_data_i.operation == SFENCE_VMA) && csr_valid_i))) begin vaddr_to_be_flushed <= rs1_forwarding_i; - asid_to_be_flushed <= rs2_forwarding_i[ASID_WIDTH-1:0]; + asid_to_be_flushed <= rs2_forwarding_i[CVA6Cfg.ASID_WIDTH-1:0]; end end end else begin diff --git a/core/fpu_wrap.sv b/core/fpu_wrap.sv index 9219029d18..08413bca4b 100644 --- a/core/fpu_wrap.sv +++ b/core/fpu_wrap.sv @@ -16,7 +16,9 @@ module fpu_wrap import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type fu_data_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -29,7 +31,7 @@ module fpu_wrap input logic [ 2:0] fpu_rm_i, input logic [ 2:0] fpu_frm_i, input logic [ 6:0] fpu_prec_i, - output logic [TRANS_ID_BITS-1:0] fpu_trans_id_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_trans_id_o, output logic [ CVA6Cfg.FLen-1:0] result_o, output logic fpu_valid_o, output exception_t fpu_exception_o @@ -59,7 +61,7 @@ module fpu_wrap // Features (enabled formats, vectors etc.) 
localparam fpnew_pkg::fpu_features_t FPU_FEATURES = '{ - Width: unsigned'(riscv::XLEN), // parameterized using XLEN + Width: unsigned'(CVA6Cfg.XLEN), // parameterized using CVA6Cfg.XLEN EnableVectors: CVA6Cfg.XFVec, EnableNanBox: 1'b1, FpFmtMask: {CVA6Cfg.RVF, CVA6Cfg.RVD, CVA6Cfg.XF16, CVA6Cfg.XF8, CVA6Cfg.XF16ALT}, @@ -108,7 +110,7 @@ module fpu_wrap logic [2:0] fpu_rm_d, fpu_rm_q, fpu_rm; logic fpu_vec_op_d, fpu_vec_op_q, fpu_vec_op; - logic [TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] fpu_tag_d, fpu_tag_q, fpu_tag; logic fpu_in_ready, fpu_in_valid; logic fpu_out_ready, fpu_out_valid; @@ -529,7 +531,7 @@ module fpu_wrap fpnew_top #( .Features (FPU_FEATURES), .Implementation(FPU_IMPLEMENTATION), - .TagType (logic [TRANS_ID_BITS-1:0]) + .TagType (logic [CVA6Cfg.TRANS_ID_BITS-1:0]) ) i_fpnew_bulk ( .clk_i, .rst_ni, diff --git a/core/frontend/bht.sv b/core/frontend/bht.sv index 8be8b0bb25..feecb2c093 100644 --- a/core/frontend/bht.sv +++ b/core/frontend/bht.sv @@ -20,24 +20,25 @@ module bht #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type bht_update_t = logic, parameter int unsigned NR_ENTRIES = 1024 ) ( input logic clk_i, input logic rst_ni, input logic flush_i, input logic debug_mode_i, - input logic [ riscv::VLEN-1:0] vpc_i, - input ariane_pkg::bht_update_t bht_update_i, + input logic [ CVA6Cfg.VLEN-1:0] vpc_i, + input bht_update_t bht_update_i, // we potentially need INSTR_PER_FETCH predictions/cycle - output ariane_pkg::bht_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] bht_prediction_o + output ariane_pkg::bht_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] bht_prediction_o ); // the last bit is always zero, we don't need it for indexing localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 
1 : 2; // re-shape the branch history table - localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH; + localparam NR_ROWS = NR_ENTRIES / CVA6Cfg.INSTR_PER_FETCH; // number of bits needed to index the row - localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); - localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1; + localparam ROW_ADDR_BITS = $clog2(CVA6Cfg.INSTR_PER_FETCH); + localparam ROW_INDEX_BITS = CVA6Cfg.LOG2_INSTR_PER_FETCH; // number of bits we should use for prediction localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; // we are not interested in all bits of the address @@ -47,8 +48,8 @@ module bht #( logic valid; logic [1:0] saturation_counter; } - bht_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], - bht_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0]; + bht_d[NR_ROWS-1:0][CVA6Cfg.INSTR_PER_FETCH-1:0], + bht_q[NR_ROWS-1:0][CVA6Cfg.INSTR_PER_FETCH-1:0]; logic [$clog2(NR_ROWS)-1:0] index, update_pc; logic [ROW_INDEX_BITS-1:0] update_row_index; @@ -61,11 +62,11 @@ module bht #( assign update_row_index = '0; end - if (!ariane_pkg::FPGA_EN) begin : gen_asic_bht // ASIC TARGET + if (!CVA6Cfg.FPGA_EN) begin : gen_asic_bht // ASIC TARGET logic [1:0] saturation_counter; // prediction assignment - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_output + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_bht_output assign bht_prediction_o[i].valid = bht_q[index][i].valid; assign bht_prediction_o[i].taken = bht_q[index][i].saturation_counter[1] == 1'b1; end @@ -97,7 +98,7 @@ module bht #( always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin for (int unsigned i = 0; i < NR_ROWS; i++) begin - for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + for (int j = 0; j < CVA6Cfg.INSTR_PER_FETCH; j++) begin bht_q[i][j] <= '0; end end @@ -105,7 +106,7 @@ module bht #( // evict all entries if (flush_i) begin for (int i = 0; i < NR_ROWS; 
i++) begin - for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + for (int j = 0; j < CVA6Cfg.INSTR_PER_FETCH; j++) begin bht_q[i][j].valid <= 1'b0; bht_q[i][j].saturation_counter <= 2'b10; end @@ -121,16 +122,16 @@ module bht #( // number of bits par word in the bram localparam BRAM_WORD_BITS = $bits(ariane_pkg::bht_t); logic [ ROW_INDEX_BITS-1:0] row_index; - logic [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_ram_we; - logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0; - logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1; - logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address; - logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata; - logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0; - logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1; + logic [ CVA6Cfg.INSTR_PER_FETCH-1:0] bht_ram_we; + logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_0; + logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_read_address_1; + logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] bht_ram_write_address; + logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_wdata; + logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_0; + logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] bht_ram_rdata_1; - ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht; - ariane_pkg::bht_t [ ariane_pkg::INSTR_PER_FETCH-1:0] bht_updated; + ariane_pkg::bht_t [ CVA6Cfg.INSTR_PER_FETCH-1:0] bht; + ariane_pkg::bht_t [ CVA6Cfg.INSTR_PER_FETCH-1:0] bht_updated; if (CVA6Cfg.RVC) begin : gen_row_index assign row_index = vpc_i[ROW_ADDR_BITS+OFFSET-1:OFFSET]; @@ -150,7 +151,7 @@ module bht #( bht_updated = '0; bht = '0; - for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin if (row_index == i) begin bht_ram_read_address_0[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] 
= index; bht_prediction_o[i].valid = bht_ram_rdata_0[i*BRAM_WORD_BITS+2]; @@ -159,7 +160,7 @@ module bht #( end if (bht_update_i.valid && !debug_mode_i) begin - for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin if (update_row_index == i) begin bht_ram_read_address_1[i*$clog2(NR_ROWS)+:$clog2(NR_ROWS)] = update_pc; bht[i].saturation_counter = bht_ram_rdata_1[i*BRAM_WORD_BITS+:2]; @@ -194,7 +195,7 @@ module bht #( end end - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_bht_ram + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_bht_ram AsyncThreePortRam #( .ADDR_WIDTH($clog2(NR_ROWS)), .DATA_DEPTH(NR_ROWS), diff --git a/core/frontend/btb.sv b/core/frontend/btb.sv index 9500f37339..72f404f382 100644 --- a/core/frontend/btb.sv +++ b/core/frontend/btb.sv @@ -27,6 +27,8 @@ // branch target buffer module btb #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type btb_update_t = logic, + parameter type btb_prediction_t = logic, parameter int NR_ENTRIES = 8 ) ( input logic clk_i, // Clock @@ -34,23 +36,23 @@ module btb #( input logic flush_i, // flush the btb input logic debug_mode_i, - input logic [riscv::VLEN-1:0] vpc_i, // virtual PC from IF stage - input ariane_pkg::btb_update_t btb_update_i, // update btb with this information - output ariane_pkg::btb_prediction_t [ariane_pkg::INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb + input logic [CVA6Cfg.VLEN-1:0] vpc_i, // virtual PC from IF stage + input btb_update_t btb_update_i, // update btb with this information + output btb_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] btb_prediction_o // prediction from btb ); // the last bit is always zero, we don't need it for indexing localparam OFFSET = CVA6Cfg.RVC == 1'b1 ? 
1 : 2; // re-shape the branch history table - localparam NR_ROWS = NR_ENTRIES / ariane_pkg::INSTR_PER_FETCH; + localparam NR_ROWS = NR_ENTRIES / CVA6Cfg.INSTR_PER_FETCH; // number of bits needed to index the row - localparam ROW_ADDR_BITS = $clog2(ariane_pkg::INSTR_PER_FETCH); - localparam ROW_INDEX_BITS = CVA6Cfg.RVC == 1'b1 ? $clog2(ariane_pkg::INSTR_PER_FETCH) : 1; + localparam ROW_ADDR_BITS = $clog2(CVA6Cfg.INSTR_PER_FETCH); + localparam ROW_INDEX_BITS = CVA6Cfg.LOG2_INSTR_PER_FETCH; // number of bits we should use for prediction localparam PREDICTION_BITS = $clog2(NR_ROWS) + OFFSET + ROW_ADDR_BITS; // prevent aliasing to degrade performance localparam ANTIALIAS_BITS = 8; // number of bits par word in the bram - localparam BRAM_WORD_BITS = $bits(ariane_pkg::btb_prediction_t); + localparam BRAM_WORD_BITS = $bits(btb_prediction_t); // we are not interested in all bits of the address unread i_unread (.d_i(|vpc_i)); @@ -66,20 +68,20 @@ module btb #( assign update_row_index = '0; end - if (ariane_pkg::FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS - logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_prediction; - logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_prediction; - logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction; - logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction; - logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction; + if (CVA6Cfg.FPGA_EN) begin : gen_fpga_btb //FPGA TARGETS + logic [ CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_csel_prediction; + logic [ CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_we_prediction; + logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_prediction; + logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_prediction; + logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_rdata_prediction; - logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_csel_update; - logic [ ariane_pkg::INSTR_PER_FETCH-1:0] btb_ram_we_update; - 
logic [ariane_pkg::INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update; - logic [ ariane_pkg::INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update; + logic [ CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_csel_update; + logic [ CVA6Cfg.INSTR_PER_FETCH-1:0] btb_ram_we_update; + logic [CVA6Cfg.INSTR_PER_FETCH*$clog2(NR_ROWS)-1:0] btb_ram_addr_update; + logic [ CVA6Cfg.INSTR_PER_FETCH*BRAM_WORD_BITS-1:0] btb_ram_wdata_update; // output matching prediction - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_output assign btb_ram_csel_prediction[i] = 1'b1; assign btb_ram_we_prediction[i] = 1'b0; assign btb_ram_wdata_prediction = '0; @@ -98,7 +100,7 @@ module btb #( btb_ram_wdata_update = '0; if (btb_update_i.valid && !debug_mode_i) begin - for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin if (update_row_index == i) begin btb_ram_csel_update[i] = 1'b1; btb_ram_we_update[i] = 1'b1; @@ -111,7 +113,7 @@ module btb #( end end - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_ram + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_ram SyncDpRam #( .ADDR_WIDTH($clog2(NR_ROWS)), .DATA_DEPTH(NR_ROWS), @@ -140,12 +142,12 @@ module btb #( // typedef for all branch target entries // we may want to try to put a tag field that fills the rest of the PC in-order to mitigate aliasing effects - ariane_pkg::btb_prediction_t - btb_d[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0], - btb_q[NR_ROWS-1:0][ariane_pkg::INSTR_PER_FETCH-1:0]; + btb_prediction_t + btb_d[NR_ROWS-1:0][CVA6Cfg.INSTR_PER_FETCH-1:0], + btb_q[NR_ROWS-1:0][CVA6Cfg.INSTR_PER_FETCH-1:0]; // output matching prediction - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_btb_output + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_btb_output assign btb_prediction_o[i] = 
btb_q[index][i]; // workaround end @@ -172,7 +174,7 @@ module btb #( // evict all entries if (flush_i) begin for (int i = 0; i < NR_ROWS; i++) begin - for (int j = 0; j < ariane_pkg::INSTR_PER_FETCH; j++) begin + for (int j = 0; j < CVA6Cfg.INSTR_PER_FETCH; j++) begin btb_q[i][j].valid <= 1'b0; end end diff --git a/core/frontend/frontend.sv b/core/frontend/frontend.sv index 335f088420..a36c1b81ec 100644 --- a/core/frontend/frontend.sv +++ b/core/frontend/frontend.sv @@ -18,7 +18,33 @@ module frontend import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type bp_resolve_t = logic, + parameter type fetch_entry_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + + parameter type btb_update_t = struct packed { + logic valid; + logic [CVA6Cfg.VLEN-1:0] pc; // update at PC + logic [CVA6Cfg.VLEN-1:0] target_address; + }, + + parameter type btb_prediction_t = struct packed { + logic valid; + logic [CVA6Cfg.VLEN-1:0] target_address; + }, + + parameter type ras_t = struct packed { + logic valid; + logic [CVA6Cfg.VLEN-1:0] ra; + }, + + parameter type bht_update_t = struct packed { + logic valid; + logic [CVA6Cfg.VLEN-1:0] pc; // update at PC + logic taken; + } ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -27,17 +53,17 @@ module frontend input logic halt_i, // halt commit stage input logic debug_mode_i, // global input - input logic [riscv::VLEN-1:0] boot_addr_i, + input logic [CVA6Cfg.VLEN-1:0] boot_addr_i, // Set a new PC // mispredict input bp_resolve_t resolved_branch_i, // from controller signaling a branch_predict -> update BTB // from commit, when flushing the whole pipeline input logic set_pc_commit_i, // Take the PC from commit stage - input logic [riscv::VLEN-1:0] pc_commit_i, // PC of instruction in commit stage + input logic [CVA6Cfg.VLEN-1:0] pc_commit_i, 
// PC of instruction in commit stage // CSR input - input logic [riscv::VLEN-1:0] epc_i, // exception PC which we need to return to + input logic [CVA6Cfg.VLEN-1:0] epc_i, // exception PC which we need to return to input logic eret_i, // return from exception - input logic [riscv::VLEN-1:0] trap_vector_base_i, // base of trap vector + input logic [CVA6Cfg.VLEN-1:0] trap_vector_base_i, // base of trap vector input logic ex_valid_i, // exception is valid - from commit input logic set_debug_pc_i, // jump to debug address // Instruction Fetch @@ -49,30 +75,30 @@ module frontend input logic fetch_entry_ready_i // ID acknowledged this instruction ); // Instruction Cache Registers, from I$ - logic [ FETCH_WIDTH-1:0] icache_data_q; + logic [ CVA6Cfg.FETCH_WIDTH-1:0] icache_data_q; logic icache_valid_q; ariane_pkg::frontend_exception_t icache_ex_valid_q; - logic [ riscv::VLEN-1:0] icache_vaddr_q; + logic [ CVA6Cfg.VLEN-1:0] icache_vaddr_q; logic instr_queue_ready; - logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_consumed; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] instr_queue_consumed; // upper-most branch-prediction from last cycle btb_prediction_t btb_q; bht_prediction_t bht_q; // instruction fetch is ready logic if_ready; - logic [riscv::VLEN-1:0] npc_d, npc_q; // next PC + logic [CVA6Cfg.VLEN-1:0] npc_d, npc_q; // next PC // indicates whether we come out of reset (then we need to load boot_addr_i) logic npc_rst_load_q; logic replay; - logic [ riscv::VLEN-1:0] replay_addr; + logic [ CVA6Cfg.VLEN-1:0] replay_addr; // shift amount - logic [$clog2(ariane_pkg::INSTR_PER_FETCH)-1:0] shamt; + logic [$clog2(CVA6Cfg.INSTR_PER_FETCH)-1:0] shamt; // address will always be 16 bit aligned, make this explicit here if (CVA6Cfg.RVC) begin : gen_shamt - assign shamt = icache_dreq_i.vaddr[$clog2(ariane_pkg::INSTR_PER_FETCH):1]; + assign shamt = icache_dreq_i.vaddr[$clog2(CVA6Cfg.INSTR_PER_FETCH):1]; end else begin assign shamt = 1'b0; end @@ -81,33 +107,33 @@ module frontend // Ctrl Flow 
Speculation // ----------------------- // RVI ctrl flow prediction - logic [INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump; - logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvi_imm; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] rvi_return, rvi_call, rvi_branch, rvi_jalr, rvi_jump; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] rvi_imm; // RVC branching - logic [INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call; - logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] rvc_imm; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] rvc_branch, rvc_jump, rvc_jr, rvc_return, rvc_jalr, rvc_call; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] rvc_imm; // re-aligned instruction and address (coming from cache - combinationally) - logic [INSTR_PER_FETCH-1:0][ 31:0] instr; - logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr; - logic [INSTR_PER_FETCH-1:0] instruction_valid; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0][ 31:0] instr; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] addr; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] instruction_valid; // BHT, BTB and RAS prediction - bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction; - btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction; - bht_prediction_t [INSTR_PER_FETCH-1:0] bht_prediction_shifted; - btb_prediction_t [INSTR_PER_FETCH-1:0] btb_prediction_shifted; + bht_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] bht_prediction; + btb_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] btb_prediction; + bht_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] bht_prediction_shifted; + btb_prediction_t [CVA6Cfg.INSTR_PER_FETCH-1:0] btb_prediction_shifted; ras_t ras_predict; - logic [ riscv::VLEN-1:0] vpc_btb; + logic [ CVA6Cfg.VLEN-1:0] vpc_btb; // branch-predict update logic is_mispredict; logic ras_push, ras_pop; - logic [ riscv::VLEN-1:0] ras_update; + logic [ CVA6Cfg.VLEN-1:0] ras_update; // Instruction FIFO - logic [ riscv::VLEN-1:0] predict_address; - cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type; - 
logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvi_cf; - logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken_rvc_cf; + logic [ CVA6Cfg.VLEN-1:0] predict_address; + cf_t [CVA6Cfg.INSTR_PER_FETCH-1:0] cf_type; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] taken_rvi_cf; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] taken_rvc_cf; logic serving_unaligned; // Re-align instructions @@ -134,17 +160,17 @@ module frontend // the prediction we saved from the previous fetch if (CVA6Cfg.RVC) begin : gen_btb_prediction_shifted assign bht_prediction_shifted[0] = (serving_unaligned) ? bht_q : bht_prediction[addr[0][$clog2( - INSTR_PER_FETCH + CVA6Cfg.INSTR_PER_FETCH ):1]]; assign btb_prediction_shifted[0] = (serving_unaligned) ? btb_q : btb_prediction[addr[0][$clog2( - INSTR_PER_FETCH + CVA6Cfg.INSTR_PER_FETCH ):1]]; // for all other predictions we can use the generated address to index // into the branch prediction data structures - for (genvar i = 1; i < INSTR_PER_FETCH; i++) begin : gen_prediction_address - assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]]; - assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(INSTR_PER_FETCH):1]]; + for (genvar i = 1; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_prediction_address + assign bht_prediction_shifted[i] = bht_prediction[addr[i][$clog2(CVA6Cfg.INSTR_PER_FETCH):1]]; + assign btb_prediction_shifted[i] = btb_prediction[addr[i][$clog2(CVA6Cfg.INSTR_PER_FETCH):1]]; end end else begin assign bht_prediction_shifted[0] = (serving_unaligned) ? 
bht_q : bht_prediction[addr[0][1]]; @@ -156,13 +182,13 @@ module frontend // address of the call/return already logic bp_valid; - logic [INSTR_PER_FETCH-1:0] is_branch; - logic [INSTR_PER_FETCH-1:0] is_call; - logic [INSTR_PER_FETCH-1:0] is_jump; - logic [INSTR_PER_FETCH-1:0] is_return; - logic [INSTR_PER_FETCH-1:0] is_jalr; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_branch; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_call; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_jump; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_return; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] is_jalr; - for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin // branch history table -> BHT assign is_branch[i] = instruction_valid[i] & (rvi_branch[i] | rvc_branch[i]); // function calls -> RAS @@ -181,14 +207,14 @@ module frontend taken_rvc_cf = '0; predict_address = '0; - for (int i = 0; i < INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF; + for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) cf_type[i] = ariane_pkg::NoCF; ras_push = 1'b0; ras_pop = 1'b0; ras_update = '0; // lower most prediction gets precedence - for (int i = INSTR_PER_FETCH - 1; i >= 0; i--) begin + for (int i = CVA6Cfg.INSTR_PER_FETCH - 1; i >= 0; i--) begin unique case ({ is_branch[i], is_return[i], is_jump[i], is_jalr[i] }) @@ -229,8 +255,8 @@ module frontend // otherwise default to static prediction end else begin // set if immediate is negative - static prediction - taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][riscv::VLEN-1]; - taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][riscv::VLEN-1]; + taken_rvi_cf[i] = rvi_branch[i] & rvi_imm[i][CVA6Cfg.VLEN-1]; + taken_rvc_cf[i] = rvc_branch[i] & rvc_imm[i][CVA6Cfg.VLEN-1]; end if (taken_rvi_cf[i] || taken_rvc_cf[i]) begin cf_type[i] = ariane_pkg::Branch; @@ -257,7 +283,7 @@ module frontend // BP cannot be valid if we have a return instruction and the RAS is not giving a valid address // Check that we encountered a control flow and that for 
a return the RAS // contains a valid prediction. - for (int i = 0; i < INSTR_PER_FETCH; i++) + for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) bp_valid |= ((cf_type[i] != NoCF & cf_type[i] != Return) | ((cf_type[i] == Return) & ras_predict.valid)); end assign is_mispredict = resolved_branch_i.valid & resolved_branch_i.is_mispredict; @@ -307,7 +333,7 @@ module frontend // Mis-predict handling is a little bit different // select PC a.k.a PC Gen always_comb begin : npc_select - automatic logic [riscv::VLEN-1:0] fetch_address; + automatic logic [CVA6Cfg.VLEN-1:0] fetch_address; // check whether we come out of reset // this is a workaround. some tools have issues // having boot_addr_i in the asynchronous @@ -329,7 +355,7 @@ module frontend end // 1. Default assignment if (if_ready) begin - npc_d = {fetch_address[riscv::VLEN-1:2], 2'b0} + 'h4; + npc_d = {fetch_address[CVA6Cfg.VLEN-1:2], 2'b0} + 'h4; end // 2. Replay instruction fetch if (replay) begin @@ -357,16 +383,16 @@ module frontend // instruction in the commit stage // TODO(zarubaf) This adder can at least be merged with the one in the csr_regfile stage if (set_pc_commit_i) begin - npc_d = pc_commit_i + (halt_i ? '0 : {{riscv::VLEN - 3{1'b0}}, 3'b100}); + npc_d = pc_commit_i + (halt_i ? '0 : {{CVA6Cfg.VLEN - 3{1'b0}}, 3'b100}); end // 7. 
Debug // enter debug on a hard-coded base-address if (CVA6Cfg.DebugEn && set_debug_pc_i) - npc_d = CVA6Cfg.DmBaseAddress[riscv::VLEN-1:0] + CVA6Cfg.HaltAddress[riscv::VLEN-1:0]; + npc_d = CVA6Cfg.DmBaseAddress[CVA6Cfg.VLEN-1:0] + CVA6Cfg.HaltAddress[CVA6Cfg.VLEN-1:0]; icache_dreq_o.vaddr = fetch_address; end - logic [FETCH_WIDTH-1:0] icache_data; + logic [CVA6Cfg.FETCH_WIDTH-1:0] icache_data; // re-align the cache line assign icache_data = icache_dreq_i.data >> {shamt, 4'b0}; @@ -398,8 +424,8 @@ module frontend icache_ex_valid_q <= ariane_pkg::FE_NONE; end // save the uppermost prediction - btb_q <= btb_prediction[INSTR_PER_FETCH-1]; - bht_q <= bht_prediction[INSTR_PER_FETCH-1]; + btb_q <= btb_prediction[CVA6Cfg.INSTR_PER_FETCH-1]; + bht_q <= bht_prediction[CVA6Cfg.INSTR_PER_FETCH-1]; end end end @@ -409,6 +435,7 @@ module frontend end else begin : ras_gen ras #( .CVA6Cfg(CVA6Cfg), + .ras_t(ras_t), .DEPTH (CVA6Cfg.RASDepth) ) i_ras ( .clk_i, @@ -424,13 +451,15 @@ module frontend //For FPGA, BTB is implemented in read synchronous BRAM //while for ASIC, BTB is implemented in D flip-flop //and can be read at the same cycle. - assign vpc_btb = (ariane_pkg::FPGA_EN) ? icache_dreq_i.vaddr : icache_vaddr_q; + assign vpc_btb = (CVA6Cfg.FPGA_EN) ? 
icache_dreq_i.vaddr : icache_vaddr_q; if (CVA6Cfg.BTBEntries == 0) begin assign btb_prediction = '0; end else begin : btb_gen btb #( .CVA6Cfg (CVA6Cfg), + .btb_update_t(btb_update_t), + .btb_prediction_t(btb_prediction_t), .NR_ENTRIES(CVA6Cfg.BTBEntries) ) i_btb ( .clk_i, @@ -448,6 +477,7 @@ module frontend end else begin : bht_gen bht #( .CVA6Cfg (CVA6Cfg), + .bht_update_t(bht_update_t), .NR_ENTRIES(CVA6Cfg.BHTEntries) ) i_bht ( .clk_i, @@ -462,7 +492,7 @@ module frontend // we need to inspect up to INSTR_PER_FETCH instructions for branches // and jumps - for (genvar i = 0; i < INSTR_PER_FETCH; i++) begin : gen_instr_scan + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_instr_scan instr_scan #( .CVA6Cfg(CVA6Cfg) ) i_instr_scan ( @@ -484,7 +514,8 @@ module frontend end instr_queue #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .fetch_entry_t(fetch_entry_t) ) i_instr_queue ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -508,7 +539,7 @@ module frontend // pragma translate_off `ifndef VERILATOR initial begin - assert (FETCH_WIDTH == 32 || FETCH_WIDTH == 64) + assert (CVA6Cfg.FETCH_WIDTH == 32 || CVA6Cfg.FETCH_WIDTH == 64) else $fatal(1, "[frontend] fetch width != not supported"); end `endif diff --git a/core/frontend/instr_queue.sv b/core/frontend/instr_queue.sv index 3f955937a9..740aabbdbf 100644 --- a/core/frontend/instr_queue.sv +++ b/core/frontend/instr_queue.sv @@ -46,27 +46,28 @@ module instr_queue import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type fetch_entry_t = logic ) ( input logic clk_i, input logic rst_ni, input logic flush_i, - input logic [ariane_pkg::INSTR_PER_FETCH-1:0][31:0] instr_i, - input logic [ariane_pkg::INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_i, - input logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid_i, + input logic [CVA6Cfg.INSTR_PER_FETCH-1:0][31:0] instr_i, + input logic 
[CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] addr_i, + input logic [CVA6Cfg.INSTR_PER_FETCH-1:0] valid_i, output logic ready_o, - output logic [ariane_pkg::INSTR_PER_FETCH-1:0] consumed_o, + output logic [CVA6Cfg.INSTR_PER_FETCH-1:0] consumed_o, // we've encountered an exception, at this point the only possible exceptions are page-table faults input ariane_pkg::frontend_exception_t exception_i, - input logic [riscv::VLEN-1:0] exception_addr_i, + input logic [CVA6Cfg.VLEN-1:0] exception_addr_i, // branch predict - input logic [riscv::VLEN-1:0] predict_address_i, - input ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH-1:0] cf_type_i, + input logic [CVA6Cfg.VLEN-1:0] predict_address_i, + input ariane_pkg::cf_t [CVA6Cfg.INSTR_PER_FETCH-1:0] cf_type_i, // replay instruction because one of the FIFO was already full output logic replay_o, - output logic [riscv::VLEN-1:0] replay_addr_o, // address at which to replay this instruction + output logic [CVA6Cfg.VLEN-1:0] replay_addr_o, // address at which to replay this instruction // to processor backend - output ariane_pkg::fetch_entry_t fetch_entry_o, + output fetch_entry_t fetch_entry_o, output logic fetch_entry_valid_o, input logic fetch_entry_ready_i ); @@ -75,64 +76,64 @@ module instr_queue logic [31:0] instr; // instruction word ariane_pkg::cf_t cf; // branch was taken ariane_pkg::frontend_exception_t ex; // exception happened - logic [riscv::VLEN-1:0] ex_vaddr; // lower VLEN bits of tval for exception + logic [CVA6Cfg.VLEN-1:0] ex_vaddr; // lower CVA6Cfg.VLEN bits of tval for exception } instr_data_t; - logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] branch_index; + logic [CVA6Cfg.LOG2_INSTR_PER_FETCH-1:0] branch_index; // instruction queues - logic [ariane_pkg::INSTR_PER_FETCH-1:0][$clog2( + logic [CVA6Cfg.INSTR_PER_FETCH-1:0][$clog2( ariane_pkg::FETCH_FIFO_DEPTH )-1:0] instr_queue_usage; - instr_data_t [ariane_pkg::INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out; - logic [ariane_pkg::INSTR_PER_FETCH-1:0] push_instr, 
push_instr_fifo; - logic [ ariane_pkg::INSTR_PER_FETCH-1:0] pop_instr; - logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_full; - logic [ ariane_pkg::INSTR_PER_FETCH-1:0] instr_queue_empty; + instr_data_t [CVA6Cfg.INSTR_PER_FETCH-1:0] instr_data_in, instr_data_out; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] push_instr, push_instr_fifo; + logic [ CVA6Cfg.INSTR_PER_FETCH-1:0] pop_instr; + logic [ CVA6Cfg.INSTR_PER_FETCH-1:0] instr_queue_full; + logic [ CVA6Cfg.INSTR_PER_FETCH-1:0] instr_queue_empty; logic instr_overflow; // address queue logic [$clog2(ariane_pkg::FETCH_FIFO_DEPTH)-1:0] address_queue_usage; - logic [ riscv::VLEN-1:0] address_out; + logic [ CVA6Cfg.VLEN-1:0] address_out; logic pop_address; logic push_address; logic full_address; logic empty_address; logic address_overflow; // input stream counter - logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q; + logic [CVA6Cfg.LOG2_INSTR_PER_FETCH-1:0] idx_is_d, idx_is_q; // Registers // output FIFO select, one-hot - logic [ariane_pkg::INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q; - logic [riscv::VLEN-1:0] pc_d, pc_q; // current PC + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] idx_ds_d, idx_ds_q; + logic [CVA6Cfg.VLEN-1:0] pc_d, pc_q; // current PC logic reset_address_d, reset_address_q; // we need to re-set the address because of a flush - logic [ariane_pkg::INSTR_PER_FETCH*2-2:0] branch_mask_extended; - logic [ariane_pkg::INSTR_PER_FETCH-1:0] branch_mask; + logic [CVA6Cfg.INSTR_PER_FETCH*2-2:0] branch_mask_extended; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] branch_mask; logic branch_empty; - logic [ariane_pkg::INSTR_PER_FETCH-1:0] taken; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] taken; // shift amount, e.g.: instructions we want to retire - logic [ariane_pkg::LOG2_INSTR_PER_FETCH:0] popcount; - logic [ariane_pkg::LOG2_INSTR_PER_FETCH-1:0] shamt; - logic [ariane_pkg::INSTR_PER_FETCH-1:0] valid; - logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] consumed_extended; + logic [CVA6Cfg.LOG2_INSTR_PER_FETCH:0] popcount; + logic 
[CVA6Cfg.LOG2_INSTR_PER_FETCH-1:0] shamt; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] valid; + logic [CVA6Cfg.INSTR_PER_FETCH*2-1:0] consumed_extended; // FIFO mask - logic [ariane_pkg::INSTR_PER_FETCH*2-1:0] fifo_pos_extended; - logic [ariane_pkg::INSTR_PER_FETCH-1:0] fifo_pos; - logic [ariane_pkg::INSTR_PER_FETCH*2-1:0][31:0] instr; - ariane_pkg::cf_t [ariane_pkg::INSTR_PER_FETCH*2-1:0] cf; + logic [CVA6Cfg.INSTR_PER_FETCH*2-1:0] fifo_pos_extended; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] fifo_pos; + logic [CVA6Cfg.INSTR_PER_FETCH*2-1:0][31:0] instr; + ariane_pkg::cf_t [CVA6Cfg.INSTR_PER_FETCH*2-1:0] cf; // replay interface - logic [ariane_pkg::INSTR_PER_FETCH-1:0] instr_overflow_fifo; + logic [CVA6Cfg.INSTR_PER_FETCH-1:0] instr_overflow_fifo; assign ready_o = ~(|instr_queue_full) & ~full_address; if (ariane_pkg::RVC) begin : gen_multiple_instr_per_fetch_with_C - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_unpack_taken + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_unpack_taken assign taken[i] = cf_type_i[i] != ariane_pkg::NoCF; end // calculate a branch mask, e.g.: get the first taken branch lzc #( - .WIDTH(ariane_pkg::INSTR_PER_FETCH), + .WIDTH(CVA6Cfg.INSTR_PER_FETCH), .MODE (0) // count trailing zeros ) i_lzc_branch_index ( .in_i (taken), // we want to count trailing zeros @@ -147,17 +148,17 @@ ariane_pkg::FETCH_FIFO_DEPTH // leading zero count = 1 // 0 0 0 1, 1 1 1 << 1 = 0 0 1 1, 1 1 0 // take the upper 4 bits: 0 0 1 1 - assign branch_mask_extended = {{{ariane_pkg::INSTR_PER_FETCH-1}{1'b0}}, {{ariane_pkg::INSTR_PER_FETCH}{1'b1}}} << branch_index; - assign branch_mask = branch_mask_extended[ariane_pkg::INSTR_PER_FETCH * 2 - 2:ariane_pkg::INSTR_PER_FETCH - 1]; + assign branch_mask_extended = {{{CVA6Cfg.INSTR_PER_FETCH-1}{1'b0}}, {{CVA6Cfg.INSTR_PER_FETCH}{1'b1}}} << branch_index; + assign branch_mask = branch_mask_extended[CVA6Cfg.INSTR_PER_FETCH * 2 - 2:CVA6Cfg.INSTR_PER_FETCH - 1]; // mask with taken branches to get the 
actual amount of instructions we want to push assign valid = valid_i & branch_mask; // rotate right again assign consumed_extended = {push_instr_fifo, push_instr_fifo} >> idx_is_q; - assign consumed_o = consumed_extended[ariane_pkg::INSTR_PER_FETCH-1:0]; + assign consumed_o = consumed_extended[CVA6Cfg.INSTR_PER_FETCH-1:0]; // count the numbers of valid instructions we've pushed from this package popcount #( - .INPUT_WIDTH(ariane_pkg::INSTR_PER_FETCH) + .INPUT_WIDTH(CVA6Cfg.INSTR_PER_FETCH) ) i_popcount ( .data_i (push_instr_fifo), .popcount_o(popcount) @@ -173,21 +174,21 @@ ariane_pkg::FETCH_FIFO_DEPTH // rotate left by the current position assign fifo_pos_extended = {valid, valid} << idx_is_q; // we just care about the upper bits - assign fifo_pos = fifo_pos_extended[ariane_pkg::INSTR_PER_FETCH*2-1:ariane_pkg::INSTR_PER_FETCH]; + assign fifo_pos = fifo_pos_extended[CVA6Cfg.INSTR_PER_FETCH*2-1:CVA6Cfg.INSTR_PER_FETCH]; // the fifo_position signal can directly be used to guide the push signal of each FIFO // make sure it is not full assign push_instr = fifo_pos & ~instr_queue_full; // duplicate the entries for easier selection e.g.: 3 2 1 0 3 2 1 0 - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_duplicate_instr_input assign instr[i] = instr_i[i]; - assign instr[i+ariane_pkg::INSTR_PER_FETCH] = instr_i[i]; + assign instr[i+CVA6Cfg.INSTR_PER_FETCH] = instr_i[i]; assign cf[i] = cf_type_i[i]; - assign cf[i+ariane_pkg::INSTR_PER_FETCH] = cf_type_i[i]; + assign cf[i+CVA6Cfg.INSTR_PER_FETCH] = cf_type_i[i]; end // shift the inputs - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_fifo_input_select + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_fifo_input_select /* verilator lint_off WIDTH */ assign instr_data_in[i].instr = instr[i+idx_is_q]; assign instr_data_in[i].cf = cf[i+idx_is_q]; @@ -273,7 +274,7 @@ 
ariane_pkg::FETCH_FIFO_DEPTH fetch_entry_o.branch_predict.predict_address = address_out; fetch_entry_o.branch_predict.cf = ariane_pkg::NoCF; // output mux select - for (int unsigned i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + for (int unsigned i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin if (idx_ds_q[i]) begin if (instr_data_out[i].ex == ariane_pkg::FE_INSTR_ACCESS_FAULT) begin fetch_entry_o.ex.cause = riscv::INSTR_ACCESS_FAULT; @@ -283,7 +284,7 @@ ariane_pkg::FETCH_FIFO_DEPTH fetch_entry_o.instruction = instr_data_out[i].instr; fetch_entry_o.ex.valid = instr_data_out[i].ex != ariane_pkg::FE_NONE; fetch_entry_o.ex.tval = { - {(riscv::XLEN - riscv::VLEN) {1'b0}}, instr_data_out[i].ex_vaddr + {(CVA6Cfg.XLEN - CVA6Cfg.VLEN) {1'b0}}, instr_data_out[i].ex_vaddr }; fetch_entry_o.branch_predict.cf = instr_data_out[i].cf; pop_instr[i] = fetch_entry_valid_o & fetch_entry_ready_i; @@ -292,7 +293,7 @@ ariane_pkg::FETCH_FIFO_DEPTH // rotate the pointer left if (fetch_entry_ready_i) begin idx_ds_d = { - idx_ds_q[ariane_pkg::INSTR_PER_FETCH-2:0], idx_ds_q[ariane_pkg::INSTR_PER_FETCH-1] + idx_ds_q[CVA6Cfg.INSTR_PER_FETCH-2:0], idx_ds_q[CVA6Cfg.INSTR_PER_FETCH-1] }; end end @@ -309,7 +310,7 @@ ariane_pkg::FETCH_FIFO_DEPTH end else begin fetch_entry_o.ex.cause = riscv::INSTR_PAGE_FAULT; end - fetch_entry_o.ex.tval = {{64 - riscv::VLEN{1'b0}}, instr_data_out[0].ex_vaddr}; + fetch_entry_o.ex.tval = {{64 - CVA6Cfg.VLEN{1'b0}}, instr_data_out[0].ex_vaddr}; fetch_entry_o.branch_predict.predict_address = address_out; fetch_entry_o.branch_predict.cf = instr_data_out[0].cf; @@ -350,12 +351,13 @@ ariane_pkg::FETCH_FIFO_DEPTH end // FIFOs - for (genvar i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin : gen_instr_fifo + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin : gen_instr_fifo // Make sure we don't save any instructions if we couldn't save the address assign push_instr_fifo[i] = push_instr[i] & ~address_overflow; fifo_v3 #( .DEPTH(ariane_pkg::FETCH_FIFO_DEPTH), 
- .dtype(instr_data_t) + .dtype(instr_data_t), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_fifo_instr_data ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -375,14 +377,15 @@ ariane_pkg::FETCH_FIFO_DEPTH always_comb begin push_address = 1'b0; // check if we are pushing a ctrl flow change, if so save the address - for (int i = 0; i < ariane_pkg::INSTR_PER_FETCH; i++) begin + for (int i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin push_address |= push_instr[i] & (instr_data_in[i].cf != ariane_pkg::NoCF); end end fifo_v3 #( .DEPTH (ariane_pkg::FETCH_FIFO_DEPTH), // TODO(zarubaf): Fork out to separate param - .DATA_WIDTH(riscv::VLEN) + .DATA_WIDTH(CVA6Cfg.VLEN), + .FPGA_EN(CVA6Cfg.FPGA_EN) ) i_fifo_address ( .clk_i (clk_i), .rst_ni (rst_ni), diff --git a/core/frontend/instr_scan.sv b/core/frontend/instr_scan.sv index 592d5d34a4..cc39628bc7 100644 --- a/core/frontend/instr_scan.sv +++ b/core/frontend/instr_scan.sv @@ -24,20 +24,20 @@ module instr_scan #( output logic rvi_branch_o, output logic rvi_jalr_o, output logic rvi_jump_o, - output logic [riscv::VLEN-1:0] rvi_imm_o, + output logic [CVA6Cfg.VLEN-1:0] rvi_imm_o, output logic rvc_branch_o, output logic rvc_jump_o, output logic rvc_jr_o, output logic rvc_return_o, output logic rvc_jalr_o, output logic rvc_call_o, - output logic [riscv::VLEN-1:0] rvc_imm_o + output logic [CVA6Cfg.VLEN-1:0] rvc_imm_o ); logic is_rvc; assign is_rvc = (instr_i[1:0] != 2'b11); logic rv32_rvc_jal; - assign rv32_rvc_jal = (riscv::XLEN == 32) & ((instr_i[15:13] == riscv::OpcodeC1Jal) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)); + assign rv32_rvc_jal = (CVA6Cfg.XLEN == 32) & ((instr_i[15:13] == riscv::OpcodeC1Jal) & is_rvc & (instr_i[1:0] == riscv::OpcodeC1)); logic is_xret; assign is_xret = logic'(instr_i[31:30] == 2'b00) & logic'(instr_i[28:0] == 29'b10000001000000000000001110011); @@ -48,11 +48,22 @@ module instr_scan #( // Opocde is JAL[R] and destination register is either x1 or x5 assign rvi_call_o = (rvi_jalr_o | rvi_jump_o) & ((instr_i[11:7] == 5'd1) | 
instr_i[11:7] == 5'd5); // differentiates between JAL and BRANCH opcode, JALR comes from BHT - assign rvi_imm_o = is_xret ? '0 : (instr_i[3]) ? ariane_pkg::uj_imm( - instr_i - ) : ariane_pkg::sb_imm( - instr_i - ); + assign rvi_imm_o = is_xret ? '0 : (instr_i[3]) ? { + // uj_imm + {44 + CVA6Cfg.VLEN - 64{instr_i[31]}}, + instr_i[19:12], + instr_i[20], + instr_i[30:21], + 1'b0 + } : { + // sb_imm + {51 + CVA6Cfg.VLEN - 64{instr_i[31]}}, + instr_i[31], + instr_i[7], + instr_i[30:25], + instr_i[11:8], + 1'b0 + }; assign rvi_branch_o = (instr_i[6:0] == riscv::OpcodeBranch); assign rvi_jalr_o = (instr_i[6:0] == riscv::OpcodeJalr); assign rvi_jump_o = logic'(instr_i[6:0] == riscv::OpcodeJal) | is_xret; @@ -78,6 +89,6 @@ module instr_scan #( assign rvc_return_o = ((instr_i[11:7] == 5'd1) | (instr_i[11:7] == 5'd5)) & rvc_jr_o; // differentiates between JAL and BRANCH opcode, JALR comes from BHT - assign rvc_imm_o = (instr_i[14]) ? {{56+riscv::VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0} - : {{53+riscv::VLEN-64{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0}; + assign rvc_imm_o = (instr_i[14]) ? 
{{56+CVA6Cfg.VLEN-64{instr_i[12]}}, instr_i[6:5], instr_i[2], instr_i[11:10], instr_i[4:3], 1'b0} + : {{53+CVA6Cfg.VLEN-64{instr_i[12]}}, instr_i[8], instr_i[10:9], instr_i[6], instr_i[7], instr_i[2], instr_i[11], instr_i[5:3], 1'b0}; endmodule diff --git a/core/frontend/ras.sv b/core/frontend/ras.sv index f092b50020..b065fd22d3 100644 --- a/core/frontend/ras.sv +++ b/core/frontend/ras.sv @@ -16,6 +16,7 @@ // return address stack module ras #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type ras_t = logic, parameter int unsigned DEPTH = 2 ) ( input logic clk_i, @@ -23,11 +24,11 @@ module ras #( input logic flush_i, input logic push_i, input logic pop_i, - input logic [riscv::VLEN-1:0] data_i, - output ariane_pkg::ras_t data_o + input logic [CVA6Cfg.VLEN-1:0] data_i, + output ras_t data_o ); - ariane_pkg::ras_t [DEPTH-1:0] stack_d, stack_q; + ras_t [DEPTH-1:0] stack_d, stack_q; assign data_o = stack_q[0]; diff --git a/core/id_stage.sv b/core/id_stage.sv index dfd8dd698f..253bc81016 100644 --- a/core/id_stage.sv +++ b/core/id_stage.sv @@ -14,7 +14,12 @@ // issue and read operands. 
module id_stage #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type branchpredict_sbe_t = logic, + parameter type irq_ctrl_t = logic, + parameter type fetch_entry_t = logic, + parameter type scoreboard_entry_t = logic ) ( input logic clk_i, input logic rst_ni, @@ -22,11 +27,11 @@ module id_stage #( input logic flush_i, input logic debug_req_i, // from IF - input ariane_pkg::fetch_entry_t fetch_entry_i, + input fetch_entry_t fetch_entry_i, input logic fetch_entry_valid_i, output logic fetch_entry_ready_o, // acknowledge the instruction (fetch entry) // to ID - output ariane_pkg::scoreboard_entry_t issue_entry_o, // a decoded instruction + output scoreboard_entry_t issue_entry_o, // a decoded instruction output logic issue_entry_valid_o, // issue entry is valid output logic is_ctrl_flow_o, // the instruction we issue is a ctrl flow instructions input logic issue_instr_ack_i, // issue stage acknowledged sampling of instructions @@ -36,7 +41,7 @@ module id_stage #( input logic [2:0] frm_i, // floating-point dynamic rounding mode input riscv::xs_t vs_i, // vector extension status input logic [1:0] irq_i, - input ariane_pkg::irq_ctrl_t irq_ctrl_i, + input irq_ctrl_t irq_ctrl_i, input logic debug_mode_i, // we are in debug mode input logic tvm_i, input logic tw_i, @@ -45,13 +50,13 @@ module id_stage #( // ID/ISSUE register stage typedef struct packed { logic valid; - ariane_pkg::scoreboard_entry_t sbe; + scoreboard_entry_t sbe; logic is_ctrl_flow; } issue_struct_t; issue_struct_t issue_n, issue_q; logic is_control_flow_instr; - ariane_pkg::scoreboard_entry_t decoded_instruction; + scoreboard_entry_t decoded_instruction; logic is_illegal; logic [31:0] instruction; @@ -78,7 +83,11 @@ module id_stage #( // 2. 
Decode and emit instruction to issue stage // --------------------------------------------------------- decoder #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .exception_t(exception_t), + .branchpredict_sbe_t(branchpredict_sbe_t), + .irq_ctrl_t(irq_ctrl_t), + .scoreboard_entry_t(scoreboard_entry_t) ) decoder_i ( .debug_req_i, .irq_ctrl_i, diff --git a/core/include/acc_pkg.sv b/core/include/acc_pkg.sv deleted file mode 100644 index bcd3c70a65..0000000000 --- a/core/include/acc_pkg.sv +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2023 ETH Zurich and University of Bologna. -// Solderpad Hardware License, Version 0.51, see LICENSE for details. -// SPDX-License-Identifier: SHL-0.51 - -// Authors: Matheus Cavalcante -// Nils Wistoff - -// Package defining the accelerator interface as used by Ara + CVA6 - -package acc_pkg; - - // ---------------------- - // Accelerator Interface - // ---------------------- - - typedef struct packed { - logic req_valid; - logic resp_ready; - riscv::instruction_t insn; - riscv::xlen_t rs1; - riscv::xlen_t rs2; - fpnew_pkg::roundmode_e frm; - logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id; - logic store_pending; - // Invalidation interface - logic acc_cons_en; - logic inval_ready; - } accelerator_req_t; - - typedef struct packed { - logic req_ready; - logic resp_valid; - riscv::xlen_t result; - logic [ariane_pkg::TRANS_ID_BITS-1:0] trans_id; - logic error; - // Metadata - logic store_pending; - logic store_complete; - logic load_complete; - logic [4:0] fflags; - logic fflags_valid; - // Invalidation interface - logic inval_valid; - logic [63:0] inval_addr; - } accelerator_resp_t; - -endpackage diff --git a/core/include/ariane_pkg.sv b/core/include/ariane_pkg.sv index ddeed84539..e3f2bf77bd 100644 --- a/core/include/ariane_pkg.sv +++ b/core/include/ariane_pkg.sv @@ -29,12 +29,6 @@ package ariane_pkg; // TODO: Slowly move those parameters to the new system. 
- localparam NR_SB_ENTRIES = cva6_config_pkg::CVA6ConfigNrScoreboardEntries; // number of scoreboard entries - localparam TRANS_ID_BITS = $clog2( - NR_SB_ENTRIES - ); // depending on the number of scoreboard entries we need that many bits - // to uniquely identify the entry in the scoreboard - localparam ASID_WIDTH = (riscv::XLEN == 64) ? 16 : 1; localparam BITS_SATURATION_COUNTER = 2; localparam ISSUE_WIDTH = 1; @@ -51,8 +45,6 @@ package ariane_pkg; // allocate more space for the commit buffer to be on the save side, this needs to be a power of two localparam logic [2:0] DEPTH_COMMIT = 'd4; - localparam bit FPGA_EN = cva6_config_pkg::CVA6ConfigFPGAEn; // Is FPGA optimization of CV32A6 - localparam bit RVC = cva6_config_pkg::CVA6ConfigCExtEn; // Is C extension configuration // Transprecision float unit @@ -65,8 +57,8 @@ package ariane_pkg; localparam int unsigned LAT_NONCOMP = 'd1; localparam int unsigned LAT_CONV = 'd2; - localparam riscv::xlen_t OPENHWGROUP_MVENDORID = {{riscv::XLEN - 32{1'b0}}, 32'h0602}; - localparam riscv::xlen_t ARIANE_MARCHID = {{riscv::XLEN - 32{1'b0}}, 32'd3}; + localparam logic [31:0] OPENHWGROUP_MVENDORID = 32'h0602; + localparam logic [31:0] ARIANE_MARCHID = 32'd3; // 32 registers localparam REG_ADDR_SIZE = 5; @@ -125,37 +117,11 @@ package ariane_pkg; localparam bit ENABLE_WFI = 1'b1; localparam bit ZERO_TVAL = 1'b0; `endif - // read mask for SSTATUS over MMSTATUS - localparam logic [63:0] SMODE_STATUS_READ_MASK = riscv::SSTATUS_UIE - | riscv::SSTATUS_SIE - | riscv::SSTATUS_SPIE - | riscv::SSTATUS_SPP - | riscv::SSTATUS_FS - | riscv::SSTATUS_XS - | riscv::SSTATUS_SUM - | riscv::SSTATUS_MXR - | riscv::SSTATUS_UPIE - | riscv::SSTATUS_SPIE - | riscv::SSTATUS_UXL - | riscv::SSTATUS_SD; - - localparam logic [63:0] SMODE_STATUS_WRITE_MASK = riscv::SSTATUS_SIE - | riscv::SSTATUS_SPIE - | riscv::SSTATUS_SPP - | riscv::SSTATUS_FS - | riscv::SSTATUS_SUM - | riscv::SSTATUS_MXR; + // --------------- // AXI // --------------- - localparam 
FETCH_USER_WIDTH = cva6_config_pkg::CVA6ConfigFetchUserWidth; - localparam DATA_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth; - localparam AXI_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn | cva6_config_pkg::CVA6ConfigFetchUserEn; - localparam AXI_USER_WIDTH = cva6_config_pkg::CVA6ConfigDataUserWidth; - localparam DATA_USER_EN = cva6_config_pkg::CVA6ConfigDataUserEn; - localparam FETCH_USER_EN = cva6_config_pkg::CVA6ConfigFetchUserEn; - typedef enum logic { SINGLE_REQ, CACHE_LINE_REQ @@ -167,26 +133,6 @@ package ariane_pkg; // leave as is (fails with >8 entries and wider fetch width) localparam int unsigned FETCH_FIFO_DEPTH = 4; - localparam int unsigned FETCH_WIDTH = 32; - // maximum instructions we can fetch on one request (we support compressed instructions) - localparam int unsigned INSTR_PER_FETCH = RVC == 1'b1 ? (FETCH_WIDTH / 16) : 1; - localparam int unsigned LOG2_INSTR_PER_FETCH = RVC == 1'b1 ? $clog2( - ariane_pkg::INSTR_PER_FETCH - ) : 1; - - // --------------- - // Enable BITMANIP - // --------------- - localparam bit BITMANIP = cva6_config_pkg::CVA6ConfigBExtEn; - - // Only use struct when signals have same direction - // exception - typedef struct packed { - riscv::xlen_t cause; // cause of exception - riscv::xlen_t tval; // additional information of causing exception (e.g.: instruction causing it), - // address of LD/ST fault - logic valid; - } exception_t; typedef enum logic [2:0] { NoCF, // No control flow prediction @@ -196,49 +142,6 @@ package ariane_pkg; Return // Return Address Prediction } cf_t; - // branch-predict - // this is the struct we get back from ex stage and we will use it to update - // all the necessary data structures - // bp_resolve_t - typedef struct packed { - logic valid; // prediction with all its values is valid - logic [riscv::VLEN-1:0] pc; // PC of predict or mis-predict - logic [riscv::VLEN-1:0] target_address; // target address at which to jump, or not - logic is_mispredict; // set if this was a mis-predict - 
logic is_taken; // branch is taken - cf_t cf_type; // Type of control flow change - } bp_resolve_t; - - // branchpredict scoreboard entry - // this is the struct which we will inject into the pipeline to guide the various - // units towards the correct branch decision and resolve - typedef struct packed { - cf_t cf; // type of control flow prediction - logic [riscv::VLEN-1:0] predict_address; // target address at which to jump, or not - } branchpredict_sbe_t; - - typedef struct packed { - logic valid; - logic [riscv::VLEN-1:0] pc; // update at PC - logic [riscv::VLEN-1:0] target_address; - } btb_update_t; - - typedef struct packed { - logic valid; - logic [riscv::VLEN-1:0] target_address; - } btb_prediction_t; - - typedef struct packed { - logic valid; - logic [riscv::VLEN-1:0] ra; - } ras_t; - - typedef struct packed { - logic valid; - logic [riscv::VLEN-1:0] pc; // update at PC - logic taken; - } bht_update_t; - typedef struct packed { logic valid; logic taken; @@ -268,16 +171,6 @@ package ariane_pkg; localparam SupervisorIrq = 1; localparam MachineIrq = 0; - // All information needed to determine whether we need to associate an interrupt - // with the corresponding instruction or not. 
- typedef struct packed { - riscv::xlen_t mie; - riscv::xlen_t mip; - riscv::xlen_t mideleg; - logic sie; - logic global_enable; - } irq_ctrl_t; - // --------------- // Cache config // --------------- @@ -328,28 +221,6 @@ package ariane_pkg; localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH; localparam int unsigned MEM_TID_WIDTH = `L15_THREADID_WIDTH; -`else - // I$ - localparam int unsigned CONFIG_L1I_SIZE = cva6_config_pkg::CVA6ConfigIcacheByteSize; // in byte - localparam int unsigned ICACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigIcacheSetAssoc; // number of ways - localparam int unsigned ICACHE_INDEX_WIDTH = $clog2( - CONFIG_L1I_SIZE / ICACHE_SET_ASSOC - ); // in bit, contains also offset width - localparam int unsigned ICACHE_TAG_WIDTH = riscv::PLEN - ICACHE_INDEX_WIDTH; // in bit - localparam int unsigned ICACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit - localparam int unsigned ICACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 4 : cva6_config_pkg::CVA6ConfigIcacheLineWidth; // in bit - // D$ - localparam int unsigned CONFIG_L1D_SIZE = cva6_config_pkg::CVA6ConfigDcacheByteSize; // in byte - localparam int unsigned DCACHE_SET_ASSOC = cva6_config_pkg::CVA6ConfigDcacheSetAssoc; // number of ways - localparam int unsigned DCACHE_INDEX_WIDTH = $clog2( - CONFIG_L1D_SIZE / DCACHE_SET_ASSOC - ); // in bit, contains also offset width - localparam int unsigned DCACHE_TAG_WIDTH = riscv::PLEN - DCACHE_INDEX_WIDTH; // in bit - localparam int unsigned DCACHE_LINE_WIDTH = cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit - localparam int unsigned DCACHE_USER_LINE_WIDTH = (AXI_USER_WIDTH == 1) ? 
4 : cva6_config_pkg::CVA6ConfigDcacheLineWidth; // in bit - localparam int unsigned DCACHE_USER_WIDTH = DATA_USER_WIDTH; - - localparam int unsigned MEM_TID_WIDTH = cva6_config_pkg::CVA6ConfigMemTidWidth; `endif localparam int unsigned DCACHE_TID_WIDTH = cva6_config_pkg::CVA6ConfigDcacheIdWidth; @@ -566,15 +437,6 @@ package ariane_pkg; CZERO_NEZ } fu_op; - typedef struct packed { - fu_t fu; - fu_op operation; - riscv::xlen_t operand_a; - riscv::xlen_t operand_b; - riscv::xlen_t imm; - logic [TRANS_ID_BITS-1:0] trans_id; - } fu_data_t; - function automatic logic op_is_branch(input fu_op op); unique case (op) inside EQ, NE, LTS, GES, LTU, GEU: return 1'b1; @@ -659,64 +521,12 @@ package ariane_pkg; endcase endfunction - typedef struct packed { - logic valid; - logic [riscv::VLEN-1:0] vaddr; - logic overflow; - riscv::xlen_t data; - logic [(riscv::XLEN/8)-1:0] be; - fu_t fu; - fu_op operation; - logic [TRANS_ID_BITS-1:0] trans_id; - } lsu_ctrl_t; - - // --------------- - // IF/ID Stage - // --------------- - // store the decompressed instruction - typedef struct packed { - logic [riscv::VLEN-1:0] address; // the address of the instructions from below - logic [31:0] instruction; // instruction word - branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path - exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions - } fetch_entry_t; - // --------------- // ID/EX/WB Stage // --------------- localparam RVFI = cva6_config_pkg::CVA6ConfigRvfiTrace; - typedef struct packed { - logic [riscv::VLEN-1:0] pc; // PC of instruction - logic [TRANS_ID_BITS-1:0] trans_id; // this can potentially be simplified, we could index the scoreboard entry - // with the transaction id in any case make the width more generic - fu_t fu; // functional unit to use - fu_op op; // operation to perform in each functional unit - logic [REG_ADDR_SIZE-1:0] rs1; // register source address 1 
- logic [REG_ADDR_SIZE-1:0] rs2; // register source address 2 - logic [REG_ADDR_SIZE-1:0] rd; // register destination address - riscv::xlen_t result; // for unfinished instructions this field also holds the immediate, - // for unfinished floating-point that are partly encoded in rs2, this field also holds rs2 - // for unfinished floating-point fused operations (FMADD, FMSUB, FNMADD, FNMSUB) - // this field holds the address of the third operand from the floating-point register file - logic valid; // is the result valid - logic use_imm; // should we use the immediate as operand b? - logic use_zimm; // use zimm as operand a - logic use_pc; // set if we need to use the PC as operand a, PC from exception - exception_t ex; // exception has occurred - branchpredict_sbe_t bp; // branch predict scoreboard data structure - logic is_compressed; // signals a compressed instructions, we need this information at the commit stage if - // we want jump accordingly e.g.: +4, +2 - riscv::xlen_t rs1_rdata; // information needed by RVFI - riscv::xlen_t rs2_rdata; // information needed by RVFI - logic [riscv::VLEN-1:0] lsu_addr; // information needed by RVFI - logic [(riscv::XLEN/8)-1:0] lsu_rmask; // information needed by RVFI - logic [(riscv::XLEN/8)-1:0] lsu_wmask; // information needed by RVFI - riscv::xlen_t lsu_wdata; // information needed by RVFI - logic vfp; // is this a vector floating-point instruction? - } scoreboard_entry_t; - // --------------- // MMU instanciation // --------------- @@ -751,15 +561,6 @@ package ariane_pkg; AMO_CAS2 = 4'b1101 // unused, not part of riscv spec, but provided in OpenPiton } amo_t; - typedef struct packed { - logic valid; // valid flag - logic is_2M; // - logic is_1G; // - logic [27-1:0] vpn; // VPN (39bits) = 27bits + 12bits offset - logic [ASID_WIDTH-1:0] asid; - riscv::pte_t content; - } tlb_update_t; - // Bits required for representation of physical address space as 4K pages // (e.g. 27*4K == 39bit address space). 
localparam PPN4K_WIDTH = 38; @@ -778,39 +579,6 @@ package ariane_pkg; FE_INSTR_PAGE_FAULT } frontend_exception_t; - // ---------------------- - // cache request ports - // ---------------------- - // I$ address translation requests - typedef struct packed { - logic fetch_valid; // address translation valid - logic [riscv::PLEN-1:0] fetch_paddr; // physical address in - exception_t fetch_exception; // exception occurred during fetch - } icache_areq_t; - - typedef struct packed { - logic fetch_req; // address translation request - logic [riscv::VLEN-1:0] fetch_vaddr; // virtual address out - } icache_arsp_t; - - // I$ data requests - typedef struct packed { - logic req; // we request a new word - logic kill_s1; // kill the current request - logic kill_s2; // kill the last request - logic spec; // request is speculative - logic [riscv::VLEN-1:0] vaddr; // 1st cycle: 12 bit index is taken for lookup - } icache_dreq_t; - - typedef struct packed { - logic ready; // icache is ready - logic valid; // signals a valid read - logic [FETCH_WIDTH-1:0] data; // 2+ cycle out: tag - logic [FETCH_USER_WIDTH-1:0] user; // User bits - logic [riscv::VLEN-1:0] vaddr; // virtual address out - exception_t ex; // we've encountered an exception - } icache_drsp_t; - // AMO request going to cache. this request is unconditionally valid as soon // as request goes high. 
// Furthermore, those signals are kept stable until the response indicates @@ -829,88 +597,6 @@ package ariane_pkg; logic [63:0] result; // sign-extended, result } amo_resp_t; - // D$ data requests - typedef struct packed { - logic [DCACHE_INDEX_WIDTH-1:0] address_index; - logic [DCACHE_TAG_WIDTH-1:0] address_tag; - riscv::xlen_t data_wdata; - logic [DCACHE_USER_WIDTH-1:0] data_wuser; - logic data_req; - logic data_we; - logic [(riscv::XLEN/8)-1:0] data_be; - logic [1:0] data_size; - logic [DCACHE_TID_WIDTH-1:0] data_id; - logic kill_req; - logic tag_valid; - } dcache_req_i_t; - - typedef struct packed { - logic data_gnt; - logic data_rvalid; - logic [DCACHE_TID_WIDTH-1:0] data_rid; - riscv::xlen_t data_rdata; - logic [DCACHE_USER_WIDTH-1:0] data_ruser; - } dcache_req_o_t; - - // ---------------------- - // Arithmetic Functions - // ---------------------- - function automatic riscv::xlen_t sext32(logic [31:0] operand); - return {{riscv::XLEN - 32{operand[31]}}, operand[31:0]}; - endfunction - - // ---------------------- - // Immediate functions - // ---------------------- - function automatic logic [riscv::VLEN-1:0] uj_imm(logic [31:0] instruction_i); - return { - {44 + riscv::VLEN - 64{instruction_i[31]}}, - instruction_i[19:12], - instruction_i[20], - instruction_i[30:21], - 1'b0 - }; - endfunction - - function automatic logic [riscv::VLEN-1:0] i_imm(logic [31:0] instruction_i); - return {{52 + riscv::VLEN - 64{instruction_i[31]}}, instruction_i[31:20]}; - endfunction - - function automatic logic [riscv::VLEN-1:0] sb_imm(logic [31:0] instruction_i); - return { - {51 + riscv::VLEN - 64{instruction_i[31]}}, - instruction_i[31], - instruction_i[7], - instruction_i[30:25], - instruction_i[11:8], - 1'b0 - }; - endfunction - - // ---------------------- - // LSU Functions - // ---------------------- - // align data to address e.g.: shift data to be naturally 64 - function automatic riscv::xlen_t data_align(logic [2:0] addr, logic [63:0] data); - // Set addr[2] to 1'b0 
when 32bits - logic [ 2:0] addr_tmp = {(addr[2] && riscv::IS_XLEN64), addr[1:0]}; - logic [63:0] data_tmp = {64{1'b0}}; - case (addr_tmp) - 3'b000: data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-1:0]}; - 3'b001: - data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-9:0], data[riscv::XLEN-1:riscv::XLEN-8]}; - 3'b010: - data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-17:0], data[riscv::XLEN-1:riscv::XLEN-16]}; - 3'b011: - data_tmp[riscv::XLEN-1:0] = {data[riscv::XLEN-25:0], data[riscv::XLEN-1:riscv::XLEN-24]}; - 3'b100: data_tmp = {data[31:0], data[63:32]}; - 3'b101: data_tmp = {data[23:0], data[63:24]}; - 3'b110: data_tmp = {data[15:0], data[63:16]}; - 3'b111: data_tmp = {data[7:0], data[63:8]}; - endcase - return data_tmp[riscv::XLEN-1:0]; - endfunction - // generate byte enable mask function automatic logic [7:0] be_gen(logic [2:0] addr, logic [1:0] size); case (size) diff --git a/core/include/config_pkg.sv b/core/include/config_pkg.sv index 2bfb9d3037..b00cd83fb5 100644 --- a/core/include/config_pkg.sv +++ b/core/include/config_pkg.sv @@ -36,6 +36,110 @@ package config_pkg; localparam NrMaxRules = 16; typedef struct packed { + int unsigned XLEN; + bit BITMANIP; + int unsigned NR_SB_ENTRIES; + bit FPGA_EN; // Is FPGA optimization of CV32A6 + /// Number of commit ports, i.e., maximum number of instructions that the + /// core can retire per cycle. It can be beneficial to have more commit + /// ports than issue ports, for the scoreboard to empty out in case one + /// instruction stalls a little longer. + int unsigned NrCommitPorts; + /// AXI parameters. 
+ int unsigned AxiAddrWidth; + int unsigned AxiDataWidth; + int unsigned AxiIdWidth; + int unsigned AxiUserWidth; + + int unsigned MemTidWidth; + + // I$ + int unsigned IcacheByteSize; // in byte + int unsigned IcacheSetAssoc; // number of ways + int unsigned IcacheLineWidth; // in bit + // D$ + int unsigned DcacheByteSize; // in byte + int unsigned DcacheSetAssoc; // number of ways + int unsigned DcacheLineWidth; // in bit + + int unsigned NrLoadBufEntries; + bit FpuEn; + bit XF16; + bit XF16ALT; + bit XF8; + bit RVA; + bit RVV; + bit RVC; + bit RVZCB; + bit XFVec; + bit CvxifEn; + bit ZiCondExtEn; + bit RVS; + bit RVU; + int unsigned FETCH_USER_WIDTH; + int unsigned DATA_USER_WIDTH; + int unsigned AXI_USER_WIDTH; + bit DATA_USER_EN; + bit FETCH_USER_EN; + + /// Return address stack depth, good values are around 2 to 4. + int unsigned RASDepth; + /// Branch target buffer entries. + int unsigned BTBEntries; + /// Branch history (2-bit saturation counter) size, to keep track of + /// branch outcomes. + int unsigned BHTEntries; + /// Offset of the debug module. + logic [63:0] DmBaseAddress; + /// Number of PMP entries. + int unsigned NrPMPEntries; + /// Set to the bus type in use. + noc_type_e NOCType; + /// Physical Memory Attributes (PMAs) + /// Number of non idempotent rules. + int unsigned NrNonIdempotentRules; + /// Base which needs to match. + logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase; + /// Bit mask which bits to consider when matching the rule. + logic [NrMaxRules-1:0][63:0] NonIdempotentLength; + /// Number of regions which have execute property. + int unsigned NrExecuteRegionRules; + /// Base which needs to match. + logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase; + /// Bit mask which bits to consider when matching the rule. + logic [NrMaxRules-1:0][63:0] ExecuteRegionLength; + /// Number of regions which have cached property. + int unsigned NrCachedRegionRules; + /// Base which needs to match. 
+ logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase; + /// Bit mask which bits to consider when matching the rule. + logic [NrMaxRules-1:0][63:0] CachedRegionLength; + /// Maximum number of outstanding stores. + int unsigned MaxOutstandingStores; + bit DebugEn; + } cva6_user_cfg_t; + + typedef struct packed { + int unsigned XLEN; + bit BITMANIP; + int unsigned NR_SB_ENTRIES; + // depending on the number of scoreboard entries we need that many bits + // to uniquely identify the entry in the scoreboard + int unsigned TRANS_ID_BITS; + int unsigned ASID_WIDTH; + bit FPGA_EN; + int unsigned VLEN; // virtual address length + int unsigned PLEN; // physical address length + bit IS_XLEN32; + bit IS_XLEN64; + int unsigned XLEN_ALIGN_BYTES; + int unsigned ModeW; + int unsigned ASIDW; + int unsigned PPNW; + riscv::vm_mode_t MODE_SV; + int unsigned SV; + int unsigned VPN2; + /// Number of commit ports, i.e., maximum number of instructions that the /// core can retire per cycle. It can be beneficial to have more commit /// ports than issue ports, for the scoreboard to empty out in case one @@ -73,53 +177,218 @@ package config_pkg; bit EnableAccelerator; bit RVS; //Supervisor mode bit RVU; //User mode - // Debug Module - // address to which a hart should jump when it was requested to halt + logic [63:0] HaltAddress; logic [63:0] ExceptionAddress; - /// Return address stack depth, good values are around 2 to 4. int unsigned RASDepth; - /// Branch target buffer entries. int unsigned BTBEntries; - /// Branch history (2-bit saturation counter) size, to keep track of - /// branch otucomes. int unsigned BHTEntries; - /// Offset of the debug module. logic [63:0] DmBaseAddress; - /// Number of PMP entries. int unsigned NrPMPEntries; - /// Set to the bus type in use. noc_type_e NOCType; - /// Physical Memory Attributes (PMAs) - /// Number of non idempotent rules. int unsigned NrNonIdempotentRules; - /// Base which needs to match. 
logic [NrMaxRules-1:0][63:0] NonIdempotentAddrBase; - /// Bit mask which bits to consider when matching the rule. logic [NrMaxRules-1:0][63:0] NonIdempotentLength; - /// Number of regions which have execute property. int unsigned NrExecuteRegionRules; - /// Base which needs to match. logic [NrMaxRules-1:0][63:0] ExecuteRegionAddrBase; - /// Bit mask which bits to consider when matching the rule. logic [NrMaxRules-1:0][63:0] ExecuteRegionLength; - /// Number of regions which have cached property. int unsigned NrCachedRegionRules; - /// Base which needs to match. logic [NrMaxRules-1:0][63:0] CachedRegionAddrBase; - /// Bit mask which bits to consider when matching the rule. logic [NrMaxRules-1:0][63:0] CachedRegionLength; - /// Maximum number of outstanding stores. int unsigned MaxOutstandingStores; bit DebugEn; + + int unsigned FETCH_USER_WIDTH; + int unsigned DATA_USER_WIDTH; + bit AXI_USER_EN; + int unsigned AXI_USER_WIDTH; + int unsigned MEM_TID_WIDTH; + int unsigned DCACHE_MAX_TX; + // I$ + int unsigned ICACHE_SET_ASSOC; + int unsigned ICACHE_SET_ASSOC_WIDTH; + int unsigned ICACHE_INDEX_WIDTH; // in bit, contains also offset width + int unsigned ICACHE_TAG_WIDTH; + int unsigned ICACHE_LINE_WIDTH; // in bit + int unsigned ICACHE_USER_LINE_WIDTH; // in bit + // D$ + int unsigned DCACHE_SET_ASSOC; + int unsigned DCACHE_SET_ASSOC_WIDTH; + int unsigned DCACHE_INDEX_WIDTH; // in bit, contains also offset width + int unsigned DCACHE_TAG_WIDTH; + int unsigned DCACHE_LINE_WIDTH; + int unsigned DCACHE_USER_LINE_WIDTH; // in bit + int unsigned DCACHE_USER_WIDTH; + int unsigned DCACHE_OFFSET_WIDTH; + int unsigned DCACHE_NUM_WORDS; + + bit DATA_USER_EN; + bit FETCH_USER_EN; + int unsigned FETCH_WIDTH; + int unsigned INSTR_PER_FETCH; + // maximum instructions we can fetch on one request (we support compressed instructions) + int unsigned LOG2_INSTR_PER_FETCH; + + int unsigned X_DATAWIDTH; + int unsigned X_NUM_RS; + int unsigned X_ID_WIDTH; + int unsigned X_MEM_WIDTH; + int 
unsigned X_RFR_WIDTH; + int unsigned X_RFW_WIDTH; } cva6_cfg_t; + function automatic cva6_cfg_t build_config(cva6_user_cfg_t CVA6Cfg); + bit IS_XLEN32 = (CVA6Cfg.XLEN == 32) ? 1'b1 : 1'b0; + bit IS_XLEN64 = (CVA6Cfg.XLEN == 32) ? 1'b0 : 1'b1; + bit RVF = (IS_XLEN64 | IS_XLEN32) & CVA6Cfg.FpuEn; + bit RVD = (IS_XLEN64 ? 1 : 0) & CVA6Cfg.FpuEn; + bit FpPresent = RVF | RVD | CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8; + bit NSX = CVA6Cfg.XF16 | CVA6Cfg.XF16ALT | CVA6Cfg.XF8 | CVA6Cfg.XFVec; // Are non-standard extensions present? + int unsigned FLen = RVD ? 64 : // D ext. + RVF ? 32 : // F ext. + CVA6Cfg.XF16 ? 16 : // Xf16 ext. + CVA6Cfg.XF16ALT ? 16 : // Xf16alt ext. + CVA6Cfg.XF8 ? 8 : // Xf8 ext. + 1; // Unused in case of no FP + + // Transprecision floating-point extensions configuration + bit RVFVec = RVF & CVA6Cfg.XFVec & FLen>32; // FP32 vectors available if vectors and larger fmt enabled + bit XF16Vec = CVA6Cfg.XF16 & CVA6Cfg.XFVec & FLen>16; // FP16 vectors available if vectors and larger fmt enabled + bit XF16ALTVec = CVA6Cfg.XF16ALT & CVA6Cfg.XFVec & FLen>16; // FP16ALT vectors available if vectors and larger fmt enabled + bit XF8Vec = CVA6Cfg.XF8 & CVA6Cfg.XFVec & FLen>8; // FP8 vectors available if vectors and larger fmt enabled + + bit EnableAccelerator = CVA6Cfg.RVV; // Currently only used by V extension (Ara) + int unsigned NrWbPorts = (CVA6Cfg.CvxifEn || EnableAccelerator) ? 5 : 4; + + riscv::vm_mode_t MODE_SV = (CVA6Cfg.XLEN == 32) ? riscv::ModeSv32 : riscv::ModeSv39; + int unsigned VLEN = (CVA6Cfg.XLEN == 32) ? 32 : 64; + int unsigned PLEN = (CVA6Cfg.XLEN == 32) ? 34 : 56; + int unsigned FETCH_WIDTH = 32; + int unsigned INSTR_PER_FETCH = CVA6Cfg.RVC == 1'b1 ? 
(FETCH_WIDTH / 16) : 1; + + int unsigned ICACHE_SET_ASSOC = CVA6Cfg.IcacheSetAssoc; + int unsigned DCACHE_SET_ASSOC = CVA6Cfg.DcacheSetAssoc; + int unsigned ICACHE_INDEX_WIDTH = $clog2(CVA6Cfg.IcacheByteSize / ICACHE_SET_ASSOC); + int unsigned DCACHE_INDEX_WIDTH = $clog2(CVA6Cfg.DcacheByteSize / DCACHE_SET_ASSOC); + int unsigned DCACHE_OFFSET_WIDTH = $clog2(CVA6Cfg.DcacheLineWidth / 8); + + int unsigned NrRgprPorts = 2; + + return + '{ + XLEN: CVA6Cfg.XLEN, + BITMANIP: CVA6Cfg.BITMANIP, + NR_SB_ENTRIES: CVA6Cfg.NR_SB_ENTRIES, + TRANS_ID_BITS: $clog2(CVA6Cfg.NR_SB_ENTRIES), + ASID_WIDTH: unsigned'((CVA6Cfg.XLEN == 64) ? 16 : 1), + FPGA_EN: CVA6Cfg.FPGA_EN, + VLEN: VLEN, + PLEN: PLEN, + IS_XLEN32: IS_XLEN32, + IS_XLEN64: IS_XLEN64, + XLEN_ALIGN_BYTES: $clog2(CVA6Cfg.XLEN / 8), + ModeW: unsigned'((CVA6Cfg.XLEN == 32) ? 1 : 4), + ASIDW: unsigned'((CVA6Cfg.XLEN == 32) ? 9 : 16), + PPNW: unsigned'((CVA6Cfg.XLEN == 32) ? 22 : 44), + MODE_SV: MODE_SV, + SV: unsigned'((MODE_SV == riscv::ModeSv32) ? 32 : 39), + VPN2: unsigned'((VLEN - 31 < 8) ? 
VLEN - 31 : 8), + NrCommitPorts: CVA6Cfg.NrCommitPorts, + AxiAddrWidth: CVA6Cfg.AxiAddrWidth, + AxiDataWidth: CVA6Cfg.AxiDataWidth, + AxiIdWidth: CVA6Cfg.AxiIdWidth, + AxiUserWidth: CVA6Cfg.AxiUserWidth, + NrLoadBufEntries: CVA6Cfg.NrLoadBufEntries, + FpuEn: CVA6Cfg.FpuEn, + XF16: CVA6Cfg.XF16, + XF16ALT: CVA6Cfg.XF16ALT, + XF8: CVA6Cfg.XF8, + RVA: CVA6Cfg.RVA, + RVV: CVA6Cfg.RVV, + RVC: CVA6Cfg.RVC, + RVZCB: CVA6Cfg.RVZCB, + XFVec: CVA6Cfg.XFVec, + CvxifEn: CVA6Cfg.CvxifEn, + ZiCondExtEn: CVA6Cfg.ZiCondExtEn, + RVF: bit'(RVF), + RVD: bit'(RVD), + FpPresent: bit'(FpPresent), + NSX: bit'(NSX), + FLen: unsigned'(FLen), + RVFVec: bit'(RVFVec), + XF16Vec: bit'(XF16Vec), + XF16ALTVec: bit'(XF16ALTVec), + XF8Vec: bit'(XF8Vec), + NrRgprPorts: unsigned'(NrRgprPorts), + NrWbPorts: unsigned'(NrWbPorts), + EnableAccelerator: bit'(EnableAccelerator), + RVS: CVA6Cfg.RVS, + RVU: CVA6Cfg.RVU, + HaltAddress: 64'h800, + ExceptionAddress: 64'h808, + RASDepth: CVA6Cfg.RASDepth, + BTBEntries: CVA6Cfg.BTBEntries, + BHTEntries: CVA6Cfg.BHTEntries, + DmBaseAddress: CVA6Cfg.DmBaseAddress, + NrPMPEntries: CVA6Cfg.NrPMPEntries, + NOCType: CVA6Cfg.NOCType, + NrNonIdempotentRules: CVA6Cfg.NrNonIdempotentRules, + NonIdempotentAddrBase: CVA6Cfg.NonIdempotentAddrBase, + NonIdempotentLength: CVA6Cfg.NonIdempotentLength, + NrExecuteRegionRules: CVA6Cfg.NrExecuteRegionRules, + ExecuteRegionAddrBase: CVA6Cfg.ExecuteRegionAddrBase, + ExecuteRegionLength: CVA6Cfg.ExecuteRegionLength, + NrCachedRegionRules: CVA6Cfg.NrCachedRegionRules, + CachedRegionAddrBase: CVA6Cfg.CachedRegionAddrBase, + CachedRegionLength: CVA6Cfg.CachedRegionLength, + MaxOutstandingStores: CVA6Cfg.MaxOutstandingStores, + DebugEn: CVA6Cfg.DebugEn, + + FETCH_USER_WIDTH: CVA6Cfg.FETCH_USER_WIDTH, + DATA_USER_WIDTH: CVA6Cfg.DATA_USER_WIDTH, + AXI_USER_EN: CVA6Cfg.DATA_USER_EN | CVA6Cfg.FETCH_USER_EN, + AXI_USER_WIDTH: CVA6Cfg.AXI_USER_WIDTH, + MEM_TID_WIDTH: CVA6Cfg.MemTidWidth, + DCACHE_MAX_TX: 2 ** CVA6Cfg.MemTidWidth, + + 
ICACHE_SET_ASSOC: ICACHE_SET_ASSOC, + ICACHE_SET_ASSOC_WIDTH: $clog2(ICACHE_SET_ASSOC), + ICACHE_INDEX_WIDTH: ICACHE_INDEX_WIDTH, + ICACHE_TAG_WIDTH: PLEN - ICACHE_INDEX_WIDTH, + ICACHE_LINE_WIDTH: CVA6Cfg.IcacheLineWidth, + ICACHE_USER_LINE_WIDTH: (CVA6Cfg.AXI_USER_WIDTH == 1) ? 4 : CVA6Cfg.IcacheLineWidth, + + DCACHE_SET_ASSOC: DCACHE_SET_ASSOC, + DCACHE_SET_ASSOC_WIDTH: $clog2(DCACHE_SET_ASSOC), + DCACHE_INDEX_WIDTH: DCACHE_INDEX_WIDTH, + DCACHE_TAG_WIDTH: PLEN - DCACHE_INDEX_WIDTH, + DCACHE_LINE_WIDTH: CVA6Cfg.DcacheLineWidth, + DCACHE_USER_LINE_WIDTH: (CVA6Cfg.AXI_USER_WIDTH == 1) ? 4 : CVA6Cfg.DcacheLineWidth, + DCACHE_USER_WIDTH: CVA6Cfg.DATA_USER_WIDTH, + DCACHE_OFFSET_WIDTH: DCACHE_OFFSET_WIDTH, + DCACHE_NUM_WORDS: 2 ** (DCACHE_INDEX_WIDTH - DCACHE_OFFSET_WIDTH), + + DATA_USER_EN: CVA6Cfg.DATA_USER_EN, + FETCH_USER_EN: CVA6Cfg.FETCH_USER_EN, + FETCH_WIDTH: FETCH_WIDTH, + INSTR_PER_FETCH: INSTR_PER_FETCH, + LOG2_INSTR_PER_FETCH: CVA6Cfg.RVC == 1'b1 ? $clog2(INSTR_PER_FETCH) : 1, + + X_DATAWIDTH: CVA6Cfg.XLEN, + X_NUM_RS: unsigned'(NrRgprPorts), //2 or 3 + X_ID_WIDTH: $clog2(4), + X_MEM_WIDTH: unsigned'(64), + X_RFR_WIDTH: CVA6Cfg.XLEN, + X_RFW_WIDTH: CVA6Cfg.XLEN + } + ; + + endfunction /// Empty configuration to sanity check proper parameter passing. Whenever /// you develop a module that resides within the core, assign this constant. localparam cva6_cfg_t cva6_cfg_empty = '0; - /// Utility function being called to check parameters. Not all values make /// sense for all parameters, here is the place to sanity check them. 
function automatic void check_cfg(cva6_cfg_t Cfg); @@ -132,6 +401,8 @@ package config_pkg; assert (Cfg.NrExecuteRegionRules <= NrMaxRules); assert (Cfg.NrCachedRegionRules <= NrMaxRules); assert (Cfg.NrPMPEntries <= 16); + assert (!Cfg.STD_CACHE || (Cfg.PLEN == 56 && Cfg.VLEN == 64)); + assert (Cfg.VLEN >= Cfg.PLEN); `endif // pragma translate_on endfunction diff --git a/core/include/cv32a6_embedded_config_pkg.sv b/core/include/cv32a6_embedded_config_pkg.sv index 5f92c6568c..48714641be 100644 --- a/core/include/cv32a6_embedded_config_pkg.sv +++ b/core/include/cv32a6_embedded_config_pkg.sv @@ -71,73 +71,67 @@ package cva6_config_pkg; localparam CVA6ConfigRvfiTrace = 1; - localparam config_pkg::cva6_cfg_t cva6_cfg = '{ - NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), - AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), - AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), - AxiIdWidth: unsigned'(CVA6ConfigAxiIdWidth), - AxiUserWidth: unsigned'(CVA6ConfigDataUserWidth), - NrLoadBufEntries: unsigned'(CVA6ConfigNrLoadBufEntries), - FpuEn: bit'(CVA6ConfigFpuEn), - XF16: bit'(CVA6ConfigF16En), - XF16ALT: bit'(CVA6ConfigF16AltEn), - XF8: bit'(CVA6ConfigF8En), - RVA: bit'(CVA6ConfigAExtEn), - RVV: bit'(CVA6ConfigVExtEn), - RVC: bit'(CVA6ConfigCExtEn), - RVZCB: bit'(CVA6ConfigZcbExtEn), - XFVec: bit'(CVA6ConfigFVecEn), - CvxifEn: bit'(CVA6ConfigCvxifEn), - ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), - // Extended - RVF: - bit'( - 0 - ), - RVD: bit'(0), - FpPresent: bit'(0), - NSX: bit'(0), - FLen: unsigned'(0), - RVFVec: bit'(0), - XF16Vec: bit'(0), - XF16ALTVec: bit'(0), - XF8Vec: bit'(0), - NrRgprPorts: unsigned'(0), - NrWbPorts: unsigned'(0), - EnableAccelerator: bit'(0), - RVS: bit'(0), - RVU: bit'(0), - HaltAddress: 64'h800, - ExceptionAddress: 64'h808, - RASDepth: unsigned'(CVA6ConfigRASDepth), - BTBEntries: unsigned'(CVA6ConfigBTBEntries), - BHTEntries: unsigned'(CVA6ConfigBHTEntries), - DmBaseAddress: 64'h0, - NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), - NOCType: 
config_pkg::NOC_TYPE_AXI4_ATOP, - // idempotent region - NrNonIdempotentRules: - unsigned'( - 2 - ), - NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), - NonIdempotentLength: 1024'({64'b0, 64'b0}), - NrExecuteRegionRules: unsigned'(3), - // DRAM, Boot ROM, Debug Module - ExecuteRegionAddrBase: - 1024'( - {64'h8000_0000, 64'h1_0000, 64'h0} - ), - ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), - // cached region - NrCachedRegionRules: - unsigned'( - 1 - ), - CachedRegionAddrBase: 1024'({64'h8000_0000}), - CachedRegionLength: 1024'({64'h40000000}), - MaxOutstandingStores: unsigned'(7), - DebugEn: bit'(0) - }; + localparam config_pkg::cva6_user_cfg_t cva6_cfg = '{ + XLEN: unsigned'(CVA6ConfigXlen), + BITMANIP: bit'(CVA6ConfigBExtEn), + NR_SB_ENTRIES: unsigned'(CVA6ConfigNrScoreboardEntries), + FPGA_EN: bit'(CVA6ConfigFPGAEn), + NrCommitPorts: unsigned'(CVA6ConfigNrCommitPorts), + + AxiAddrWidth: unsigned'(CVA6ConfigAxiAddrWidth), + AxiDataWidth: unsigned'(CVA6ConfigAxiDataWidth), + AxiIdWidth: unsigned'(CVA6ConfigAxiIdWidth), + AxiUserWidth: unsigned'(CVA6ConfigDataUserWidth), + MemTidWidth: unsigned'(CVA6ConfigMemTidWidth), + + IcacheByteSize: unsigned'(CVA6ConfigIcacheByteSize), + IcacheSetAssoc: unsigned'(CVA6ConfigIcacheSetAssoc), + IcacheLineWidth: unsigned'(CVA6ConfigIcacheLineWidth), + DcacheByteSize: unsigned'(CVA6ConfigDcacheByteSize), + DcacheSetAssoc: unsigned'(CVA6ConfigDcacheSetAssoc), + DcacheLineWidth: unsigned'(CVA6ConfigDcacheLineWidth), + + NrLoadBufEntries: unsigned'(CVA6ConfigNrLoadBufEntries), + FpuEn: bit'(CVA6ConfigFpuEn), + XF16: bit'(CVA6ConfigF16En), + XF16ALT: bit'(CVA6ConfigF16AltEn), + XF8: bit'(CVA6ConfigF8En), + RVA: bit'(CVA6ConfigAExtEn), + RVV: bit'(CVA6ConfigVExtEn), + RVC: bit'(CVA6ConfigCExtEn), + RVS: bit'(0), + RVU: bit'(0), + RVZCB: bit'(CVA6ConfigZcbExtEn), + XFVec: bit'(CVA6ConfigFVecEn), + CvxifEn: bit'(CVA6ConfigCvxifEn), + ZiCondExtEn: bit'(CVA6ConfigZiCondExtEn), + + FETCH_USER_WIDTH: 
unsigned'(CVA6ConfigFetchUserWidth), + DATA_USER_WIDTH: unsigned'(CVA6ConfigDataUserWidth), + AXI_USER_WIDTH: unsigned'(CVA6ConfigDataUserWidth), + DATA_USER_EN: bit'(CVA6ConfigDataUserEn), + FETCH_USER_EN: bit'(CVA6ConfigFetchUserEn), + + RASDepth: unsigned'(CVA6ConfigRASDepth), + BTBEntries: unsigned'(CVA6ConfigBTBEntries), + BHTEntries: unsigned'(CVA6ConfigBHTEntries), + DmBaseAddress: 64'h0, + NrPMPEntries: unsigned'(CVA6ConfigNrPMPEntries), + NOCType: config_pkg::NOC_TYPE_AXI4_ATOP, + NrNonIdempotentRules: unsigned'(2), + NonIdempotentAddrBase: 1024'({64'b0, 64'b0}), + NonIdempotentLength: 1024'({64'b0, 64'b0}), + + NrExecuteRegionRules: unsigned'(3), + // DRAM, Boot ROM, Debug Module + ExecuteRegionAddrBase: 1024'({64'h8000_0000, 64'h1_0000, 64'h0}), + ExecuteRegionLength: 1024'({64'h40000000, 64'h10000, 64'h1000}), + + NrCachedRegionRules: unsigned'(1), + CachedRegionAddrBase: 1024'({64'h8000_0000}), + CachedRegionLength: 1024'({64'h40000000}), + MaxOutstandingStores: unsigned'(7), + DebugEn: bit'(0) +}; endpackage diff --git a/core/include/cva6_hpdcache_default_config_pkg.sv b/core/include/cva6_hpdcache_default_config_pkg.sv index 6072f23d87..a82e542cfe 100644 --- a/core/include/cva6_hpdcache_default_config_pkg.sv +++ b/core/include/cva6_hpdcache_default_config_pkg.sv @@ -35,7 +35,7 @@ package hpdcache_params_pkg; // Definition of global constants for the HPDcache data and directory // {{{ // HPDcache physical address width (in bits) - localparam int unsigned PARAM_PA_WIDTH = riscv::PLEN; + localparam int unsigned PARAM_PA_WIDTH = (cva6_config_pkg::CVA6ConfigXlen == 32) ? 
34 : 56; // HPDcache number of sets localparam int unsigned PARAM_SETS = __BYTES_PER_WAY / __BYTES_PER_CACHELINE; diff --git a/core/include/cvxif_pkg.sv b/core/include/cvxif_pkg.sv index 39e77b4471..a28c24abb0 100644 --- a/core/include/cvxif_pkg.sv +++ b/core/include/cvxif_pkg.sv @@ -11,12 +11,12 @@ package cvxif_pkg; - localparam X_DATAWIDTH = riscv::XLEN; + localparam X_DATAWIDTH = cva6_config_pkg::CVA6ConfigXlen; localparam X_NUM_RS = ariane_pkg::NR_RGPR_PORTS; //2 or 3 - localparam X_ID_WIDTH = ariane_pkg::TRANS_ID_BITS; + localparam X_ID_WIDTH = $clog2(4); localparam X_MEM_WIDTH = 64; - localparam X_RFR_WIDTH = riscv::XLEN; - localparam X_RFW_WIDTH = riscv::XLEN; + localparam X_RFR_WIDTH = cva6_config_pkg::CVA6ConfigXlen; + localparam X_RFW_WIDTH = cva6_config_pkg::CVA6ConfigXlen; typedef struct packed { logic [15:0] instr; diff --git a/core/include/riscv_pkg.sv b/core/include/riscv_pkg.sv index 2a9d919c1a..7ff5de0836 100644 --- a/core/include/riscv_pkg.sv +++ b/core/include/riscv_pkg.sv @@ -17,12 +17,6 @@ package riscv; - // ---------------------- - // Import cva6 config from cva6_config_pkg - // ---------------------- - localparam XLEN = cva6_config_pkg::CVA6ConfigXlen; - localparam FPU_EN = cva6_config_pkg::CVA6ConfigFpuEn; - // ---------------------- // Data and Address length // ---------------------- @@ -35,23 +29,6 @@ package riscv; ModeSv64 = 11 } vm_mode_t; - // Warning: When using STD_CACHE, configuration must be PLEN=56 and VLEN=64 - // Warning: VLEN must be superior or equal to PLEN - localparam VLEN = (XLEN == 32) ? 32 : 64; // virtual address length - localparam PLEN = (XLEN == 32) ? 34 : 56; // physical address length - - localparam IS_XLEN32 = (XLEN == 32) ? 1'b1 : 1'b0; - localparam IS_XLEN64 = (XLEN == 32) ? 1'b0 : 1'b1; - localparam ModeW = (XLEN == 32) ? 1 : 4; - localparam ASIDW = (XLEN == 32) ? 9 : 16; - localparam PPNW = (XLEN == 32) ? 22 : 44; - localparam vm_mode_t MODE_SV = (XLEN == 32) ? 
ModeSv32 : ModeSv39; - localparam SV = (MODE_SV == ModeSv32) ? 32 : 39; - localparam VPN2 = (VLEN - 31 < 8) ? VLEN - 31 : 8; - localparam XLEN_ALIGN_BYTES = $clog2(XLEN / 8); - - typedef logic [XLEN-1:0] xlen_t; - // -------------------- // Privilege Spec // -------------------- @@ -123,12 +100,6 @@ package riscv; logic wpri0; // writes preserved reads ignored } mstatus_rv_t; - typedef struct packed { - logic [ModeW-1:0] mode; - logic [ASIDW-1:0] asid; - logic [PPNW-1:0] ppn; - } satp_t; - // -------------------- // Instruction Types // -------------------- @@ -324,21 +295,21 @@ package riscv; // ---------------------- // Exception Cause Codes // ---------------------- - localparam logic [XLEN-1:0] INSTR_ADDR_MISALIGNED = 0; - localparam logic [XLEN-1:0] INSTR_ACCESS_FAULT = 1; // Illegal access as governed by PMPs and PMAs - localparam logic [XLEN-1:0] ILLEGAL_INSTR = 2; - localparam logic [XLEN-1:0] BREAKPOINT = 3; - localparam logic [XLEN-1:0] LD_ADDR_MISALIGNED = 4; - localparam logic [XLEN-1:0] LD_ACCESS_FAULT = 5; // Illegal access as governed by PMPs and PMAs - localparam logic [XLEN-1:0] ST_ADDR_MISALIGNED = 6; - localparam logic [XLEN-1:0] ST_ACCESS_FAULT = 7; // Illegal access as governed by PMPs and PMAs - localparam logic [XLEN-1:0] ENV_CALL_UMODE = 8; // environment call from user mode - localparam logic [XLEN-1:0] ENV_CALL_SMODE = 9; // environment call from supervisor mode - localparam logic [XLEN-1:0] ENV_CALL_MMODE = 11; // environment call from machine mode - localparam logic [XLEN-1:0] INSTR_PAGE_FAULT = 12; // Instruction page fault - localparam logic [XLEN-1:0] LOAD_PAGE_FAULT = 13; // Load page fault - localparam logic [XLEN-1:0] STORE_PAGE_FAULT = 15; // Store page fault - localparam logic [XLEN-1:0] DEBUG_REQUEST = 24; // Debug request + localparam logic [31:0] INSTR_ADDR_MISALIGNED = 0; + localparam logic [31:0] INSTR_ACCESS_FAULT = 1; // Illegal access as governed by PMPs and PMAs + localparam logic [31:0] ILLEGAL_INSTR = 2; + localparam 
logic [31:0] BREAKPOINT = 3; + localparam logic [31:0] LD_ADDR_MISALIGNED = 4; + localparam logic [31:0] LD_ACCESS_FAULT = 5; // Illegal access as governed by PMPs and PMAs + localparam logic [31:0] ST_ADDR_MISALIGNED = 6; + localparam logic [31:0] ST_ACCESS_FAULT = 7; // Illegal access as governed by PMPs and PMAs + localparam logic [31:0] ENV_CALL_UMODE = 8; // environment call from user mode + localparam logic [31:0] ENV_CALL_SMODE = 9; // environment call from supervisor mode + localparam logic [31:0] ENV_CALL_MMODE = 11; // environment call from machine mode + localparam logic [31:0] INSTR_PAGE_FAULT = 12; // Instruction page fault + localparam logic [31:0] LOAD_PAGE_FAULT = 13; // Load page fault + localparam logic [31:0] STORE_PAGE_FAULT = 15; // Store page fault + localparam logic [31:0] DEBUG_REQUEST = 24; // Debug request localparam int unsigned IRQ_S_SOFT = 1; localparam int unsigned IRQ_M_SOFT = 3; @@ -347,19 +318,12 @@ package riscv; localparam int unsigned IRQ_S_EXT = 9; localparam int unsigned IRQ_M_EXT = 11; - localparam logic [XLEN-1:0] MIP_SSIP = 1 << IRQ_S_SOFT; - localparam logic [XLEN-1:0] MIP_MSIP = 1 << IRQ_M_SOFT; - localparam logic [XLEN-1:0] MIP_STIP = 1 << IRQ_S_TIMER; - localparam logic [XLEN-1:0] MIP_MTIP = 1 << IRQ_M_TIMER; - localparam logic [XLEN-1:0] MIP_SEIP = 1 << IRQ_S_EXT; - localparam logic [XLEN-1:0] MIP_MEIP = 1 << IRQ_M_EXT; - - localparam logic [XLEN-1:0] S_SW_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_SOFT); - localparam logic [XLEN-1:0] M_SW_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_SOFT); - localparam logic [XLEN-1:0] S_TIMER_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_TIMER); - localparam logic [XLEN-1:0] M_TIMER_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_TIMER); - localparam logic [XLEN-1:0] S_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_S_EXT); - localparam logic [XLEN-1:0] M_EXT_INTERRUPT = (1 << (XLEN - 1)) | XLEN'(IRQ_M_EXT); + localparam logic [31:0] MIP_SSIP = 1 << IRQ_S_SOFT; + localparam logic [31:0] 
MIP_MSIP = 1 << IRQ_M_SOFT; + localparam logic [31:0] MIP_STIP = 1 << IRQ_S_TIMER; + localparam logic [31:0] MIP_MTIP = 1 << IRQ_M_TIMER; + localparam logic [31:0] MIP_SEIP = 1 << IRQ_S_EXT; + localparam logic [31:0] MIP_MEIP = 1 << IRQ_M_EXT; // ----- // CSRs @@ -605,41 +569,6 @@ package riscv; CSR_HPM_COUNTER_31H = 12'hC9F // reserved } csr_reg_t; - localparam logic [63:0] SSTATUS_UIE = 'h00000001; - localparam logic [63:0] SSTATUS_SIE = 'h00000002; - localparam logic [63:0] SSTATUS_SPIE = 'h00000020; - localparam logic [63:0] SSTATUS_SPP = 'h00000100; - localparam logic [63:0] SSTATUS_FS = 'h00006000; - localparam logic [63:0] SSTATUS_XS = 'h00018000; - localparam logic [63:0] SSTATUS_SUM = 'h00040000; - localparam logic [63:0] SSTATUS_MXR = 'h00080000; - localparam logic [63:0] SSTATUS_UPIE = 'h00000010; - localparam logic [63:0] SSTATUS_UXL = 64'h0000000300000000; - localparam logic [63:0] SSTATUS_SD = {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000}; - - localparam logic [63:0] MSTATUS_UIE = 'h00000001; - localparam logic [63:0] MSTATUS_SIE = 'h00000002; - localparam logic [63:0] MSTATUS_HIE = 'h00000004; - localparam logic [63:0] MSTATUS_MIE = 'h00000008; - localparam logic [63:0] MSTATUS_UPIE = 'h00000010; - localparam logic [63:0] MSTATUS_SPIE = 'h00000020; - localparam logic [63:0] MSTATUS_HPIE = 'h00000040; - localparam logic [63:0] MSTATUS_MPIE = 'h00000080; - localparam logic [63:0] MSTATUS_SPP = 'h00000100; - localparam logic [63:0] MSTATUS_HPP = 'h00000600; - localparam logic [63:0] MSTATUS_MPP = 'h00001800; - localparam logic [63:0] MSTATUS_FS = 'h00006000; - localparam logic [63:0] MSTATUS_XS = 'h00018000; - localparam logic [63:0] MSTATUS_MPRV = 'h00020000; - localparam logic [63:0] MSTATUS_SUM = 'h00040000; - localparam logic [63:0] MSTATUS_MXR = 'h00080000; - localparam logic [63:0] MSTATUS_TVM = 'h00100000; - localparam logic [63:0] MSTATUS_TW = 'h00200000; - localparam logic [63:0] MSTATUS_TSR = 'h00400000; - localparam logic [63:0] 
MSTATUS_UXL = {30'h0000000, IS_XLEN64, IS_XLEN64, 32'h00000000}; - localparam logic [63:0] MSTATUS_SXL = {28'h0000000, IS_XLEN64, IS_XLEN64, 34'h00000000}; - localparam logic [63:0] MSTATUS_SD = {IS_XLEN64, 31'h00000000, ~IS_XLEN64, 31'h00000000}; - typedef enum logic [2:0] { CSRRW = 3'h1, CSRRS = 3'h2, diff --git a/core/include/std_cache_pkg.sv b/core/include/std_cache_pkg.sv index 6324d0e94d..276c519bff 100644 --- a/core/include/std_cache_pkg.sv +++ b/core/include/std_cache_pkg.sv @@ -16,13 +16,6 @@ // Description: package for the standard Ariane cache subsystem. package std_cache_pkg; - - // Calculated parameter - localparam DCACHE_BYTE_OFFSET = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8); - localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_BYTE_OFFSET); - localparam DCACHE_DIRTY_WIDTH = ariane_pkg::DCACHE_SET_ASSOC * 2; - // localparam DECISION_BIT = 30; // bit on which to decide whether the request is cache-able or not - typedef struct packed { logic [1:0] id; // id for which we handle the miss logic valid; @@ -59,39 +52,5 @@ package std_cache_pkg; logic valid; logic [63:0] rdata; } bypass_rsp_t; - - typedef struct packed { - logic [ariane_pkg::DCACHE_TAG_WIDTH-1:0] tag; // tag array - logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // data array - logic valid; // state array - logic dirty; // state array - } cache_line_t; - - // cache line byte enable - typedef struct packed { - logic [(ariane_pkg::DCACHE_TAG_WIDTH+7)/8-1:0] tag; // byte enable into tag array - logic [(ariane_pkg::DCACHE_LINE_WIDTH+7)/8-1:0] data; // byte enable into data array - logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] vldrty; // bit enable into state array (valid for a pair of dirty/valid bits) - } cl_be_t; - - // convert one hot to bin for -> needed for cache replacement - function automatic logic [$clog2(ariane_pkg::DCACHE_SET_ASSOC)-1:0] one_hot_to_bin( - input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] in); - for (int unsigned i = 0; i < 
ariane_pkg::DCACHE_SET_ASSOC; i++) begin - if (in[i]) return i; - end - endfunction - // get the first bit set, returns one hot value - function automatic logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] get_victim_cl( - input logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] valid_dirty); - // one-hot return vector - logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] oh = '0; - for (int unsigned i = 0; i < ariane_pkg::DCACHE_SET_ASSOC; i++) begin - if (valid_dirty[i]) begin - oh[i] = 1'b1; - return oh; - end - end - endfunction endpackage : std_cache_pkg diff --git a/core/include/wt_cache_pkg.sv b/core/include/wt_cache_pkg.sv index 3884a6b08d..22b3693e8e 100644 --- a/core/include/wt_cache_pkg.sv +++ b/core/include/wt_cache_pkg.sv @@ -37,58 +37,20 @@ package wt_cache_pkg; localparam L15_SET_ASSOC = `CONFIG_L15_ASSOCIATIVITY; localparam L15_TLB_CSM_WIDTH = `TLB_CSM_WIDTH; `else - localparam L15_SET_ASSOC = ariane_pkg::DCACHE_SET_ASSOC;// align with dcache for compatibility with the standard Ariane setup localparam L15_TLB_CSM_WIDTH = 33; `endif - localparam L15_TID_WIDTH = ariane_pkg::MEM_TID_WIDTH; - localparam L15_WAY_WIDTH = $clog2(L15_SET_ASSOC); - localparam L1I_WAY_WIDTH = $clog2(ariane_pkg::ICACHE_SET_ASSOC); - localparam L1D_WAY_WIDTH = $clog2(ariane_pkg::DCACHE_SET_ASSOC); // FIFO depths of L15 adapter localparam ADAPTER_REQ_FIFO_DEPTH = 2; localparam ADAPTER_RTRN_FIFO_DEPTH = 2; - - // Calculated parameter - localparam ICACHE_OFFSET_WIDTH = $clog2(ariane_pkg::ICACHE_LINE_WIDTH / 8); - localparam ICACHE_NUM_WORDS = 2 ** (ariane_pkg::ICACHE_INDEX_WIDTH - ICACHE_OFFSET_WIDTH); - localparam ICACHE_CL_IDX_WIDTH = $clog2(ICACHE_NUM_WORDS); // excluding byte offset - - localparam DCACHE_OFFSET_WIDTH = $clog2(ariane_pkg::DCACHE_LINE_WIDTH / 8); - localparam DCACHE_NUM_WORDS = 2 ** (ariane_pkg::DCACHE_INDEX_WIDTH - DCACHE_OFFSET_WIDTH); - localparam DCACHE_CL_IDX_WIDTH = $clog2(DCACHE_NUM_WORDS); // excluding byte offset - - localparam DCACHE_NUM_BANKS = ariane_pkg::DCACHE_LINE_WIDTH / 
riscv::XLEN; - localparam DCACHE_NUM_BANKS_WIDTH = $clog2(DCACHE_NUM_BANKS); - // write buffer parameterization localparam DCACHE_WBUF_DEPTH = ariane_pkg::WT_DCACHE_WBUF_DEPTH; - localparam DCACHE_MAX_TX = 2 ** L15_TID_WIDTH; - localparam CACHE_ID_WIDTH = L15_TID_WIDTH; - - - typedef struct packed { - logic [ariane_pkg::DCACHE_TAG_WIDTH+(ariane_pkg::DCACHE_INDEX_WIDTH-riscv::XLEN_ALIGN_BYTES)-1:0] wtag; - riscv::xlen_t data; - logic [ariane_pkg::DCACHE_USER_WIDTH-1:0] user; - logic [(riscv::XLEN/8)-1:0] dirty; // byte is dirty - logic [(riscv::XLEN/8)-1:0] valid; // byte is valid - logic [(riscv::XLEN/8)-1:0] txblock; // byte is part of transaction in-flight - logic checked; // if cache state of this word has been checked - logic [ariane_pkg::DCACHE_SET_ASSOC-1:0] hit_oh; // valid way in the cache - } wbuffer_t; // TX status registers are indexed with the transaction ID // they basically store which bytes from which buffer entry are part // of that transaction - typedef struct packed { - logic vld; - logic [(riscv::XLEN/8)-1:0] be; - logic [$clog2(DCACHE_WBUF_DEPTH)-1:0] ptr; - } tx_stat_t; - // local interfaces between caches and L15 adapter typedef enum logic [1:0] { DCACHE_STORE_REQ, @@ -110,58 +72,6 @@ package wt_cache_pkg; ICACHE_IFILL_ACK } icache_in_t; - // icache interface - typedef struct packed { - logic vld; // invalidate only affected way - logic all; // invalidate all ways - logic [ariane_pkg::ICACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate - logic [L1I_WAY_WIDTH-1:0] way; // way to invalidate - } icache_inval_t; - - typedef struct packed { - logic [$clog2(ariane_pkg::ICACHE_SET_ASSOC)-1:0] way; // way to replace - logic [riscv::PLEN-1:0] paddr; // physical address - logic nc; // noncacheable - logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) - } icache_req_t; - - typedef struct packed { - icache_in_t rtype; // see definitions above - logic [ariane_pkg::ICACHE_LINE_WIDTH-1:0] data; // full cache line width 
- logic [ariane_pkg::ICACHE_USER_LINE_WIDTH-1:0] user; // user bits - icache_inval_t inv; // invalidation vector - logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) - } icache_rtrn_t; - - // dcache interface - typedef struct packed { - logic vld; // invalidate only affected way - logic all; // invalidate all ways - logic [ariane_pkg::DCACHE_INDEX_WIDTH-1:0] idx; // physical address to invalidate - logic [L15_WAY_WIDTH-1:0] way; // way to invalidate - } dcache_inval_t; - - typedef struct packed { - dcache_out_t rtype; // see definitions above - logic [2:0] size; // transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) - logic [L1D_WAY_WIDTH-1:0] way; // way to replace - logic [riscv::PLEN-1:0] paddr; // physical address - riscv::xlen_t data; // word width of processor (no block stores at the moment) - logic [ariane_pkg::DATA_USER_WIDTH-1:0] user; // user width of processor (no block stores at the moment) - logic nc; // noncacheable - logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) - ariane_pkg::amo_t amo_op; // amo opcode - } dcache_req_t; - - typedef struct packed { - dcache_in_t rtype; // see definitions above - logic [ariane_pkg::DCACHE_LINE_WIDTH-1:0] data; // full cache line width - logic [ariane_pkg::DCACHE_USER_LINE_WIDTH-1:0] user; // user bits - dcache_inval_t inv; // invalidation vector - logic [CACHE_ID_WIDTH-1:0] tid; // threadi id (used as transaction id in Ariane) - } dcache_rtrn_t; - - // taken from iop.h in openpiton // to l1.5 (only marked subset is used) typedef enum logic [4:0] { @@ -201,53 +111,6 @@ package wt_cache_pkg; L15_CPX_RESTYPE_ATOMIC_RES = 4'b1110 // custom type for atomic responses } l15_rtrntypes_t; - - typedef struct packed { - logic l15_val; // valid signal, asserted with request - logic l15_req_ack; // ack for response - l15_reqtypes_t l15_rqtype; // see below for encoding - logic l15_nc; // non-cacheable bit - logic [2:0] l15_size; // 
transaction size: 000=Byte 001=2Byte; 010=4Byte; 011=8Byte; 111=Cache line (16/32Byte) - logic [L15_TID_WIDTH-1:0] l15_threadid; // currently 0 or 1 - logic l15_prefetch; // unused in openpiton - logic l15_invalidate_cacheline; // unused by Ariane as L1 has no ECC at the moment - logic l15_blockstore; // unused in openpiton - logic l15_blockinitstore; // unused in openpiton - logic [L15_WAY_WIDTH-1:0] l15_l1rplway; // way to replace - logic [39:0] l15_address; // physical address - logic [63:0] l15_data; // word to write - logic [63:0] l15_data_next_entry; // unused in Ariane (only used for CAS atomic requests) - logic [L15_TLB_CSM_WIDTH-1:0] l15_csm_data; // unused in Ariane - logic [3:0] l15_amo_op; // atomic operation type - } l15_req_t; - - typedef struct packed { - logic l15_ack; // ack for request struct - logic l15_header_ack; // ack for request struct - logic l15_val; // valid signal for return struct - l15_rtrntypes_t l15_returntype; // see below for encoding - logic l15_l2miss; // unused in Ariane - logic [1:0] l15_error; // unused in openpiton - logic l15_noncacheable; // non-cacheable bit - logic l15_atomic; // asserted in load return and store ack packets of atomic tx - logic [L15_TID_WIDTH-1:0] l15_threadid; // used as transaction ID - logic l15_prefetch; // unused in openpiton - logic l15_f4b; // 4byte instruction fill from I/O space (nc). 
- logic [63:0] l15_data_0; // used for both caches - logic [63:0] l15_data_1; // used for both caches - logic [63:0] l15_data_2; // currently only used for I$ - logic [63:0] l15_data_3; // currently only used for I$ - logic l15_inval_icache_all_way; // invalidate all ways - logic l15_inval_dcache_all_way; // unused in openpiton - logic [15:4] l15_inval_address_15_4; // invalidate selected cacheline - logic l15_cross_invalidate; // unused in openpiton - logic [L15_WAY_WIDTH-1:0] l15_cross_invalidate_way; // unused in openpiton - logic l15_inval_dcache_inval; // invalidate selected cacheline and way - logic l15_inval_icache_inval; // unused in openpiton - logic [L15_WAY_WIDTH-1:0] l15_inval_way; // way to invalidate - logic l15_blockinitstore; // unused in openpiton - } l15_rtrn_t; - // swap endianess in a 64bit word function automatic logic [63:0] swendian64(input logic [63:0] in); automatic logic [63:0] out; @@ -265,53 +128,6 @@ package wt_cache_pkg; return cnt; endfunction : popcnt64 - function automatic logic [(riscv::XLEN/8)-1:0] to_byte_enable8(input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); - logic [(riscv::XLEN/8)-1:0] be; - be = '0; - unique case (size) - 2'b00: be[offset] = '1; // byte - 2'b01: be[offset+:2] = '1; // hword - 2'b10: be[offset+:4] = '1; // word - default: be = '1; // dword - endcase // size - return be; - endfunction : to_byte_enable8 - - function automatic logic [(riscv::XLEN/8)-1:0] to_byte_enable4(input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, input logic [1:0] size); - logic [3:0] be; - be = '0; - unique case (size) - 2'b00: be[offset] = '1; // byte - 2'b01: be[offset+:2] = '1; // hword - default: be = '1; // word - endcase // size - return be; - endfunction : to_byte_enable4 - - // openpiton requires the data to be replicated in case of smaller sizes than dwords - function automatic riscv::xlen_t repData64(input riscv::xlen_t data, input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, - input logic [1:0] size); 
- riscv::xlen_t out; - unique case (size) - 2'b00: for (int k = 0; k < 8; k++) out[k*8+:8] = data[offset*8+:8]; // byte - 2'b01: for (int k = 0; k < 4; k++) out[k*16+:16] = data[offset*8+:16]; // hword - 2'b10: for (int k = 0; k < 2; k++) out[k*32+:32] = data[offset*8+:32]; // word - default: out = data; // dword - endcase // size - return out; - endfunction : repData64 - - function automatic riscv::xlen_t repData32(input riscv::xlen_t data, input logic [riscv::XLEN_ALIGN_BYTES-1:0] offset, - input logic [1:0] size); - riscv::xlen_t out; - unique case (size) - 2'b00: for (int k = 0; k < 4; k++) out[k*8+:8] = data[offset*8+:8]; // byte - 2'b01: for (int k = 0; k < 2; k++) out[k*16+:16] = data[offset*8+:16]; // hword - default: out = data; // word - endcase // size - return out; - endfunction : repData32 - // note: this is openpiton specific. cannot transmit unaligned words. // hence we default to individual bytes in that case, and they have to be transmitted // one after the other diff --git a/core/instr_realign.sv b/core/instr_realign.sv index c4273295cd..a28fa0845d 100644 --- a/core/instr_realign.sv +++ b/core/instr_realign.sv @@ -30,16 +30,16 @@ module instr_realign input logic flush_i, input logic valid_i, output logic serving_unaligned_o, // we have an unaligned instruction in [0] - input logic [riscv::VLEN-1:0] address_i, - input logic [FETCH_WIDTH-1:0] data_i, - output logic [INSTR_PER_FETCH-1:0] valid_o, - output logic [INSTR_PER_FETCH-1:0][riscv::VLEN-1:0] addr_o, - output logic [INSTR_PER_FETCH-1:0][31:0] instr_o + input logic [CVA6Cfg.VLEN-1:0] address_i, + input logic [CVA6Cfg.FETCH_WIDTH-1:0] data_i, + output logic [CVA6Cfg.INSTR_PER_FETCH-1:0] valid_o, + output logic [CVA6Cfg.INSTR_PER_FETCH-1:0][CVA6Cfg.VLEN-1:0] addr_o, + output logic [CVA6Cfg.INSTR_PER_FETCH-1:0][31:0] instr_o ); // as a maximum we support a fetch width of 64-bit, hence there can be 4 compressed instructions logic [3:0] instr_is_compressed; - for (genvar i = 0; i < INSTR_PER_FETCH; 
i++) begin + for (genvar i = 0; i < CVA6Cfg.INSTR_PER_FETCH; i++) begin // LSB != 2'b11 assign instr_is_compressed[i] = ~&data_i[i*16+:2]; end @@ -49,15 +49,15 @@ module instr_realign // the last instruction was unaligned logic unaligned_d, unaligned_q; // register to save the unaligned address - logic [riscv::VLEN-1:0] unaligned_address_d, unaligned_address_q; + logic [CVA6Cfg.VLEN-1:0] unaligned_address_d, unaligned_address_q; // we have an unaligned instruction assign serving_unaligned_o = unaligned_q; // Instruction re-alignment - if (FETCH_WIDTH == 32) begin : realign_bp_32 + if (CVA6Cfg.FETCH_WIDTH == 32) begin : realign_bp_32 always_comb begin : re_align unaligned_d = unaligned_q; - unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + unaligned_address_d = {address_i[CVA6Cfg.VLEN-1:2], 2'b10}; unaligned_instr_d = data_i[31:16]; valid_o[0] = valid_i; @@ -66,7 +66,7 @@ module instr_realign valid_o[1] = 1'b0; instr_o[1] = '0; - addr_o[1] = {address_i[riscv::VLEN-1:2], 2'b10}; + addr_o[1] = {address_i[CVA6Cfg.VLEN-1:2], 2'b10}; // this instruction is compressed or the last instruction was unaligned if (instr_is_compressed[0] || unaligned_q) begin @@ -84,7 +84,7 @@ module instr_realign // save the upper bits for next cycle unaligned_d = 1'b1; unaligned_instr_d = data_i[31:16]; - unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + unaligned_address_d = {address_i[CVA6Cfg.VLEN-1:2], 2'b10}; end end // else -> normal fetch @@ -95,7 +95,7 @@ module instr_realign if (!instr_is_compressed[0]) begin valid_o = '0; unaligned_d = 1'b1; - unaligned_address_d = {address_i[riscv::VLEN-1:2], 2'b10}; + unaligned_address_d = {address_i[CVA6Cfg.VLEN-1:2], 2'b10}; unaligned_instr_d = data_i[15:0]; // the instruction isn't compressed but only the lower is ready end else begin @@ -104,7 +104,7 @@ module instr_realign end end // TODO(zarubaf): Fix 64 bit FETCH_WIDTH, maybe generalize to arbitrary fetch width - end else if (FETCH_WIDTH == 64) begin : realign_bp_64 + 
end else if (CVA6Cfg.FETCH_WIDTH == 64) begin : realign_bp_64 initial begin $error("Not propperly implemented"); end @@ -120,13 +120,13 @@ module instr_realign addr_o[0] = address_i; instr_o[1] = '0; - addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b010}; + addr_o[1] = {address_i[CVA6Cfg.VLEN-1:3], 3'b010}; instr_o[2] = {16'b0, data_i[47:32]}; - addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b100}; + addr_o[2] = {address_i[CVA6Cfg.VLEN-1:3], 3'b100}; instr_o[3] = {16'b0, data_i[63:48]}; - addr_o[3] = {address_i[riscv::VLEN-1:3], 3'b110}; + addr_o[3] = {address_i[CVA6Cfg.VLEN-1:3], 3'b110}; // last instruction was unaligned if (unaligned_q) begin @@ -162,7 +162,7 @@ module instr_realign end else begin instr_o[1] = data_i[47:16]; valid_o[1] = valid_i; - addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + addr_o[2] = {address_i[CVA6Cfg.VLEN-1:3], 3'b110}; if (instr_is_compressed[2]) begin unaligned_d = 1'b0; instr_o[2] = {16'b0, data_i[63:48]}; @@ -201,7 +201,7 @@ module instr_realign end else begin instr_o[1] = data_i[47:16]; valid_o[1] = valid_i; - addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + addr_o[2] = {address_i[CVA6Cfg.VLEN-1:3], 3'b110}; if (instr_is_compressed[3]) begin instr_o[2] = data_i[63:48]; valid_o[2] = valid_i; @@ -219,12 +219,12 @@ module instr_realign // | * | C | C | I | // | * | I | I | end else begin - addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + addr_o[1] = {address_i[CVA6Cfg.VLEN-1:3], 3'b100}; if (instr_is_compressed[2]) begin instr_o[1] = {16'b0, data_i[47:32]}; valid_o[1] = valid_i; - addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + addr_o[2] = {address_i[CVA6Cfg.VLEN-1:3], 3'b110}; if (instr_is_compressed[3]) begin // | * | C | C | I | valid_o[2] = valid_i; @@ -257,7 +257,7 @@ module instr_realign // | * | I | C | x -> aligned // | I | C | C | x -> again unaligned // | * | C | C | C | x -> aligned - addr_o[0] = {address_i[riscv::VLEN-1:3], 3'b010}; + addr_o[0] = {address_i[CVA6Cfg.VLEN-1:3], 3'b010}; if (instr_is_compressed[1]) 
begin instr_o[0] = {16'b0, data_i[31:16]}; @@ -266,10 +266,10 @@ module instr_realign if (instr_is_compressed[2]) begin valid_o[1] = valid_i; instr_o[1] = {16'b0, data_i[47:32]}; - addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + addr_o[1] = {address_i[CVA6Cfg.VLEN-1:3], 3'b100}; if (instr_is_compressed[3]) begin instr_o[2] = {16'b0, data_i[63:48]}; - addr_o[2] = {address_i[riscv::VLEN-1:3], 3'b110}; + addr_o[2] = {address_i[CVA6Cfg.VLEN-1:3], 3'b110}; valid_o[2] = valid_i; end else begin // this instruction is unaligned @@ -279,14 +279,14 @@ module instr_realign end end else begin instr_o[1] = data_i[63:32]; - addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b100}; + addr_o[1] = {address_i[CVA6Cfg.VLEN-1:3], 3'b100}; valid_o[1] = valid_i; end // instruction 1 is not compressed -> check slot 3 end else begin instr_o[0] = data_i[47:16]; valid_o[0] = valid_i; - addr_o[1] = {address_i[riscv::VLEN-1:3], 3'b110}; + addr_o[1] = {address_i[CVA6Cfg.VLEN-1:3], 3'b110}; if (instr_is_compressed[3]) begin instr_o[1] = data_i[63:48]; valid_o[1] = valid_i; @@ -314,7 +314,7 @@ module instr_realign // regular instruction -> unaligned end else begin unaligned_d = 1'b1; - unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110}; + unaligned_address_d = {address_i[CVA6Cfg.VLEN-1:3], 3'b110}; unaligned_instr_d = data_i[63:48]; end // instruction is a regular instruction @@ -330,7 +330,7 @@ module instr_realign valid_o = '0; if (!instr_is_compressed[3]) begin unaligned_d = 1'b1; - unaligned_address_d = {address_i[riscv::VLEN-1:3], 3'b110}; + unaligned_address_d = {address_i[CVA6Cfg.VLEN-1:3], 3'b110}; unaligned_instr_d = data_i[63:48]; end else begin valid_o[3] = valid_i; diff --git a/core/issue_read_operands.sv b/core/issue_read_operands.sv index c28e826627..7dd7605168 100644 --- a/core/issue_read_operands.sv +++ b/core/issue_read_operands.sv @@ -18,6 +18,9 @@ module issue_read_operands import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + 
parameter type branchpredict_sbe_t = logic, + parameter type fu_data_t = logic, + parameter type scoreboard_entry_t = logic, parameter type rs3_len_t = logic ) ( input logic clk_i, // Clock @@ -32,10 +35,10 @@ module issue_read_operands output logic issue_ack_o, // lookup rd in scoreboard output logic [REG_ADDR_SIZE-1:0] rs1_o, - input riscv::xlen_t rs1_i, + input logic [CVA6Cfg.XLEN-1:0] rs1_i, input logic rs1_valid_i, output logic [REG_ADDR_SIZE-1:0] rs2_o, - input riscv::xlen_t rs2_i, + input logic [CVA6Cfg.XLEN-1:0] rs2_i, input logic rs2_valid_i, output logic [REG_ADDR_SIZE-1:0] rs3_o, input rs3_len_t rs3_i, @@ -45,9 +48,9 @@ module issue_read_operands input fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_i, // To FU, just single issue for now output fu_data_t fu_data_o, - output riscv::xlen_t rs1_forwarding_o, // unregistered version of fu_data_o.operanda - output riscv::xlen_t rs2_forwarding_o, // unregistered version of fu_data_o.operandb - output logic [riscv::VLEN-1:0] pc_o, + output logic [CVA6Cfg.XLEN-1:0] rs1_forwarding_o, // unregistered version of fu_data_o.operanda + output logic [CVA6Cfg.XLEN-1:0] rs2_forwarding_o, // unregistered version of fu_data_o.operandb + output logic [CVA6Cfg.VLEN-1:0] pc_o, output logic is_compressed_instr_o, // ALU 1 input logic flu_ready_i, // Fixed latency unit ready to accept a new request @@ -73,7 +76,7 @@ module issue_read_operands output logic [31:0] cvxif_off_instr_o, // commit port input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr_i, - input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_i, input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i, input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i, @@ -85,10 +88,10 @@ module issue_read_operands ); logic stall; logic fu_busy; // functional unit is busy - riscv::xlen_t operand_a_regfile, operand_b_regfile; // operands coming from regfile + logic [CVA6Cfg.XLEN-1:0] operand_a_regfile, operand_b_regfile; 
// operands coming from regfile rs3_len_t operand_c_regfile, operand_c_fpr, operand_c_gpr; // third operand from fp regfile or gp regfile if NR_RGPR_PORTS == 3 // output flipflop (ID <-> EX) - riscv::xlen_t operand_a_n, operand_a_q, operand_b_n, operand_b_q, imm_n, imm_q, imm_forward_rs3; + logic [CVA6Cfg.XLEN-1:0] operand_a_n, operand_a_q, operand_b_n, operand_b_q, imm_n, imm_q, imm_forward_rs3; logic alu_valid_q; logic mult_valid_q; @@ -101,7 +104,7 @@ module issue_read_operands logic cvxif_valid_q; logic [31:0] cvxif_off_instr_q; - logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; fu_op operator_n, operator_q; // operation to perform fu_t fu_n, fu_q; // functional unit to use @@ -114,8 +117,8 @@ module issue_read_operands // ID <-> EX registers - assign rs1_forwarding_o = operand_a_n[riscv::VLEN-1:0]; //forwarding or unregistered rs1 value - assign rs2_forwarding_o = operand_b_n[riscv::VLEN-1:0]; //forwarding or unregistered rs2 value + assign rs1_forwarding_o = operand_a_n[CVA6Cfg.VLEN-1:0]; //forwarding or unregistered rs1 value + assign rs2_forwarding_o = operand_b_n[CVA6Cfg.VLEN-1:0]; //forwarding or unregistered rs2 value assign fu_data_o.operand_a = operand_a_q; assign fu_data_o.operand_b = operand_b_q; @@ -225,7 +228,7 @@ module issue_read_operands if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_rs3 assign imm_forward_rs3 = rs3_i; end else begin : gen_fp_rs3 - assign imm_forward_rs3 = {{riscv::XLEN-CVA6Cfg.FLen{1'b0}}, rs3_i}; + assign imm_forward_rs3 = {{CVA6Cfg.XLEN-CVA6Cfg.FLen{1'b0}}, rs3_i}; end // Forwarding/Output MUX @@ -237,11 +240,11 @@ module issue_read_operands // for FP operations, the imm field can also be the third operand from the regfile if (CVA6Cfg.NrRgprPorts == 3) begin imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? - {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : + {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : issue_instr_i.op == OFFLOAD ? 
operand_c_regfile : issue_instr_i.result; end else begin imm_n = (CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? - {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : issue_instr_i.result; + {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, operand_c_regfile} : issue_instr_i.result; end trans_id_n = issue_instr_i.trans_id; fu_n = issue_instr_i.fu; @@ -262,14 +265,14 @@ module issue_read_operands // use the PC as operand a if (issue_instr_i.use_pc) begin operand_a_n = { - {riscv::XLEN - riscv::VLEN{issue_instr_i.pc[riscv::VLEN-1]}}, issue_instr_i.pc + {CVA6Cfg.XLEN - CVA6Cfg.VLEN{issue_instr_i.pc[CVA6Cfg.VLEN-1]}}, issue_instr_i.pc }; end // use the zimm as operand a if (issue_instr_i.use_zimm) begin // zero extend operand a - operand_a_n = {{riscv::XLEN - 5{1'b0}}, issue_instr_i.rs1[4:0]}; + operand_a_n = {{CVA6Cfg.XLEN - 5{1'b0}}, issue_instr_i.rs1[4:0]}; end // or is it an immediate (including PC), this is not the case for a store, control flow, and accelerator instructions // also make sure operand B is not already used as an FP operand @@ -431,12 +434,12 @@ module issue_read_operands // ---------------------- // Integer Register File // ---------------------- - logic [ CVA6Cfg.NrRgprPorts-1:0][riscv::XLEN-1:0] rdata; + logic [ CVA6Cfg.NrRgprPorts-1:0][CVA6Cfg.XLEN-1:0] rdata; logic [ CVA6Cfg.NrRgprPorts-1:0][ 4:0] raddr_pack; // pack signals logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_pack; - logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_pack; + logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_pack; logic [CVA6Cfg.NrCommitPorts-1:0] we_pack; if (CVA6Cfg.NrRgprPorts == 3) begin : gen_rs3 @@ -450,10 +453,10 @@ module issue_read_operands assign wdata_pack[i] = wdata_i[i]; assign we_pack[i] = we_gpr_i[i]; end - if (ariane_pkg::FPGA_EN) begin : gen_fpga_regfile + if (CVA6Cfg.FPGA_EN) begin : gen_fpga_regfile ariane_regfile_fpga #( .CVA6Cfg (CVA6Cfg), - .DATA_WIDTH (riscv::XLEN), + .DATA_WIDTH (CVA6Cfg.XLEN), .NR_READ_PORTS(CVA6Cfg.NrRgprPorts), 
.ZERO_REG_ZERO(1) ) i_ariane_regfile_fpga ( @@ -468,7 +471,7 @@ module issue_read_operands end else begin : gen_asic_regfile ariane_regfile #( .CVA6Cfg (CVA6Cfg), - .DATA_WIDTH (riscv::XLEN), + .DATA_WIDTH (CVA6Cfg.XLEN), .NR_READ_PORTS(CVA6Cfg.NrRgprPorts), .ZERO_REG_ZERO(1) ) i_ariane_regfile ( @@ -489,7 +492,7 @@ module issue_read_operands // pack signals logic [2:0][4:0] fp_raddr_pack; - logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] fp_wdata_pack; + logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] fp_wdata_pack; generate if (CVA6Cfg.FpPresent) begin : float_regfile_gen @@ -499,7 +502,7 @@ module issue_read_operands for (genvar i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin : gen_fp_wdata_pack assign fp_wdata_pack[i] = {wdata_i[i][CVA6Cfg.FLen-1:0]}; end - if (ariane_pkg::FPGA_EN) begin : gen_fpga_fp_regfile + if (CVA6Cfg.FPGA_EN) begin : gen_fpga_fp_regfile ariane_regfile_fpga #( .CVA6Cfg (CVA6Cfg), .DATA_WIDTH (CVA6Cfg.FLen), @@ -536,7 +539,7 @@ module issue_read_operands endgenerate if (CVA6Cfg.NrRgprPorts == 3) begin : gen_operand_c - assign operand_c_fpr = {{riscv::XLEN-CVA6Cfg.FLen{1'b0}}, fprdata[2]}; + assign operand_c_fpr = {{CVA6Cfg.XLEN-CVA6Cfg.FLen{1'b0}}, fprdata[2]}; assign operand_c_gpr = rdata[2]; end else begin assign operand_c_fpr = fprdata[2]; @@ -544,10 +547,10 @@ module issue_read_operands assign operand_a_regfile = (CVA6Cfg.FpPresent && is_rs1_fpr( issue_instr_i.op - )) ? {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[0]} : rdata[0]; + )) ? {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[0]} : rdata[0]; assign operand_b_regfile = (CVA6Cfg.FpPresent && is_rs2_fpr( issue_instr_i.op - )) ? {{riscv::XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[1]} : rdata[1]; + )) ? {{CVA6Cfg.XLEN - CVA6Cfg.FLen{1'b0}}, fprdata[1]} : rdata[1]; assign operand_c_regfile = (CVA6Cfg.NrRgprPorts == 3) ? ((CVA6Cfg.FpPresent && is_imm_fpr(issue_instr_i.op)) ? 
operand_c_fpr : operand_c_gpr) : operand_c_fpr; @@ -564,7 +567,7 @@ module issue_read_operands trans_id_q <= '0; pc_o <= '0; is_compressed_instr_o <= 1'b0; - branch_predict_o <= {cf_t'(0), {riscv::VLEN{1'b0}}}; + branch_predict_o <= {cf_t'(0), {CVA6Cfg.VLEN{1'b0}}}; end else begin operand_a_q <= operand_a_n; operand_b_q <= operand_b_n; diff --git a/core/issue_stage.sv b/core/issue_stage.sv index 5b1241466b..614c2c08ae 100644 --- a/core/issue_stage.sv +++ b/core/issue_stage.sv @@ -18,6 +18,11 @@ module issue_stage import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type bp_resolve_t = logic, + parameter type branchpredict_sbe_t = logic, + parameter type fu_data_t = logic, + parameter type scoreboard_entry_t = logic, parameter bit IsRVFI = bit'(0), parameter int unsigned NR_ENTRIES = 8 ) ( @@ -34,10 +39,10 @@ module issue_stage input logic is_ctrl_flow_i, output logic decoded_instr_ack_o, // to EX - output [riscv::VLEN-1:0] rs1_forwarding_o, // unregistered version of fu_data_o.operanda - output [riscv::VLEN-1:0] rs2_forwarding_o, // unregistered version of fu_data_o.operandb + output [CVA6Cfg.VLEN-1:0] rs1_forwarding_o, // unregistered version of fu_data_o.operanda + output [CVA6Cfg.VLEN-1:0] rs2_forwarding_o, // unregistered version of fu_data_o.operandb output fu_data_t fu_data_o, - output logic [riscv::VLEN-1:0] pc_o, + output logic [CVA6Cfg.VLEN-1:0] pc_o, output logic is_compressed_instr_o, input logic flu_ready_i, output logic alu_valid_o, @@ -70,16 +75,16 @@ module issue_stage output logic issue_instr_hs_o, // write back port - input logic [CVA6Cfg.NrWbPorts-1:0][TRANS_ID_BITS-1:0] trans_id_i, + input logic [CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_i, input bp_resolve_t resolved_branch_i, - input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, + input logic [CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] wbdata_i, input exception_t 
[CVA6Cfg.NrWbPorts-1:0] ex_ex_i, // exception from execute stage or CVXIF offloaded instruction input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, input logic x_we_i, // commit port input logic [CVA6Cfg.NrCommitPorts-1:0][ 4:0] waddr_i, - input logic [CVA6Cfg.NrCommitPorts-1:0][riscv::XLEN-1:0] wdata_i, + input logic [CVA6Cfg.NrCommitPorts-1:0][CVA6Cfg.XLEN-1:0] wdata_i, input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr_i, input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr_i, @@ -89,25 +94,25 @@ module issue_stage output logic stall_issue_o, // Used in Performance Counters //RVFI - input [ riscv::VLEN-1:0] lsu_addr_i, - input [ (riscv::XLEN/8)-1:0] lsu_rmask_i, - input [ (riscv::XLEN/8)-1:0] lsu_wmask_i, - input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i + input [ CVA6Cfg.VLEN-1:0] lsu_addr_i, + input [ (CVA6Cfg.XLEN/8)-1:0] lsu_rmask_i, + input [ (CVA6Cfg.XLEN/8)-1:0] lsu_wmask_i, + input [CVA6Cfg.TRANS_ID_BITS-1:0] lsu_addr_trans_id_i ); // --------------------------------------------------- // Scoreboard (SB) <-> Issue and Read Operands (IRO) // --------------------------------------------------- - typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? riscv::XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t; + typedef logic [(CVA6Cfg.NrRgprPorts == 3 ? 
CVA6Cfg.XLEN : CVA6Cfg.FLen)-1:0] rs3_len_t; fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_gpr_sb_iro; fu_t [2**REG_ADDR_SIZE-1:0] rd_clobber_fpr_sb_iro; logic [ REG_ADDR_SIZE-1:0] rs1_iro_sb; - riscv::xlen_t rs1_sb_iro; + logic [CVA6Cfg.XLEN-1:0] rs1_sb_iro; logic rs1_valid_sb_iro; logic [ REG_ADDR_SIZE-1:0] rs2_iro_sb; - riscv::xlen_t rs2_sb_iro; + logic [CVA6Cfg.XLEN-1:0] rs2_sb_iro; logic rs2_valid_iro_sb; logic [ REG_ADDR_SIZE-1:0] rs3_iro_sb; @@ -118,11 +123,11 @@ module issue_stage logic issue_instr_valid_sb_iro; logic issue_ack_iro_sb; - riscv::xlen_t rs1_forwarding_xlen; - riscv::xlen_t rs2_forwarding_xlen; + logic [CVA6Cfg.XLEN-1:0] rs1_forwarding_xlen; + logic [CVA6Cfg.XLEN-1:0] rs2_forwarding_xlen; - assign rs1_forwarding_o = rs1_forwarding_xlen[riscv::VLEN-1:0]; - assign rs2_forwarding_o = rs2_forwarding_xlen[riscv::VLEN-1:0]; + assign rs1_forwarding_o = rs1_forwarding_xlen[CVA6Cfg.VLEN-1:0]; + assign rs2_forwarding_o = rs2_forwarding_xlen[CVA6Cfg.VLEN-1:0]; assign issue_instr_o = issue_instr_sb_iro; assign issue_instr_hs_o = issue_instr_valid_sb_iro & issue_ack_iro_sb; @@ -133,6 +138,9 @@ module issue_stage // --------------------------------------------------------- scoreboard #( .CVA6Cfg (CVA6Cfg), + .exception_t(exception_t), + .bp_resolve_t(bp_resolve_t), + .scoreboard_entry_t(scoreboard_entry_t), .IsRVFI (IsRVFI), .rs3_len_t (rs3_len_t), .NR_ENTRIES(NR_ENTRIES) @@ -176,6 +184,9 @@ module issue_stage // --------------------------------------------------------- issue_read_operands #( .CVA6Cfg (CVA6Cfg), + .branchpredict_sbe_t(branchpredict_sbe_t), + .fu_data_t(fu_data_t), + .scoreboard_entry_t(scoreboard_entry_t), .rs3_len_t(rs3_len_t) ) i_issue_read_operands ( .flush_i (flush_unissued_instr_i), diff --git a/core/load_store_unit.sv b/core/load_store_unit.sv index 94e1c4f119..88e6cc9055 100644 --- a/core/load_store_unit.sv +++ b/core/load_store_unit.sv @@ -17,7 +17,24 @@ module load_store_unit import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t 
CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int unsigned ASID_WIDTH = 1 + parameter type exception_t = logic, + parameter type fu_data_t = logic, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter type lsu_ctrl_t = struct packed { + logic valid; + logic [CVA6Cfg.VLEN-1:0] vaddr; + logic overflow; + logic [CVA6Cfg.XLEN-1:0] data; + logic [(CVA6Cfg.XLEN/8)-1:0] be; + fu_t fu; + fu_op operation; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; + } ) ( input logic clk_i, input logic rst_ni, @@ -30,19 +47,19 @@ module load_store_unit output logic lsu_ready_o, // FU is ready e.g. not busy input logic lsu_valid_i, // Input is valid - output logic [TRANS_ID_BITS-1:0] load_trans_id_o, // ID of scoreboard entry at which to write back - output riscv::xlen_t load_result_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] load_trans_id_o, // ID of scoreboard entry at which to write back + output logic [CVA6Cfg.XLEN-1:0] load_result_o, output logic load_valid_o, output exception_t load_exception_o, // to WB, signal exception status LD exception - output logic [TRANS_ID_BITS-1:0] store_trans_id_o, // ID of scoreboard entry at which to write back - output riscv::xlen_t store_result_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] store_trans_id_o, // ID of scoreboard entry at which to write back + output logic [CVA6Cfg.XLEN-1:0] store_result_o, output logic store_valid_o, output exception_t store_exception_o, // to WB, signal exception status ST exception input logic commit_i, // commit the pending store output logic commit_ready_o, // commit queue is ready to accept another commit request - input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, + input logic [CVA6Cfg.TRANS_ID_BITS-1:0] commit_tran_id_i, input logic enable_translation_i, // enable virtual memory translation input logic 
en_ld_st_translation_i, // enable virtual memory translation for load/stores @@ -55,10 +72,10 @@ module load_store_unit input riscv::priv_lvl_t ld_st_priv_lvl_i, // From CSR register file input logic sum_i, // From CSR register file input logic mxr_i, // From CSR register file - input logic [riscv::PPNW-1:0] satp_ppn_i, // From CSR register file - input logic [ ASID_WIDTH-1:0] asid_i, // From CSR register file - input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i, - input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + input logic [CVA6Cfg.PPNW-1:0] satp_ppn_i, // From CSR register file + input logic [ CVA6Cfg.ASID_WIDTH-1:0] asid_i, // From CSR register file + input logic [ CVA6Cfg.ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [CVA6Cfg.VLEN-1:0] vaddr_to_be_flushed_i, input logic flush_tlb_i, // Performance counters output logic itlb_miss_o, @@ -74,14 +91,14 @@ module load_store_unit input amo_resp_t amo_resp_i, // PMP input riscv::pmpcfg_t [15:0] pmpcfg_i, - input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, + input logic [15:0][CVA6Cfg.PLEN-3:0] pmpaddr_i, //RVFI - output [ riscv::VLEN-1:0] lsu_addr_o, - output [ riscv::PLEN-1:0] mem_paddr_o, - output [ (riscv::XLEN/8)-1:0] lsu_rmask_o, - output [ (riscv::XLEN/8)-1:0] lsu_wmask_o, - output [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_o + output [ CVA6Cfg.VLEN-1:0] lsu_addr_o, + output [ CVA6Cfg.PLEN-1:0] mem_paddr_o, + output [ (CVA6Cfg.XLEN/8)-1:0] lsu_rmask_o, + output [ (CVA6Cfg.XLEN/8)-1:0] lsu_wmask_o, + output [CVA6Cfg.TRANS_ID_BITS-1:0] lsu_addr_trans_id_o ); // data is misaligned logic data_misaligned; @@ -99,36 +116,36 @@ module load_store_unit // Address Generation Unit (AGU) // ------------------------------ // virtual address as calculated by the AGU in the first cycle - logic [ riscv::VLEN-1:0] vaddr_i; - riscv::xlen_t vaddr_xlen; + logic [ CVA6Cfg.VLEN-1:0] vaddr_i; + logic [CVA6Cfg.XLEN-1:0] vaddr_xlen; logic overflow; - logic [(riscv::XLEN/8)-1:0] be_i; + logic [(CVA6Cfg.XLEN/8)-1:0] be_i; 
assign vaddr_xlen = $unsigned($signed(fu_data_i.imm) + $signed(fu_data_i.operand_a)); - assign vaddr_i = vaddr_xlen[riscv::VLEN-1:0]; - // we work with SV39 or SV32, so if VM is enabled, check that all bits [XLEN-1:38] or [XLEN-1:31] are equal - assign overflow = !((&vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b1 || (|vaddr_xlen[riscv::XLEN-1:riscv::SV-1]) == 1'b0); + assign vaddr_i = vaddr_xlen[CVA6Cfg.VLEN-1:0]; + // we work with SV39 or SV32, so if VM is enabled, check that all bits [CVA6Cfg.XLEN-1:38] or [CVA6Cfg.XLEN-1:31] are equal + assign overflow = !((&vaddr_xlen[CVA6Cfg.XLEN-1:CVA6Cfg.SV-1]) == 1'b1 || (|vaddr_xlen[CVA6Cfg.XLEN-1:CVA6Cfg.SV-1]) == 1'b0); logic st_valid_i; logic ld_valid_i; logic ld_translation_req; logic st_translation_req; - logic [riscv::VLEN-1:0] ld_vaddr; - logic [riscv::VLEN-1:0] st_vaddr; + logic [CVA6Cfg.VLEN-1:0] ld_vaddr; + logic [CVA6Cfg.VLEN-1:0] st_vaddr; logic translation_req; logic translation_valid; - logic [riscv::VLEN-1:0] mmu_vaddr; - logic [riscv::PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen; + logic [CVA6Cfg.VLEN-1:0] mmu_vaddr; + logic [CVA6Cfg.PLEN-1:0] mmu_paddr, mmu_vaddr_plen, fetch_vaddr_plen; exception_t mmu_exception; logic dtlb_hit; - logic [ riscv::PPNW-1:0] dtlb_ppn; + logic [ CVA6Cfg.PPNW-1:0] dtlb_ppn; logic ld_valid; - logic [TRANS_ID_BITS-1:0] ld_trans_id; - riscv::xlen_t ld_result; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] ld_trans_id; + logic [CVA6Cfg.XLEN-1:0] ld_result; logic st_valid; - logic [TRANS_ID_BITS-1:0] st_trans_id; - riscv::xlen_t st_result; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] st_trans_id; + logic [CVA6Cfg.XLEN-1:0] st_result; logic [ 11:0] page_offset; logic page_offset_matches; @@ -140,12 +157,18 @@ module load_store_unit // ------------------- // MMU e.g.: TLBs/PTW // ------------------- - if (MMU_PRESENT && (riscv::XLEN == 64)) begin : gen_mmu_sv39 + if (MMU_PRESENT && (CVA6Cfg.XLEN == 64)) begin : gen_mmu_sv39 mmu #( .CVA6Cfg (CVA6Cfg), + .exception_t(exception_t), + 
.icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES), - .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES), - .ASID_WIDTH (ASID_WIDTH) + .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES) ) i_cva6_mmu ( // misaligned bypass .misaligned_ex_i(misaligned_exception), @@ -169,12 +192,18 @@ module load_store_unit .pmpaddr_i, .* ); - end else if (MMU_PRESENT && (riscv::XLEN == 32)) begin : gen_mmu_sv32 + end else if (MMU_PRESENT && (CVA6Cfg.XLEN == 32)) begin : gen_mmu_sv32 cva6_mmu_sv32 #( .CVA6Cfg (CVA6Cfg), + .exception_t(exception_t), + .icache_areq_t(icache_areq_t), + .icache_arsp_t(icache_arsp_t), + .icache_dreq_t(icache_dreq_t), + .icache_drsp_t(icache_drsp_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), .INSTR_TLB_ENTRIES(ariane_pkg::INSTR_TLB_ENTRIES), - .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES), - .ASID_WIDTH (ASID_WIDTH) + .DATA_TLB_ENTRIES (ariane_pkg::DATA_TLB_ENTRIES) ) i_cva6_mmu ( // misaligned bypass .misaligned_ex_i(misaligned_exception), @@ -200,12 +229,12 @@ module load_store_unit ); end else begin : gen_no_mmu - if (riscv::VLEN > riscv::PLEN) begin - assign mmu_vaddr_plen = mmu_vaddr[riscv::PLEN-1:0]; - assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; + if (CVA6Cfg.VLEN > CVA6Cfg.PLEN) begin + assign mmu_vaddr_plen = mmu_vaddr[CVA6Cfg.PLEN-1:0]; + assign fetch_vaddr_plen = icache_areq_i.fetch_vaddr[CVA6Cfg.PLEN-1:0]; end else begin - assign mmu_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, mmu_vaddr}; - assign fetch_vaddr_plen = {{{riscv::PLEN - riscv::VLEN} {1'b0}}, icache_areq_i.fetch_vaddr}; + assign mmu_vaddr_plen = {{{CVA6Cfg.PLEN - CVA6Cfg.VLEN} {1'b0}}, mmu_vaddr}; + assign fetch_vaddr_plen = {{{CVA6Cfg.PLEN - CVA6Cfg.VLEN} {1'b0}}, icache_areq_i.fetch_vaddr}; end assign icache_areq_o.fetch_valid 
= icache_areq_i.fetch_req; @@ -224,7 +253,7 @@ module load_store_unit assign itlb_miss_o = 1'b0; assign dtlb_miss_o = 1'b0; - assign dtlb_ppn = mmu_vaddr_plen[riscv::PLEN-1:12]; + assign dtlb_ppn = mmu_vaddr_plen[CVA6Cfg.PLEN-1:12]; assign dtlb_hit = 1'b1; always_ff @(posedge clk_i or negedge rst_ni) begin @@ -246,7 +275,11 @@ module load_store_unit // Store Unit // ------------------ store_unit #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .exception_t(exception_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), + .lsu_ctrl_t(lsu_ctrl_t) ) i_store_unit ( .clk_i, .rst_ni, @@ -288,7 +321,11 @@ module load_store_unit // Load Unit // ------------------ load_unit #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .exception_t(exception_t), + .lsu_ctrl_t(lsu_ctrl_t), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t) ) i_load_unit ( .valid_i (ld_valid_i), .lsu_ctrl_i(lsu_ctrl), @@ -351,7 +388,7 @@ module load_store_unit st_valid_i = 1'b0; translation_req = 1'b0; - mmu_vaddr = {riscv::VLEN{1'b0}}; + mmu_vaddr = {CVA6Cfg.VLEN{1'b0}}; // check the operation to activate the right functional unit accordingly unique case (lsu_ctrl.fu) @@ -380,7 +417,7 @@ module load_store_unit // 12 bit are the same anyway // and we can always generate the byte enable from the address at hand - if (riscv::IS_XLEN64) begin : gen_8b_be + if (CVA6Cfg.IS_XLEN64) begin : gen_8b_be assign be_i = be_gen(vaddr_i[2:0], extract_transfer_size(fu_data_i.operation)); end else begin : gen_4b_be assign be_i = be_gen_32(vaddr_i[1:0], extract_transfer_size(fu_data_i.operation)); @@ -394,7 +431,7 @@ module load_store_unit // can augment the exception if other memory related exceptions like a page fault or access errors always_comb begin : data_misaligned_detection - misaligned_exception = {{riscv::XLEN{1'b0}}, {riscv::XLEN{1'b0}}, 1'b0}; + misaligned_exception = {{CVA6Cfg.XLEN{1'b0}}, {CVA6Cfg.XLEN{1'b0}}, 1'b0}; data_misaligned = 1'b0; @@ -435,12 +472,12 @@ module 
load_store_unit if (lsu_ctrl.fu == LOAD) begin misaligned_exception = { - riscv::LD_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + riscv::LD_ADDR_MISALIGNED, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 }; end else if (lsu_ctrl.fu == STORE) begin misaligned_exception = { - riscv::ST_ADDR_MISALIGNED, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + riscv::ST_ADDR_MISALIGNED, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 }; end end @@ -449,12 +486,12 @@ module load_store_unit if (lsu_ctrl.fu == LOAD) begin misaligned_exception = { - riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + riscv::LD_ACCESS_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 }; end else if (lsu_ctrl.fu == STORE) begin misaligned_exception = { - riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 + riscv::ST_ACCESS_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_ctrl.vaddr}, 1'b1 }; end end @@ -478,7 +515,8 @@ module load_store_unit }; lsu_bypass #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .lsu_ctrl_t(lsu_ctrl_t) ) lsu_bypass_i ( .lsu_req_i (lsu_req_i), .lsu_req_valid_i(lsu_valid_i), diff --git a/core/load_unit.sv b/core/load_unit.sv index 385aa5d2ef..db4be0f4bd 100644 --- a/core/load_unit.sv +++ b/core/load_unit.sv @@ -21,7 +21,11 @@ module load_unit import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type lsu_ctrl_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -32,21 +36,21 @@ module load_unit output logic pop_ld_o, // load unit output port output logic valid_o, - output logic [TRANS_ID_BITS-1:0] trans_id_o, - output riscv::xlen_t result_o, + 
output logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_o, + output logic [CVA6Cfg.XLEN-1:0] result_o, output exception_t ex_o, // MMU -> Address Translation output logic translation_req_o, // request address translation - output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out - input logic [riscv::PLEN-1:0] paddr_i, // physical address in + output logic [CVA6Cfg.VLEN-1:0] vaddr_o, // virtual address out + input logic [CVA6Cfg.PLEN-1:0] paddr_i, // physical address in input exception_t ex_i, // exception which may has happened earlier. for example: mis-aligned exception input logic dtlb_hit_i, // hit on the dtlb, send in the same cycle as the request - input logic [riscv::PPNW-1:0] dtlb_ppn_i, // ppn on the dtlb, send in the same cycle as the request + input logic [CVA6Cfg.PPNW-1:0] dtlb_ppn_i, // ppn on the dtlb, send in the same cycle as the request // address checker output logic [11:0] page_offset_o, input logic page_offset_matches_i, input logic store_buffer_empty_i, // the entire store-buffer is empty - input logic [TRANS_ID_BITS-1:0] commit_tran_id_i, + input logic [CVA6Cfg.TRANS_ID_BITS-1:0] commit_tran_id_i, // D$ interface input dcache_req_o_t req_port_i, output dcache_req_i_t req_port_o, @@ -68,8 +72,8 @@ module load_unit // in order to decouple the response interface from the request interface, // we need a a buffer which can hold all inflight memory load requests typedef struct packed { - logic [TRANS_ID_BITS-1:0] trans_id; // scoreboard identifier - logic [riscv::XLEN_ALIGN_BYTES-1:0] address_offset; // least significant bits of the address + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id; // scoreboard identifier + logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] address_offset; // least significant bits of the address fu_op operation; // type of load } ldbuf_t; @@ -166,15 +170,15 @@ module load_unit assign req_port_o.data_wdata = '0; // compose the load buffer write data, control is handled in the FSM assign ldbuf_wdata = { - lsu_ctrl_i.trans_id, 
lsu_ctrl_i.vaddr[riscv::XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation + lsu_ctrl_i.trans_id, lsu_ctrl_i.vaddr[CVA6Cfg.XLEN_ALIGN_BYTES-1:0], lsu_ctrl_i.operation }; // output address // we can now output the lower 12 bit as the index to the cache - assign req_port_o.address_index = lsu_ctrl_i.vaddr[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + assign req_port_o.address_index = lsu_ctrl_i.vaddr[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; // translation from last cycle, again: control is handled in the FSM - assign req_port_o.address_tag = paddr_i[ariane_pkg::DCACHE_TAG_WIDTH + - ariane_pkg::DCACHE_INDEX_WIDTH-1 : - ariane_pkg::DCACHE_INDEX_WIDTH]; + assign req_port_o.address_tag = paddr_i[CVA6Cfg.DCACHE_TAG_WIDTH + + CVA6Cfg.DCACHE_INDEX_WIDTH-1 : + CVA6Cfg.DCACHE_INDEX_WIDTH]; // request id = index of the load buffer's entry assign req_port_o.data_id = ldbuf_windex; // directly forward exception fields (valid bit is set below) @@ -186,7 +190,7 @@ module load_unit logic not_commit_time; logic inflight_stores; logic stall_ni; - assign paddr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {{52-riscv::PPNW{1'b0}}, dtlb_ppn_i, 12'd0}); + assign paddr_ni = config_pkg::is_inside_nonidempotent_regions(CVA6Cfg, {{52-CVA6Cfg.PPNW{1'b0}}, dtlb_ppn_i, 12'd0}); assign not_commit_time = commit_tran_id_i != lsu_ctrl_i.trans_id; assign inflight_stores = (!dcache_wbuffer_not_ni_i || !store_buffer_empty_i); assign stall_ni = (inflight_stores || not_commit_time) && paddr_ni; @@ -426,7 +430,7 @@ module load_unit // --------------- // Sign Extend // --------------- - riscv::xlen_t shifted_data; + logic [CVA6Cfg.XLEN-1:0] shifted_data; // realign as needed assign shifted_data = req_port_i.data_rdata >> {ldbuf_rdata.address_offset, 3'b000}; @@ -446,19 +450,19 @@ module load_unit end */ // result mux fast - logic [ (riscv::XLEN/8)-1:0] rdata_sign_bits; - logic [riscv::XLEN_ALIGN_BYTES-1:0] rdata_offset; + logic [ (CVA6Cfg.XLEN/8)-1:0] rdata_sign_bits; + logic [CVA6Cfg.XLEN_ALIGN_BYTES-1:0] 
rdata_offset; logic rdata_sign_bit, rdata_is_signed, rdata_is_fp_signed; // prepare these signals for faster selection in the next cycle assign rdata_is_signed = ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::LH, ariane_pkg::LB}; assign rdata_is_fp_signed = ldbuf_rdata.operation inside {ariane_pkg::FLW, ariane_pkg::FLH, ariane_pkg::FLB}; - assign rdata_offset = ((ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::FLW}) & riscv::IS_XLEN64) ? ldbuf_rdata.address_offset + 3 : + assign rdata_offset = ((ldbuf_rdata.operation inside {ariane_pkg::LW, ariane_pkg::FLW}) & CVA6Cfg.IS_XLEN64) ? ldbuf_rdata.address_offset + 3 : ( ldbuf_rdata.operation inside {ariane_pkg::LH, ariane_pkg::FLH}) ? ldbuf_rdata.address_offset + 1 : ldbuf_rdata.address_offset; - for (genvar i = 0; i < (riscv::XLEN / 8); i++) begin : gen_sign_bits + for (genvar i = 0; i < (CVA6Cfg.XLEN / 8); i++) begin : gen_sign_bits assign rdata_sign_bits[i] = req_port_i.data_rdata[(i+1)*8-1]; end @@ -471,28 +475,28 @@ module load_unit always_comb begin unique case (ldbuf_rdata.operation) ariane_pkg::LW, ariane_pkg::LWU: - result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]}; + result_o = {{CVA6Cfg.XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]}; ariane_pkg::LH, ariane_pkg::LHU: - result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]}; + result_o = {{CVA6Cfg.XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]}; ariane_pkg::LB, ariane_pkg::LBU: - result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]}; + result_o = {{CVA6Cfg.XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]}; ariane_pkg::FLW: begin if (CVA6Cfg.FpPresent) begin - result_o = {{riscv::XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]}; + result_o = {{CVA6Cfg.XLEN - 32{rdata_sign_bit}}, shifted_data[31:0]}; end end ariane_pkg::FLH: begin if (CVA6Cfg.FpPresent) begin - result_o = {{riscv::XLEN - 32 + 16{rdata_sign_bit}}, shifted_data[15:0]}; + result_o = {{CVA6Cfg.XLEN - 32 + 
16{rdata_sign_bit}}, shifted_data[15:0]}; end end ariane_pkg::FLB: begin if (CVA6Cfg.FpPresent) begin - result_o = {{riscv::XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]}; + result_o = {{CVA6Cfg.XLEN - 32 + 24{rdata_sign_bit}}, shifted_data[7:0]}; end end - default: result_o = shifted_data[riscv::XLEN-1:0]; + default: result_o = shifted_data[CVA6Cfg.XLEN-1:0]; endcase end // end result mux fast diff --git a/core/lsu_bypass.sv b/core/lsu_bypass.sv index 96f6d50252..3663b24345 100644 --- a/core/lsu_bypass.sv +++ b/core/lsu_bypass.sv @@ -26,7 +26,8 @@ module lsu_bypass import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type lsu_ctrl_t = logic ) ( input logic clk_i, input logic rst_ni, diff --git a/core/mmu_sv32/cva6_mmu_sv32.sv b/core/mmu_sv32/cva6_mmu_sv32.sv index 2be172e457..d0768d393c 100644 --- a/core/mmu_sv32/cva6_mmu_sv32.sv +++ b/core/mmu_sv32/cva6_mmu_sv32.sv @@ -30,9 +30,15 @@ module cva6_mmu_sv32 import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, parameter int unsigned INSTR_TLB_ENTRIES = 2, - parameter int unsigned DATA_TLB_ENTRIES = 2, - parameter int unsigned ASID_WIDTH = 1 + parameter int unsigned DATA_TLB_ENTRIES = 2 ) ( input logic clk_i, input logic rst_ni, @@ -47,15 +53,15 @@ module cva6_mmu_sv32 // in the LSU as we distinguish load and stores, what we do here is simple address translation input exception_t misaligned_ex_i, input logic lsu_req_i, // request address translation - input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in + input logic [CVA6Cfg.VLEN-1:0] lsu_vaddr_i, // 
virtual address in input logic lsu_is_store_i, // the translation is requested by a store // if we need to walk the page table we can't grant in the same cycle // Cycle 0 output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB - output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit) + output logic [CVA6Cfg.PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit) // Cycle 1 output logic lsu_valid_o, // translation is valid - output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address + output logic [CVA6Cfg.PLEN-1:0] lsu_paddr_o, // translated address output exception_t lsu_exception_o, // address translation threw an exception // General control signals input riscv::priv_lvl_t priv_lvl_i, @@ -63,10 +69,10 @@ module cva6_mmu_sv32 input logic sum_i, input logic mxr_i, // input logic flag_mprv_i, - input logic [riscv::PPNW-1:0] satp_ppn_i, - input logic [ASID_WIDTH-1:0] asid_i, - input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, - input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + input logic [CVA6Cfg.PPNW-1:0] satp_ppn_i, + input logic [CVA6Cfg.ASID_WIDTH-1:0] asid_i, + input logic [CVA6Cfg.ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [CVA6Cfg.VLEN-1:0] vaddr_to_be_flushed_i, input logic flush_tlb_i, // Performance counters output logic itlb_miss_o, @@ -76,7 +82,7 @@ module cva6_mmu_sv32 output dcache_req_i_t req_port_o, // PMP input riscv::pmpcfg_t [15:0] pmpcfg_i, - input logic [15:0][riscv::PLEN-3:0] pmpaddr_i + input logic [15:0][CVA6Cfg.PLEN-3:0] pmpaddr_i ); logic iaccess_err; // insufficient privilege to access this instruction page @@ -85,9 +91,9 @@ module cva6_mmu_sv32 logic walking_instr; // PTW is walking because of an ITLB miss logic ptw_error; // PTW threw an exception logic ptw_access_exception; // PTW threw an access exception (PMPs) - logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr + logic [CVA6Cfg.PLEN-1:0] ptw_bad_paddr; // PTW PMP 
exception bad physical addr - logic [riscv::VLEN-1:0] update_vaddr; + logic [CVA6Cfg.VLEN-1:0] update_vaddr; tlb_update_sv32_t update_itlb, update_dtlb, update_shared_tlb; logic itlb_lu_access; @@ -101,7 +107,7 @@ module cva6_mmu_sv32 logic dtlb_lu_hit; logic shared_tlb_access; - logic [riscv::VLEN-1:0] shared_tlb_vaddr; + logic [CVA6Cfg.VLEN-1:0] shared_tlb_vaddr; logic shared_tlb_hit; logic itlb_req; @@ -114,8 +120,7 @@ module cva6_mmu_sv32 cva6_tlb_sv32 #( .CVA6Cfg (CVA6Cfg), - .TLB_ENTRIES(INSTR_TLB_ENTRIES), - .ASID_WIDTH (ASID_WIDTH) + .TLB_ENTRIES(INSTR_TLB_ENTRIES) ) i_itlb ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -136,8 +141,7 @@ module cva6_mmu_sv32 cva6_tlb_sv32 #( .CVA6Cfg (CVA6Cfg), - .TLB_ENTRIES(DATA_TLB_ENTRIES), - .ASID_WIDTH (ASID_WIDTH) + .TLB_ENTRIES(DATA_TLB_ENTRIES) ) i_dtlb ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -159,8 +163,7 @@ module cva6_mmu_sv32 cva6_shared_tlb_sv32 #( .CVA6Cfg (CVA6Cfg), .SHARED_TLB_DEPTH(64), - .SHARED_TLB_WAYS (2), - .ASID_WIDTH (ASID_WIDTH) + .SHARED_TLB_WAYS (2) ) i_shared_tlb ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -199,7 +202,8 @@ module cva6_mmu_sv32 cva6_ptw_sv32 #( .CVA6Cfg (CVA6Cfg), - .ASID_WIDTH(ASID_WIDTH) + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t) ) i_ptw ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -273,12 +277,12 @@ module cva6_mmu_sv32 always_comb begin : instr_interface // MMU disabled: just pass through icache_areq_o.fetch_valid = icache_areq_i.fetch_req; - if (riscv::PLEN > riscv::VLEN) + if (CVA6Cfg.PLEN > CVA6Cfg.VLEN) icache_areq_o.fetch_paddr = { - {riscv::PLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr + {CVA6Cfg.PLEN - CVA6Cfg.VLEN{1'b0}}, icache_areq_i.fetch_vaddr }; // play through in case we disabled address translation else - icache_areq_o.fetch_paddr = {2'b00, icache_areq_i.fetch_vaddr[riscv::VLEN-1:0]};// play through in case we disabled address translation + icache_areq_o.fetch_paddr = {2'b00, icache_areq_i.fetch_vaddr[CVA6Cfg.VLEN-1:0]};// play through in case we 
disabled address translation // two potential exception sources: // 1. HPTW threw an exception -> signal with a page fault exception // 2. We got an access error because of insufficient permissions -> throw an access exception @@ -292,11 +296,11 @@ module cva6_mmu_sv32 // AXI decode error), or when PTW performs walk due to ITLB miss and raises // an error. if (enable_translation_i) begin - // we work with SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal - if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin + // we work with SV32, so if VM is enabled, check that all bits [CVA6Cfg.VLEN-1:CVA6Cfg.SV-1] are equal + if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[CVA6Cfg.VLEN-1:CVA6Cfg.SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[CVA6Cfg.VLEN-1:CVA6Cfg.SV-1]) == 1'b0)) begin icache_areq_o.fetch_exception = { riscv::INSTR_ACCESS_FAULT, - {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1 }; end @@ -322,7 +326,7 @@ module cva6_mmu_sv32 // throw a page fault icache_areq_o.fetch_exception = { riscv::INSTR_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1 }; //to check on wave --> not connected end else if (!pmp_instr_allow) begin @@ -339,12 +343,12 @@ module cva6_mmu_sv32 icache_areq_o.fetch_valid = ptw_error | ptw_access_exception; if (ptw_error) icache_areq_o.fetch_exception = { - riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1 + riscv::INSTR_PAGE_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, update_vaddr}, 1'b1 }; //to check on wave // TODO(moschn,zarubaf): What should the value of tval be in this case? 
else icache_areq_o.fetch_exception = { - riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1 + riscv::INSTR_ACCESS_FAULT, ptw_bad_paddr[CVA6Cfg.PLEN-1:2], 1'b1 }; //to check on wave --> not connected end end @@ -352,20 +356,20 @@ module cva6_mmu_sv32 // or: if we are not translating, check PMPs immediately on the paddr if (!match_any_execute_region || (!enable_translation_i && !pmp_instr_allow)) begin icache_areq_o.fetch_exception = { - riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[riscv::PLEN-1:2], 1'b1 + riscv::INSTR_ACCESS_FAULT, icache_areq_o.fetch_paddr[CVA6Cfg.PLEN-1:2], 1'b1 }; //to check on wave --> not connected end end // check for execute flag on memory assign match_any_execute_region = config_pkg::is_inside_execute_regions( - CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr} + CVA6Cfg, {{64 - CVA6Cfg.PLEN{1'b0}}, icache_areq_o.fetch_paddr} ); // Instruction fetch pmp #( - .PLEN (riscv::PLEN), - .PMP_LEN (riscv::PLEN - 2), + .CVA6Cfg (CVA6Cfg), + .PMP_LEN (CVA6Cfg.PLEN - 2), .NR_ENTRIES(CVA6Cfg.NrPMPEntries) ) i_pmp_if ( .addr_i (icache_areq_o.fetch_paddr), @@ -381,7 +385,7 @@ module cva6_mmu_sv32 //----------------------- // Data Interface //----------------------- - logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q; + logic [CVA6Cfg.VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q; riscv::pte_sv32_t dtlb_pte_n, dtlb_pte_q; exception_t misaligned_ex_n, misaligned_ex_q; logic lsu_req_n, lsu_req_q; @@ -395,7 +399,7 @@ module cva6_mmu_sv32 // Wires to PMP checks riscv::pmp_access_t pmp_access_type; logic pmp_data_allow; - localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1; + localparam PPNWMin = (CVA6Cfg.PPNW - 1 > 29) ? 
29 : CVA6Cfg.PPNW - 1; // The data interface is simpler and only consists of a request/response interface always_comb begin : data_interface // save request and DTLB response @@ -407,12 +411,12 @@ module cva6_mmu_sv32 lsu_is_store_n = lsu_is_store_i; dtlb_is_4M_n = dtlb_is_4M; - if (riscv::PLEN > riscv::VLEN) begin - lsu_paddr_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_q}; - lsu_dtlb_ppn_o = {{riscv::PLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n[riscv::VLEN-1:12]}; + if (CVA6Cfg.PLEN > CVA6Cfg.VLEN) begin + lsu_paddr_o = {{CVA6Cfg.PLEN - CVA6Cfg.VLEN{1'b0}}, lsu_vaddr_q}; + lsu_dtlb_ppn_o = {{CVA6Cfg.PLEN - CVA6Cfg.VLEN{1'b0}}, lsu_vaddr_n[CVA6Cfg.VLEN-1:12]}; end else begin - lsu_paddr_o = {2'b00, lsu_vaddr_q[riscv::VLEN-1:0]}; - lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PPNW-1:0]; + lsu_paddr_o = {2'b00, lsu_vaddr_q[CVA6Cfg.VLEN-1:0]}; + lsu_dtlb_ppn_o = lsu_vaddr_n[CVA6Cfg.PPNW-1:0]; end lsu_valid_o = lsu_req_q; lsu_exception_o = misaligned_ex_q; @@ -453,13 +457,13 @@ module cva6_mmu_sv32 if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin lsu_exception_o = { riscv::STORE_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, lsu_vaddr_q}, 1'b1 }; //to check on wave // Check if any PMPs are violated end else if (!pmp_data_allow) begin lsu_exception_o = { - riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1 + riscv::ST_ACCESS_FAULT, lsu_paddr_o[CVA6Cfg.PLEN-1:2], 1'b1 }; //only 32 bits on 34b of lsu_paddr_o are returned. 
end @@ -469,13 +473,13 @@ module cva6_mmu_sv32 if (daccess_err) begin lsu_exception_o = { riscv::LOAD_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, lsu_vaddr_q}, 1'b1 }; // Check if any PMPs are violated end else if (!pmp_data_allow) begin lsu_exception_o = { - riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1 + riscv::LD_ACCESS_FAULT, lsu_paddr_o[CVA6Cfg.PLEN-1:2], 1'b1 }; //only 32 bits on 34b of lsu_paddr_o are returned. end end @@ -494,13 +498,13 @@ module cva6_mmu_sv32 if (lsu_is_store_q) begin lsu_exception_o = { riscv::STORE_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, update_vaddr}, 1'b1 }; end else begin lsu_exception_o = { riscv::LOAD_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, update_vaddr}, 1'b1 }; end @@ -510,23 +514,23 @@ module cva6_mmu_sv32 // an error makes the translation valid lsu_valid_o = 1'b1; // the page table walker can only throw page faults - lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[riscv::PLEN-1:2], 1'b1}; + lsu_exception_o = {riscv::LD_ACCESS_FAULT, ptw_bad_paddr[CVA6Cfg.PLEN-1:2], 1'b1}; end end end // If translation is not enabled, check the paddr immediately against PMPs else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin if (lsu_is_store_q) begin - lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; + lsu_exception_o = {riscv::ST_ACCESS_FAULT, lsu_paddr_o[CVA6Cfg.PLEN-1:2], 1'b1}; end else begin - lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[riscv::PLEN-1:2], 1'b1}; + lsu_exception_o = {riscv::LD_ACCESS_FAULT, lsu_paddr_o[CVA6Cfg.PLEN-1:2], 1'b1}; end end end // Load/store PMP check pmp #( - .PLEN (riscv::PLEN), - .PMP_LEN (riscv::PLEN - 2), + 
.CVA6Cfg (CVA6Cfg), + .PMP_LEN (CVA6Cfg.PLEN - 2), .NR_ENTRIES(CVA6Cfg.NrPMPEntries) ) i_pmp_data ( .addr_i (lsu_paddr_o), diff --git a/core/mmu_sv32/cva6_ptw_sv32.sv b/core/mmu_sv32/cva6_ptw_sv32.sv index 4bd736bd30..ddcf4e587d 100644 --- a/core/mmu_sv32/cva6_ptw_sv32.sv +++ b/core/mmu_sv32/cva6_ptw_sv32.sv @@ -30,7 +30,8 @@ module cva6_ptw_sv32 import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int ASID_WIDTH = 1 + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -50,19 +51,19 @@ module cva6_ptw_sv32 // to Shared TLB, update logic output tlb_update_sv32_t shared_tlb_update_o, - output logic [riscv::VLEN-1:0] update_vaddr_o, + output logic [CVA6Cfg.VLEN-1:0] update_vaddr_o, - input logic [ASID_WIDTH-1:0] asid_i, + input logic [CVA6Cfg.ASID_WIDTH-1:0] asid_i, // from shared TLB input logic shared_tlb_access_i, input logic shared_tlb_hit_i, - input logic [riscv::VLEN-1:0] shared_tlb_vaddr_i, + input logic [CVA6Cfg.VLEN-1:0] shared_tlb_vaddr_i, input logic itlb_req_i, // from CSR file - input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp + input logic [CVA6Cfg.PPNW-1:0] satp_ppn_i, // ppn from satp input logic mxr_i, // Performance counters @@ -70,14 +71,14 @@ module cva6_ptw_sv32 // PMP input riscv::pmpcfg_t [15:0] pmpcfg_i, - input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, - output logic [riscv::PLEN-1:0] bad_paddr_o + input logic [15:0][CVA6Cfg.PLEN-3:0] pmpaddr_i, + output logic [CVA6Cfg.PLEN-1:0] bad_paddr_o ); // input registers logic data_rvalid_q; - riscv::xlen_t data_rdata_q; + logic [CVA6Cfg.XLEN-1:0] data_rdata_q; riscv::pte_sv32_t pte; assign pte = riscv::pte_sv32_t'(data_rdata_q); @@ -107,11 +108,11 @@ module cva6_ptw_sv32 // latched tag signal logic tag_valid_n, tag_valid_q; // register the ASID - logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; + logic 
[CVA6Cfg.ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; // register the VPN we need to walk, SV32 defines a 32 bit virtual address - logic [riscv::VLEN-1:0] vaddr_q, vaddr_n; + logic [CVA6Cfg.VLEN-1:0] vaddr_q, vaddr_n; // 4 byte aligned physical pointer - logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n; + logic [CVA6Cfg.PLEN-1:0] ptw_pptr_q, ptw_pptr_n; // Assignments assign update_vaddr_o = vaddr_q; @@ -120,8 +121,8 @@ module cva6_ptw_sv32 //assign walking_instr_o = is_instr_ptw_q; assign walking_instr_o = is_instr_ptw_q; // directly output the correct physical address - assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0]; - assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH]; + assign req_port_o.address_index = ptw_pptr_q[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; + assign req_port_o.address_tag = ptw_pptr_q[CVA6Cfg.DCACHE_INDEX_WIDTH+CVA6Cfg.DCACHE_TAG_WIDTH-1:CVA6Cfg.DCACHE_INDEX_WIDTH]; // we are never going to kill this request assign req_port_o.kill_req = '0; // we are never going to write with the HPTW @@ -132,7 +133,7 @@ module cva6_ptw_sv32 // ----------- // Shared TLB Update // ----------- - assign shared_tlb_update_o.vpn = vaddr_q[riscv::SV-1:12]; + assign shared_tlb_update_o.vpn = vaddr_q[CVA6Cfg.SV-1:12]; // update the correct page table level assign shared_tlb_update_o.is_4M = (ptw_lvl_q == LVL1); // output the correct ASID @@ -149,8 +150,7 @@ module cva6_ptw_sv32 pmp #( .CVA6Cfg (CVA6Cfg), - .PLEN (riscv::PLEN), - .PMP_LEN (riscv::PLEN - 2), + .PMP_LEN (CVA6Cfg.PLEN - 2), .NR_ENTRIES(CVA6Cfg.NrPMPEntries) ) i_pmp_ptw ( .addr_i (ptw_pptr_q), @@ -221,7 +221,7 @@ module cva6_ptw_sv32 // if we got a Shared TLB miss if (shared_tlb_access_i & ~shared_tlb_hit_i) begin ptw_pptr_n = { - satp_ppn_i, shared_tlb_vaddr_i[riscv::SV-1:22], 2'b0 + satp_ppn_i, shared_tlb_vaddr_i[CVA6Cfg.SV-1:22], 2'b0 }; // SATP.PPN * PAGESIZE + VPN*PTESIZE = SATP.PPN * 2^(12) + VPN*4 is_instr_ptw_n = itlb_req_i; 
tlb_update_asid_n = asid_i; diff --git a/core/mmu_sv32/cva6_shared_tlb_sv32.sv b/core/mmu_sv32/cva6_shared_tlb_sv32.sv index 98e2a044a9..83a0f24efc 100644 --- a/core/mmu_sv32/cva6_shared_tlb_sv32.sv +++ b/core/mmu_sv32/cva6_shared_tlb_sv32.sv @@ -22,8 +22,7 @@ module cva6_shared_tlb_sv32 #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, parameter int SHARED_TLB_DEPTH = 64, - parameter int SHARED_TLB_WAYS = 2, - parameter int ASID_WIDTH = 1 + parameter int SHARED_TLB_WAYS = 2 ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -32,17 +31,17 @@ module cva6_shared_tlb_sv32 input logic enable_translation_i, // CSRs indicate to enable SV32 input logic en_ld_st_translation_i, // enable virtual memory translation for load/stores - input logic [ASID_WIDTH-1:0] asid_i, + input logic [CVA6Cfg.ASID_WIDTH-1:0] asid_i, // from TLBs // did we miss? input logic itlb_access_i, input logic itlb_hit_i, - input logic [riscv::VLEN-1:0] itlb_vaddr_i, + input logic [CVA6Cfg.VLEN-1:0] itlb_vaddr_i, input logic dtlb_access_i, input logic dtlb_hit_i, - input logic [riscv::VLEN-1:0] dtlb_vaddr_i, + input logic [CVA6Cfg.VLEN-1:0] dtlb_vaddr_i, // to TLBs, update logic output tlb_update_sv32_t itlb_update_o, @@ -54,7 +53,7 @@ module cva6_shared_tlb_sv32 output logic shared_tlb_access_o, output logic shared_tlb_hit_o, - output logic [riscv::VLEN-1:0] shared_tlb_vaddr_o, + output logic [CVA6Cfg.VLEN-1:0] shared_tlb_vaddr_o, output logic itlb_req_o, @@ -113,14 +112,14 @@ module cva6_shared_tlb_sv32 riscv::pte_sv32_t [SHARED_TLB_WAYS-1:0] pte; - logic [riscv::VLEN-1-12:0] itlb_vpn_q; - logic [riscv::VLEN-1-12:0] dtlb_vpn_q; + logic [CVA6Cfg.VLEN-1-12:0] itlb_vpn_q; + logic [CVA6Cfg.VLEN-1-12:0] dtlb_vpn_q; - logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d; + logic [CVA6Cfg.ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_d; logic shared_tlb_access_q, shared_tlb_access_d; logic shared_tlb_hit_d; - logic [riscv::VLEN-1:0] 
shared_tlb_vaddr_q, shared_tlb_vaddr_d; + logic [CVA6Cfg.VLEN-1:0] shared_tlb_vaddr_q, shared_tlb_vaddr_d; logic itlb_req_d, itlb_req_q; logic dtlb_req_d, dtlb_req_q; @@ -243,8 +242,8 @@ module cva6_shared_tlb_sv32 dtlb_req_q <= '0; shared_tag_valid <= '0; end else begin - itlb_vpn_q <= itlb_vaddr_i[riscv::SV-1:12]; - dtlb_vpn_q <= dtlb_vaddr_i[riscv::SV-1:12]; + itlb_vpn_q <= itlb_vaddr_i[CVA6Cfg.SV-1:12]; + dtlb_vpn_q <= dtlb_vaddr_i[CVA6Cfg.SV-1:12]; tlb_update_asid_q <= tlb_update_asid_d; shared_tlb_access_q <= shared_tlb_access_d; shared_tlb_vaddr_q <= shared_tlb_vaddr_d; diff --git a/core/mmu_sv32/cva6_tlb_sv32.sv b/core/mmu_sv32/cva6_tlb_sv32.sv index 79a7c98dc5..61d4bbb735 100644 --- a/core/mmu_sv32/cva6_tlb_sv32.sv +++ b/core/mmu_sv32/cva6_tlb_sv32.sv @@ -28,8 +28,7 @@ module cva6_tlb_sv32 import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int unsigned TLB_ENTRIES = 4, - parameter int unsigned ASID_WIDTH = 1 + parameter int unsigned TLB_ENTRIES = 4 ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -38,11 +37,11 @@ module cva6_tlb_sv32 input tlb_update_sv32_t update_i, // Lookup signals input logic lu_access_i, - input logic [ASID_WIDTH-1:0] lu_asid_i, - input logic [riscv::VLEN-1:0] lu_vaddr_i, + input logic [CVA6Cfg.ASID_WIDTH-1:0] lu_asid_i, + input logic [CVA6Cfg.VLEN-1:0] lu_vaddr_i, output riscv::pte_sv32_t lu_content_o, - input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, - input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + input logic [CVA6Cfg.ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [CVA6Cfg.VLEN-1:0] vaddr_to_be_flushed_i, output logic lu_is_4M_o, output logic lu_hit_o ); @@ -78,7 +77,7 @@ module cva6_tlb_sv32 for (int unsigned i = 0; i < TLB_ENTRIES; i++) begin // first level match, this may be a mega page, check the ASID flags as well // if the entry is associated to a global address, don't match the ASID (ASID is don't care) - if 
(tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[ASID_WIDTH-1:0]) || content_q[i].g) && vpn1 == tags_q[i].vpn1) begin + if (tags_q[i].valid && ((lu_asid_i == tags_q[i].asid[CVA6Cfg.ASID_WIDTH-1:0]) || content_q[i].g) && vpn1 == tags_q[i].vpn1) begin if (tags_q[i].is_4M || vpn0 == tags_q[i].vpn0) begin lu_is_4M_o = tags_q[i].is_4M; lu_content_o = content_q[i]; @@ -118,10 +117,10 @@ module cva6_tlb_sv32 else if (asid_to_be_flushed_is0 && ( (vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M) ) && (~vaddr_to_be_flushed_is0)) tags_n[i].valid = 1'b0; // the entry is flushed if it's not global and asid and vaddr both matches with the entry to be flushed ("SFENCE.VMA vaddr asid" case) - else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) + else if ((!content_q[i].g) && ((vaddr_vpn0_match[i] && vaddr_vpn1_match[i]) || (vaddr_vpn1_match[i] && tags_q[i].is_4M)) && (asid_to_be_flushed_i == tags_q[i].asid[CVA6Cfg.ASID_WIDTH-1:0]) && (!vaddr_to_be_flushed_is0) && (!asid_to_be_flushed_is0)) tags_n[i].valid = 1'b0; // the entry is flushed if it's not global, and the asid matches and vaddr is 0. 
("SFENCE.VMA 0 asid" case) - else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0)) + else if ((!content_q[i].g) && (vaddr_to_be_flushed_is0) && (asid_to_be_flushed_i == tags_q[i].asid[CVA6Cfg.ASID_WIDTH-1:0]) && (!asid_to_be_flushed_is0)) tags_n[i].valid = 1'b0; // normal replacement end else if (update_i.valid & replace_en[i]) begin @@ -248,7 +247,7 @@ module cva6_tlb_sv32 $error("TLB size must be a multiple of 2 and greater than 1"); $stop(); end - assert (ASID_WIDTH >= 1) + assert (CVA6Cfg.ASID_WIDTH >= 1) else begin $error("ASID width must be at least 1"); $stop(); diff --git a/core/mmu_sv39/mmu.sv b/core/mmu_sv39/mmu.sv index 39e9f34303..be8ba76858 100644 --- a/core/mmu_sv39/mmu.sv +++ b/core/mmu_sv39/mmu.sv @@ -19,9 +19,24 @@ module mmu import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type icache_areq_t = logic, + parameter type icache_arsp_t = logic, + parameter type icache_dreq_t = logic, + parameter type icache_drsp_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, parameter int unsigned INSTR_TLB_ENTRIES = 4, parameter int unsigned DATA_TLB_ENTRIES = 4, - parameter int unsigned ASID_WIDTH = 1 + + parameter type tlb_update_t = struct packed { + logic valid; // valid flag + logic is_2M; // + logic is_1G; // + logic [27-1:0] vpn; // VPN (39bits) = 27bits + 12bits offset + logic [CVA6Cfg.ASID_WIDTH-1:0] asid; + riscv::pte_t content; + } ) ( input logic clk_i, input logic rst_ni, @@ -36,15 +51,15 @@ module mmu // in the LSU as we distinguish load and stores, what we do here is simple address translation input exception_t misaligned_ex_i, input logic lsu_req_i, // request address translation - input logic [riscv::VLEN-1:0] lsu_vaddr_i, // virtual address in + input logic [CVA6Cfg.VLEN-1:0] lsu_vaddr_i, // virtual address in input 
logic lsu_is_store_i, // the translation is requested by a store // if we need to walk the page table we can't grant in the same cycle // Cycle 0 output logic lsu_dtlb_hit_o, // sent in the same cycle as the request if translation hits in the DTLB - output logic [riscv::PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit) + output logic [CVA6Cfg.PPNW-1:0] lsu_dtlb_ppn_o, // ppn (send same cycle as hit) // Cycle 1 output logic lsu_valid_o, // translation is valid - output logic [riscv::PLEN-1:0] lsu_paddr_o, // translated address + output logic [CVA6Cfg.PLEN-1:0] lsu_paddr_o, // translated address output exception_t lsu_exception_o, // address translation threw an exception // General control signals input riscv::priv_lvl_t priv_lvl_i, @@ -52,10 +67,10 @@ module mmu input logic sum_i, input logic mxr_i, // input logic flag_mprv_i, - input logic [riscv::PPNW-1:0] satp_ppn_i, - input logic [ASID_WIDTH-1:0] asid_i, - input logic [ASID_WIDTH-1:0] asid_to_be_flushed_i, - input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + input logic [CVA6Cfg.PPNW-1:0] satp_ppn_i, + input logic [CVA6Cfg.ASID_WIDTH-1:0] asid_i, + input logic [CVA6Cfg.ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [CVA6Cfg.VLEN-1:0] vaddr_to_be_flushed_i, input logic flush_tlb_i, // Performance counters output logic itlb_miss_o, @@ -65,7 +80,7 @@ module mmu output dcache_req_i_t req_port_o, // PMP input riscv::pmpcfg_t [15:0] pmpcfg_i, - input logic [15:0][riscv::PLEN-3:0] pmpaddr_i + input logic [15:0][CVA6Cfg.PLEN-3:0] pmpaddr_i ); logic iaccess_err; // insufficient privilege to access this instruction page @@ -74,9 +89,9 @@ module mmu logic walking_instr; // PTW is walking because of an ITLB miss logic ptw_error; // PTW threw an exception logic ptw_access_exception; // PTW threw an access exception (PMPs) - logic [riscv::PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr + logic [CVA6Cfg.PLEN-1:0] ptw_bad_paddr; // PTW PMP exception bad physical addr - logic [riscv::VLEN-1:0] 
update_vaddr; + logic [CVA6Cfg.VLEN-1:0] update_vaddr; tlb_update_t update_ptw_itlb, update_ptw_dtlb; logic itlb_lu_access; @@ -99,8 +114,8 @@ module mmu tlb #( .CVA6Cfg (CVA6Cfg), - .TLB_ENTRIES(INSTR_TLB_ENTRIES), - .ASID_WIDTH (ASID_WIDTH) + .tlb_update_t(tlb_update_t), + .TLB_ENTRIES(INSTR_TLB_ENTRIES) ) i_itlb ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -122,8 +137,8 @@ module mmu tlb #( .CVA6Cfg (CVA6Cfg), - .TLB_ENTRIES(DATA_TLB_ENTRIES), - .ASID_WIDTH (ASID_WIDTH) + .tlb_update_t(tlb_update_t), + .TLB_ENTRIES(DATA_TLB_ENTRIES) ) i_dtlb ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -146,7 +161,9 @@ module mmu ptw #( .CVA6Cfg (CVA6Cfg), - .ASID_WIDTH(ASID_WIDTH) + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t), + .tlb_update_t(tlb_update_t) ) i_ptw ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -205,7 +222,7 @@ module mmu always_comb begin : instr_interface // MMU disabled: just pass through icache_areq_o.fetch_valid = icache_areq_i.fetch_req; - icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[riscv::PLEN-1:0]; // play through in case we disabled address translation + icache_areq_o.fetch_paddr = icache_areq_i.fetch_vaddr[CVA6Cfg.PLEN-1:0]; // play through in case we disabled address translation // two potential exception sources: // 1. HPTW threw an exception -> signal with a page fault exception // 2. We got an access error because of insufficient permissions -> throw an access exception @@ -220,11 +237,11 @@ module mmu // AXI decode error), or when PTW performs walk due to ITLB miss and raises // an error. 
if (enable_translation_i) begin - // we work with SV39 or SV32, so if VM is enabled, check that all bits [riscv::VLEN-1:riscv::SV-1] are equal - if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[riscv::VLEN-1:riscv::SV-1]) == 1'b0)) begin + // we work with SV39 or SV32, so if VM is enabled, check that all bits [CVA6Cfg.VLEN-1:CVA6Cfg.SV-1] are equal + if (icache_areq_i.fetch_req && !((&icache_areq_i.fetch_vaddr[CVA6Cfg.VLEN-1:CVA6Cfg.SV-1]) == 1'b1 || (|icache_areq_i.fetch_vaddr[CVA6Cfg.VLEN-1:CVA6Cfg.SV-1]) == 1'b0)) begin icache_areq_o.fetch_exception = { riscv::INSTR_ACCESS_FAULT, - {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1 }; end @@ -253,13 +270,13 @@ module mmu // throw a page fault icache_areq_o.fetch_exception = { riscv::INSTR_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1 }; end else if (!pmp_instr_allow) begin icache_areq_o.fetch_exception = { riscv::INSTR_ACCESS_FAULT, - {{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_i.fetch_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.PLEN{1'b0}}, icache_areq_i.fetch_vaddr}, 1'b1 }; end @@ -272,11 +289,11 @@ module mmu icache_areq_o.fetch_valid = ptw_error | ptw_access_exception; if (ptw_error) icache_areq_o.fetch_exception = { - riscv::INSTR_PAGE_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1 + riscv::INSTR_PAGE_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, update_vaddr}, 1'b1 }; else icache_areq_o.fetch_exception = { - riscv::INSTR_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, update_vaddr}, 1'b1 + riscv::INSTR_ACCESS_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, update_vaddr}, 1'b1 }; end end @@ -285,7 +302,7 @@ module mmu if ((!match_any_execute_region && !ptw_error) || (!enable_translation_i && !pmp_instr_allow)) begin 
icache_areq_o.fetch_exception = { riscv::INSTR_ACCESS_FAULT, - {{riscv::XLEN - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.PLEN{1'b0}}, icache_areq_o.fetch_paddr}, 1'b1 }; end @@ -293,14 +310,13 @@ module mmu // check for execute flag on memory assign match_any_execute_region = config_pkg::is_inside_execute_regions( - CVA6Cfg, {{64 - riscv::PLEN{1'b0}}, icache_areq_o.fetch_paddr} + CVA6Cfg, {{64 - CVA6Cfg.PLEN{1'b0}}, icache_areq_o.fetch_paddr} ); // Instruction fetch pmp #( .CVA6Cfg (CVA6Cfg), - .PLEN (riscv::PLEN), - .PMP_LEN (riscv::PLEN - 2), + .PMP_LEN (CVA6Cfg.PLEN - 2), .NR_ENTRIES(CVA6Cfg.NrPMPEntries) ) i_pmp_if ( .addr_i (icache_areq_o.fetch_paddr), @@ -316,7 +332,7 @@ module mmu //----------------------- // Data Interface //----------------------- - logic [riscv::VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q; + logic [CVA6Cfg.VLEN-1:0] lsu_vaddr_n, lsu_vaddr_q; riscv::pte_t dtlb_pte_n, dtlb_pte_q; exception_t misaligned_ex_n, misaligned_ex_q; logic lsu_req_n, lsu_req_q; @@ -331,7 +347,7 @@ module mmu // Wires to PMP checks riscv::pmp_access_t pmp_access_type; logic pmp_data_allow; - localparam PPNWMin = (riscv::PPNW - 1 > 29) ? 29 : riscv::PPNW - 1; + localparam PPNWMin = (CVA6Cfg.PPNW - 1 > 29) ? 29 : CVA6Cfg.PPNW - 1; // The data interface is simpler and only consists of a request/response interface always_comb begin : data_interface // save request and DTLB response @@ -344,8 +360,8 @@ module mmu dtlb_is_2M_n = dtlb_is_2M; dtlb_is_1G_n = dtlb_is_1G; - lsu_paddr_o = lsu_vaddr_q[riscv::PLEN-1:0]; - lsu_dtlb_ppn_o = lsu_vaddr_n[riscv::PLEN-1:12]; + lsu_paddr_o = lsu_vaddr_q[CVA6Cfg.PLEN-1:0]; + lsu_dtlb_ppn_o = lsu_vaddr_n[CVA6Cfg.PLEN-1:12]; lsu_valid_o = lsu_req_q; lsu_exception_o = misaligned_ex_q; pmp_access_type = lsu_is_store_q ? 
riscv::ACCESS_WRITE : riscv::ACCESS_READ; @@ -390,14 +406,14 @@ module mmu if (!dtlb_pte_q.w || daccess_err || !dtlb_pte_q.d) begin lsu_exception_o = { riscv::STORE_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, lsu_vaddr_q}, 1'b1 }; // Check if any PMPs are violated end else if (!pmp_data_allow) begin lsu_exception_o = { riscv::ST_ACCESS_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, lsu_vaddr_q}, 1'b1 }; end @@ -408,14 +424,14 @@ module mmu if (daccess_err) begin lsu_exception_o = { riscv::LOAD_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, lsu_vaddr_q}, 1'b1 }; // Check if any PMPs are violated end else if (!pmp_data_allow) begin lsu_exception_o = { riscv::LD_ACCESS_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, lsu_vaddr_q}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, lsu_vaddr_q}, 1'b1 }; end @@ -435,13 +451,13 @@ module mmu if (lsu_is_store_q) begin lsu_exception_o = { riscv::STORE_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, update_vaddr}, 1'b1 }; end else begin lsu_exception_o = { riscv::LOAD_PAGE_FAULT, - {{riscv::XLEN - riscv::VLEN{lsu_vaddr_q[riscv::VLEN-1]}}, update_vaddr}, + {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{lsu_vaddr_q[CVA6Cfg.VLEN-1]}}, update_vaddr}, 1'b1 }; end @@ -453,11 +469,11 @@ module mmu // Any fault of the page table walk should be based of the original access type if (lsu_is_store_q) begin lsu_exception_o = { - riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1 + riscv::ST_ACCESS_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_vaddr_n}, 1'b1 }; end else begin lsu_exception_o = { 
- riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::VLEN{1'b0}}, lsu_vaddr_n}, 1'b1 + riscv::LD_ACCESS_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.VLEN{1'b0}}, lsu_vaddr_n}, 1'b1 }; end end @@ -466,11 +482,11 @@ module mmu else if (lsu_req_q && !misaligned_ex_q.valid && !pmp_data_allow) begin if (lsu_is_store_q) begin lsu_exception_o = { - riscv::ST_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1 + riscv::ST_ACCESS_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.PLEN{1'b0}}, lsu_paddr_o}, 1'b1 }; end else begin lsu_exception_o = { - riscv::LD_ACCESS_FAULT, {{riscv::XLEN - riscv::PLEN{1'b0}}, lsu_paddr_o}, 1'b1 + riscv::LD_ACCESS_FAULT, {{CVA6Cfg.XLEN - CVA6Cfg.PLEN{1'b0}}, lsu_paddr_o}, 1'b1 }; end end @@ -479,8 +495,7 @@ module mmu // Load/store PMP check pmp #( .CVA6Cfg (CVA6Cfg), - .PLEN (riscv::PLEN), - .PMP_LEN (riscv::PLEN - 2), + .PMP_LEN (CVA6Cfg.PLEN - 2), .NR_ENTRIES(CVA6Cfg.NrPMPEntries) ) i_pmp_data ( .addr_i (lsu_paddr_o), diff --git a/core/mmu_sv39/ptw.sv b/core/mmu_sv39/ptw.sv index 2d0e3780ac..80d6c065bf 100644 --- a/core/mmu_sv39/ptw.sv +++ b/core/mmu_sv39/ptw.sv @@ -19,7 +19,9 @@ module ptw import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int ASID_WIDTH = 1 + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter type tlb_update_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -43,20 +45,20 @@ module ptw output tlb_update_t itlb_update_o, output tlb_update_t dtlb_update_o, - output logic [riscv::VLEN-1:0] update_vaddr_o, + output logic [CVA6Cfg.VLEN-1:0] update_vaddr_o, - input logic [ ASID_WIDTH-1:0] asid_i, + input logic [ CVA6Cfg.ASID_WIDTH-1:0] asid_i, // from TLBs // did we miss? 
input logic itlb_access_i, input logic itlb_hit_i, - input logic [riscv::VLEN-1:0] itlb_vaddr_i, + input logic [CVA6Cfg.VLEN-1:0] itlb_vaddr_i, input logic dtlb_access_i, input logic dtlb_hit_i, - input logic [riscv::VLEN-1:0] dtlb_vaddr_i, + input logic [CVA6Cfg.VLEN-1:0] dtlb_vaddr_i, // from CSR file - input logic [riscv::PPNW-1:0] satp_ppn_i, // ppn from satp + input logic [CVA6Cfg.PPNW-1:0] satp_ppn_i, // ppn from satp input logic mxr_i, // Performance counters output logic itlb_miss_o, @@ -64,8 +66,8 @@ module ptw // PMP input riscv::pmpcfg_t [15:0] pmpcfg_i, - input logic [15:0][riscv::PLEN-3:0] pmpaddr_i, - output logic [riscv::PLEN-1:0] bad_paddr_o + input logic [15:0][CVA6Cfg.PLEN-3:0] pmpaddr_i, + output logic [CVA6Cfg.PLEN-1:0] bad_paddr_o ); @@ -100,11 +102,11 @@ module ptw // latched tag signal logic tag_valid_n, tag_valid_q; // register the ASID - logic [ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; + logic [CVA6Cfg.ASID_WIDTH-1:0] tlb_update_asid_q, tlb_update_asid_n; // register the VPN we need to walk, SV39 defines a 39 bit virtual address - logic [riscv::VLEN-1:0] vaddr_q, vaddr_n; + logic [CVA6Cfg.VLEN-1:0] vaddr_q, vaddr_n; // 4 byte aligned physical pointer - logic [riscv::PLEN-1:0] ptw_pptr_q, ptw_pptr_n; + logic [CVA6Cfg.PLEN-1:0] ptw_pptr_q, ptw_pptr_n; // Assignments assign update_vaddr_o = vaddr_q; @@ -112,8 +114,8 @@ module ptw assign ptw_active_o = (state_q != IDLE); assign walking_instr_o = is_instr_ptw_q; // directly output the correct physical address - assign req_port_o.address_index = ptw_pptr_q[DCACHE_INDEX_WIDTH-1:0]; - assign req_port_o.address_tag = ptw_pptr_q[DCACHE_INDEX_WIDTH+DCACHE_TAG_WIDTH-1:DCACHE_INDEX_WIDTH]; + assign req_port_o.address_index = ptw_pptr_q[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; + assign req_port_o.address_tag = ptw_pptr_q[CVA6Cfg.DCACHE_INDEX_WIDTH+CVA6Cfg.DCACHE_TAG_WIDTH-1:CVA6Cfg.DCACHE_INDEX_WIDTH]; // we are never going to kill this request assign req_port_o.kill_req = '0; // we are never going 
to write with the HPTW @@ -123,8 +125,8 @@ module ptw // ----------- // TLB Update // ----------- - assign itlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]}; - assign dtlb_update_o.vpn = {{39 - riscv::SV{1'b0}}, vaddr_q[riscv::SV-1:12]}; + assign itlb_update_o.vpn = {{39 - CVA6Cfg.SV{1'b0}}, vaddr_q[CVA6Cfg.SV-1:12]}; + assign dtlb_update_o.vpn = {{39 - CVA6Cfg.SV{1'b0}}, vaddr_q[CVA6Cfg.SV-1:12]}; // update the correct page table level assign itlb_update_o.is_2M = (ptw_lvl_q == LVL2); assign itlb_update_o.is_1G = (ptw_lvl_q == LVL1); @@ -145,8 +147,7 @@ module ptw pmp #( .CVA6Cfg (CVA6Cfg), - .PLEN (riscv::PLEN), - .PMP_LEN (riscv::PLEN - 2), + .PMP_LEN (CVA6Cfg.PLEN - 2), .NR_ENTRIES(CVA6Cfg.NrPMPEntries) ) i_pmp_ptw ( .addr_i (ptw_pptr_q), @@ -216,7 +217,7 @@ module ptw is_instr_ptw_n = 1'b0; // if we got an ITLB miss if (enable_translation_i & itlb_access_i & ~itlb_hit_i & ~dtlb_access_i) begin - ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[riscv::SV-1:30], 3'b0}; + ptw_pptr_n = {satp_ppn_i, itlb_vaddr_i[CVA6Cfg.SV-1:30], 3'b0}; is_instr_ptw_n = 1'b1; tlb_update_asid_n = asid_i; vaddr_n = itlb_vaddr_i; @@ -224,7 +225,7 @@ module ptw itlb_miss_o = 1'b1; // we got an DTLB miss end else if (en_ld_st_translation_i & dtlb_access_i & ~dtlb_hit_i) begin - ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[riscv::SV-1:30], 3'b0}; + ptw_pptr_n = {satp_ppn_i, dtlb_vaddr_i[CVA6Cfg.SV-1:30], 3'b0}; tlb_update_asid_n = asid_i; vaddr_n = dtlb_vaddr_i; state_d = WAIT_GRANT; diff --git a/core/mmu_sv39/tlb.sv b/core/mmu_sv39/tlb.sv index 3df2cb0173..e2ffa9fee8 100644 --- a/core/mmu_sv39/tlb.sv +++ b/core/mmu_sv39/tlb.sv @@ -19,8 +19,8 @@ module tlb import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int unsigned TLB_ENTRIES = 4, - parameter int unsigned ASID_WIDTH = 1 + parameter type tlb_update_t = logic, + parameter int unsigned TLB_ENTRIES = 4 ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset 
active low @@ -29,11 +29,11 @@ module tlb input tlb_update_t update_i, // Lookup signals input logic lu_access_i, - input logic [ ASID_WIDTH-1:0] lu_asid_i, - input logic [riscv::VLEN-1:0] lu_vaddr_i, + input logic [ CVA6Cfg.ASID_WIDTH-1:0] lu_asid_i, + input logic [CVA6Cfg.VLEN-1:0] lu_vaddr_i, output riscv::pte_t lu_content_o, - input logic [ ASID_WIDTH-1:0] asid_to_be_flushed_i, - input logic [riscv::VLEN-1:0] vaddr_to_be_flushed_i, + input logic [ CVA6Cfg.ASID_WIDTH-1:0] asid_to_be_flushed_i, + input logic [CVA6Cfg.VLEN-1:0] vaddr_to_be_flushed_i, output logic lu_is_2M_o, output logic lu_is_1G_o, output logic lu_hit_o @@ -41,8 +41,8 @@ module tlb // SV39 defines three levels of page tables struct packed { - logic [ASID_WIDTH-1:0] asid; - logic [riscv::VPN2:0] vpn2; + logic [CVA6Cfg.ASID_WIDTH-1:0] asid; + logic [CVA6Cfg.VPN2:0] vpn2; logic [8:0] vpn1; logic [8:0] vpn0; logic is_2M; @@ -53,7 +53,7 @@ module tlb riscv::pte_t [TLB_ENTRIES-1:0] content_q, content_n; logic [8:0] vpn0, vpn1; - logic [ riscv::VPN2:0] vpn2; + logic [ CVA6Cfg.VPN2:0] vpn2; logic [TLB_ENTRIES-1:0] lu_hit; // to replacement logic logic [TLB_ENTRIES-1:0] replace_en; // replace the following entry, set by replacement strategy //------------- @@ -62,7 +62,7 @@ module tlb always_comb begin : translation vpn0 = lu_vaddr_i[20:12]; vpn1 = lu_vaddr_i[29:21]; - vpn2 = lu_vaddr_i[30+riscv::VPN2:30]; + vpn2 = lu_vaddr_i[30+CVA6Cfg.VPN2:30]; // default assignment lu_hit = '{default: 0}; @@ -118,7 +118,7 @@ module tlb vaddr_vpn0_match[i] = (vaddr_to_be_flushed_i[20:12] == tags_q[i].vpn0); vaddr_vpn1_match[i] = (vaddr_to_be_flushed_i[29:21] == tags_q[i].vpn1); - vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+riscv::VPN2:30] == tags_q[i].vpn2); + vaddr_vpn2_match[i] = (vaddr_to_be_flushed_i[30+CVA6Cfg.VPN2:30] == tags_q[i].vpn2); if (flush_i) begin // invalidate logic @@ -138,7 +138,7 @@ module tlb // update tag array tags_n[i] = '{ asid: update_i.asid, - vpn2: update_i.vpn[18+riscv::VPN2:18], + vpn2: 
update_i.vpn[18+CVA6Cfg.VPN2:18], vpn1: update_i.vpn[17:9], vpn0: update_i.vpn[8:0], is_1G: update_i.is_1G, @@ -257,7 +257,7 @@ module tlb $error("TLB size must be a multiple of 2 and greater than 1"); $stop(); end - assert (ASID_WIDTH >= 1) + assert (CVA6Cfg.ASID_WIDTH >= 1) else begin $error("ASID width must be at least 1"); $stop(); diff --git a/core/mult.sv b/core/mult.sv index 6f98e62ccc..937c6c59f5 100644 --- a/core/mult.sv +++ b/core/mult.sv @@ -3,25 +3,26 @@ module mult import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type fu_data_t = logic ) ( input logic clk_i, input logic rst_ni, input logic flush_i, input fu_data_t fu_data_i, input logic mult_valid_i, - output riscv::xlen_t result_o, + output logic [CVA6Cfg.XLEN-1:0] result_o, output logic mult_valid_o, output logic mult_ready_o, - output logic [TRANS_ID_BITS-1:0] mult_trans_id_o + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] mult_trans_id_o ); logic mul_valid; logic div_valid; logic div_ready_i; // receiver of division result is able to accept the result - logic [TRANS_ID_BITS-1:0] mul_trans_id; - logic [TRANS_ID_BITS-1:0] div_trans_id; - riscv::xlen_t mul_result; - riscv::xlen_t div_result; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] mul_trans_id; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] div_trans_id; + logic [CVA6Cfg.XLEN-1:0] mul_result; + logic [CVA6Cfg.XLEN-1:0] div_result; logic div_valid_op; logic mul_valid_op; @@ -64,10 +65,10 @@ module mult // --------------------- // Division // --------------------- - riscv::xlen_t + logic [CVA6Cfg.XLEN-1:0] operand_b, operand_a; // input operands after input MUX (input silencing, word operations or full inputs) - riscv::xlen_t result; // result before result mux + logic [CVA6Cfg.XLEN-1:0] result; // result before result mux logic div_signed; // signed or unsigned division logic rem; // is it a reminder (or not a reminder e.g.: a division) @@ 
-92,8 +93,8 @@ module mult if (fu_data_i.operation inside {DIVW, DIVUW, REMW, REMUW}) begin // yes so check if we should sign extend this is only done for a signed operation if (div_signed) begin - operand_a = sext32(fu_data_i.operand_a[31:0]); - operand_b = sext32(fu_data_i.operand_b[31:0]); + operand_a = {{CVA6Cfg.XLEN - 32{fu_data_i.operand_a[31]}}, fu_data_i.operand_a[31:0]}; + operand_b = {{CVA6Cfg.XLEN - 32{fu_data_i.operand_b[31]}}, fu_data_i.operand_b[31:0]}; end else begin operand_a = fu_data_i.operand_a[31:0]; operand_b = fu_data_i.operand_b[31:0]; @@ -115,7 +116,7 @@ module mult // --------------------- serdiv #( .CVA6Cfg(CVA6Cfg), - .WIDTH (riscv::XLEN) + .WIDTH (CVA6Cfg.XLEN) ) i_div ( .clk_i (clk_i), .rst_ni (rst_ni), @@ -134,7 +135,7 @@ module mult // Result multiplexer // if it was a signed word operation the bit will be set and the result will be sign extended accordingly - assign div_result = (word_op_q) ? sext32(result) : result; + assign div_result = (word_op_q) ? {{CVA6Cfg.XLEN - 32{result[31]}}, result[31:0]} : result; // --------------------- // Registers diff --git a/core/multiplier.sv b/core/multiplier.sv index 5f2fcfb69b..857b44b5e4 100644 --- a/core/multiplier.sv +++ b/core/multiplier.sv @@ -22,30 +22,30 @@ module multiplier ) ( input logic clk_i, input logic rst_ni, - input logic [TRANS_ID_BITS-1:0] trans_id_i, + input logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_i, input logic mult_valid_i, input fu_op operation_i, - input riscv::xlen_t operand_a_i, - input riscv::xlen_t operand_b_i, - output riscv::xlen_t result_o, + input logic [CVA6Cfg.XLEN-1:0] operand_a_i, + input logic [CVA6Cfg.XLEN-1:0] operand_b_i, + output logic [CVA6Cfg.XLEN-1:0] result_o, output logic mult_valid_o, output logic mult_ready_o, - output logic [TRANS_ID_BITS-1:0] mult_trans_id_o + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] mult_trans_id_o ); // Carry-less multiplication - logic [riscv::XLEN-1:0] + logic [CVA6Cfg.XLEN-1:0] clmul_q, clmul_d, clmulr_q, clmulr_d, 
operand_a, operand_b, operand_a_rev, operand_b_rev; logic clmul_rmode, clmul_hmode; - if (ariane_pkg::BITMANIP) begin : gen_bitmanip + if (CVA6Cfg.BITMANIP) begin : gen_bitmanip // checking for clmul_rmode and clmul_hmode assign clmul_rmode = (operation_i == CLMULR); assign clmul_hmode = (operation_i == CLMULH); // operand_a and b reverse generator - for (genvar i = 0; i < riscv::XLEN; i++) begin - assign operand_a_rev[i] = operand_a_i[(riscv::XLEN-1)-i]; - assign operand_b_rev[i] = operand_b_i[(riscv::XLEN-1)-i]; + for (genvar i = 0; i < CVA6Cfg.XLEN; i++) begin + assign operand_a_rev[i] = operand_a_i[(CVA6Cfg.XLEN-1)-i]; + assign operand_b_rev[i] = operand_b_i[(CVA6Cfg.XLEN-1)-i]; end // operand_a and operand_b selection @@ -55,22 +55,22 @@ module multiplier // implementation always_comb begin clmul_d = '0; - for (int i = 0; i <= riscv::XLEN; i++) begin + for (int i = 0; i <= CVA6Cfg.XLEN; i++) begin clmul_d = (|((operand_b >> i) & 1)) ? clmul_d ^ (operand_a << i) : clmul_d; end end // clmulr + clmulh result generator - for (genvar i = 0; i < riscv::XLEN; i++) begin - assign clmulr_d[i] = clmul_d[(riscv::XLEN-1)-i]; + for (genvar i = 0; i < CVA6Cfg.XLEN; i++) begin + assign clmulr_d[i] = clmul_d[(CVA6Cfg.XLEN-1)-i]; end end // Pipeline register - logic [TRANS_ID_BITS-1:0] trans_id_q; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_q; logic mult_valid_q; fu_op operator_d, operator_q; - logic [riscv::XLEN*2-1:0] mult_result_d, mult_result_q; + logic [CVA6Cfg.XLEN*2-1:0] mult_result_d, mult_result_q; // control registers logic sign_a, sign_b; @@ -105,9 +105,9 @@ module multiplier // single stage version assign mult_result_d = $signed( - {operand_a_i[riscv::XLEN-1] & sign_a, operand_a_i} + {operand_a_i[CVA6Cfg.XLEN-1] & sign_a, operand_a_i} ) * $signed( - {operand_b_i[riscv::XLEN-1] & sign_b, operand_b_i} + {operand_b_i[CVA6Cfg.XLEN-1] & sign_b, operand_b_i} ); @@ -115,16 +115,16 @@ module multiplier always_comb begin : p_selmux unique case (operator_q) - MULH, MULHU, 
MULHSU: result_o = mult_result_q[riscv::XLEN*2-1:riscv::XLEN]; - MULW: result_o = sext32(mult_result_q[31:0]); + MULH, MULHU, MULHSU: result_o = mult_result_q[CVA6Cfg.XLEN*2-1:CVA6Cfg.XLEN]; + MULW: result_o = {{CVA6Cfg.XLEN - 32{mult_result_q[31]}}, mult_result_q[31:0]}; CLMUL: result_o = clmul_q; CLMULH: result_o = clmulr_q >> 1; CLMULR: result_o = clmulr_q; - // MUL performs an XLEN-bit×XLEN-bit multiplication and places the lower XLEN bits in the destination register - default: result_o = mult_result_q[riscv::XLEN-1:0]; // including MUL + // MUL performs an CVA6Cfg.XLEN-bit×CVA6Cfg.XLEN-bit multiplication and places the lower CVA6Cfg.XLEN bits in the destination register + default: result_o = mult_result_q[CVA6Cfg.XLEN-1:0]; // including MUL endcase end - if (ariane_pkg::BITMANIP) begin + if (CVA6Cfg.BITMANIP) begin always_ff @(posedge clk_i or negedge rst_ni) begin if (~rst_ni) begin clmul_q <= '0; diff --git a/core/perf_counters.sv b/core/perf_counters.sv index d529c5b300..df84bc7fa2 100644 --- a/core/perf_counters.sv +++ b/core/perf_counters.sv @@ -17,6 +17,12 @@ module perf_counters import ariane_pkg::*; #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type bp_resolve_t = logic, + parameter type scoreboard_entry_t = logic, + parameter type icache_dreq_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, parameter int unsigned NumPorts = 3 // number of miss ports ) ( input logic clk_i, @@ -25,8 +31,8 @@ module perf_counters // SRAM like interface input logic [11:0] addr_i, // read/write address (up to 6 counters possible) input logic we_i, // write enable - input riscv::xlen_t data_i, // data to write - output riscv::xlen_t data_o, // data to read + input logic [CVA6Cfg.XLEN-1:0] data_i, // data to write + output logic [CVA6Cfg.XLEN-1:0] data_o, // data to read // from commit stage input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] 
commit_instr_i, // the instruction we want to commit input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // acknowledge that we are indeed committing @@ -48,7 +54,7 @@ module perf_counters input exception_t branch_exceptions_i, //Branch exceptions->execute unit-> branch_exception_o input icache_dreq_t l1_icache_access_i, input dcache_req_i_t [2:0] l1_dcache_access_i, - input logic [NumPorts-1:0][DCACHE_SET_ASSOC-1:0]miss_vld_bits_i, //For Cache eviction (3ports-LOAD,STORE,PTW) + input logic [NumPorts-1:0][CVA6Cfg.DCACHE_SET_ASSOC-1:0]miss_vld_bits_i, //For Cache eviction (3ports-LOAD,STORE,PTW) input logic i_tlb_flush_i, input logic stall_issue_i, //stall-read operands input logic [31:0] mcountinhibit_i @@ -147,7 +153,7 @@ module perf_counters riscv::CSR_MHPM_COUNTER_6, riscv::CSR_MHPM_COUNTER_7, riscv::CSR_MHPM_COUNTER_8 :begin - if (riscv::XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0]; + if (CVA6Cfg.XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0]; else data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3+1]; end riscv::CSR_MHPM_COUNTER_3H, @@ -156,7 +162,7 @@ module perf_counters riscv::CSR_MHPM_COUNTER_6H, riscv::CSR_MHPM_COUNTER_7H, riscv::CSR_MHPM_COUNTER_8H :begin - if (riscv::XLEN == 32) + if (CVA6Cfg.XLEN == 32) data_o = generic_counter_q[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32]; else read_access_exception = 1'b1; end @@ -179,7 +185,7 @@ module perf_counters riscv::CSR_MHPM_COUNTER_6, riscv::CSR_MHPM_COUNTER_7, riscv::CSR_MHPM_COUNTER_8 :begin - if (riscv::XLEN == 32) + if (CVA6Cfg.XLEN == 32) generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1][31:0] = data_i; else generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3+1] = data_i; end @@ -189,7 +195,7 @@ module perf_counters riscv::CSR_MHPM_COUNTER_6H, riscv::CSR_MHPM_COUNTER_7H, riscv::CSR_MHPM_COUNTER_8H :begin - if (riscv::XLEN == 32) + if (CVA6Cfg.XLEN == 32) generic_counter_d[addr_i-riscv::CSR_MHPM_COUNTER_3H+1][63:32] = 
data_i; else update_access_exception = 1'b1; end diff --git a/core/pmp/src/pmp.sv b/core/pmp/src/pmp.sv index a3adbb903a..b1c57bf4cc 100644 --- a/core/pmp/src/pmp.sv +++ b/core/pmp/src/pmp.sv @@ -14,12 +14,11 @@ module pmp #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int unsigned PLEN = 34, // rv64: 56 parameter int unsigned PMP_LEN = 32, // rv64: 54 parameter int unsigned NR_ENTRIES = 4 ) ( // Input - input logic [PLEN-1:0] addr_i, + input logic [CVA6Cfg.PLEN-1:0] addr_i, input riscv::pmp_access_t access_type_i, input riscv::priv_lvl_t priv_lvl_i, // Configuration @@ -39,7 +38,6 @@ module pmp #( pmp_entry #( .CVA6Cfg(CVA6Cfg), - .PLEN (PLEN), .PMP_LEN(PMP_LEN) ) i_pmp_entry ( .addr_i (addr_i), diff --git a/core/pmp/src/pmp_entry.sv b/core/pmp/src/pmp_entry.sv index 667ae18911..e4490b4c59 100644 --- a/core/pmp/src/pmp_entry.sv +++ b/core/pmp/src/pmp_entry.sv @@ -14,11 +14,10 @@ module pmp_entry #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, - parameter int unsigned PLEN = 56, parameter int unsigned PMP_LEN = 54 ) ( // Input - input logic [PLEN-1:0] addr_i, + input logic [CVA6Cfg.PLEN-1:0] addr_i, // Configuration input logic [PMP_LEN-1:0] conf_addr_i, @@ -28,14 +27,14 @@ module pmp_entry #( // Output output logic match_o ); - logic [PLEN-1:0] conf_addr_n; - logic [$clog2(PLEN)-1:0] trail_ones; - logic [PLEN-1:0] base; - logic [PLEN-1:0] mask; + logic [CVA6Cfg.PLEN-1:0] conf_addr_n; + logic [$clog2(CVA6Cfg.PLEN)-1:0] trail_ones; + logic [CVA6Cfg.PLEN-1:0] base; + logic [CVA6Cfg.PLEN-1:0] mask; int unsigned size; assign conf_addr_n = {2'b11, ~conf_addr_i}; lzc #( - .WIDTH(PLEN), + .WIDTH(CVA6Cfg.PLEN), .MODE (1'b0) ) i_lzc ( .in_i (conf_addr_n), @@ -69,7 +68,7 @@ module pmp_entry #( if (conf_addr_mode_i == riscv::NA4) size = 2; else begin // use the extracted trailing ones - size = {{(32 - $clog2(PLEN)) {1'b0}}, trail_ones} + 3; + size = {{(32 - $clog2(CVA6Cfg.PLEN)) {1'b0}}, trail_ones} + 3; end 
mask = '1 << size; @@ -89,7 +88,7 @@ module pmp_entry #( end end - if (size < PLEN - 1) begin + if (size < CVA6Cfg.PLEN - 1) begin if (base + 2 ** size > base) begin // check for overflow if (match_o == 0) begin assert (addr_i >= base + 2 ** size || addr_i < base); diff --git a/core/scoreboard.sv b/core/scoreboard.sv index 2553554d89..092133bf50 100644 --- a/core/scoreboard.sv +++ b/core/scoreboard.sv @@ -14,6 +14,9 @@ module scoreboard #( parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type bp_resolve_t = logic, + parameter type scoreboard_entry_t = logic, parameter bit IsRVFI = bit'(0), parameter type rs3_len_t = logic, parameter int unsigned NR_ENTRIES = 8 // must be a power of 2 @@ -30,11 +33,11 @@ module scoreboard #( // regfile like interface to operand read stage input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs1_i, - output riscv::xlen_t rs1_o, + output logic [CVA6Cfg.XLEN-1:0] rs1_o, output logic rs1_valid_o, input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs2_i, - output riscv::xlen_t rs2_o, + output logic [CVA6Cfg.XLEN-1:0] rs2_o, output logic rs2_valid_o, input logic [ariane_pkg::REG_ADDR_SIZE-1:0] rs3_i, @@ -42,35 +45,35 @@ module scoreboard #( output logic rs3_valid_o, // advertise instruction to commit stage, if commit_ack_i is asserted advance the commit pointer - output ariane_pkg::scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o, + output scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr_o, input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack_i, // instruction to put on top of scoreboard e.g.: top pointer // we can always put this instruction to the top unless we signal with asserted full_o - input ariane_pkg::scoreboard_entry_t decoded_instr_i, + input scoreboard_entry_t decoded_instr_i, input logic decoded_instr_valid_i, output logic decoded_instr_ack_o, // instruction to issue logic, if issue_instr_valid and issue_ready is asserted, advance the issue pointer 
- output ariane_pkg::scoreboard_entry_t issue_instr_o, + output scoreboard_entry_t issue_instr_o, output logic issue_instr_valid_o, input logic issue_ack_i, // write-back port - input ariane_pkg::bp_resolve_t resolved_branch_i, - input logic [CVA6Cfg.NrWbPorts-1:0][ariane_pkg::TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back - input logic [CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] wbdata_i, // write data in - input ariane_pkg::exception_t [CVA6Cfg.NrWbPorts-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception) + input bp_resolve_t resolved_branch_i, + input logic [CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_i, // transaction ID at which to write the result back + input logic [CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] wbdata_i, // write data in + input exception_t [CVA6Cfg.NrWbPorts-1:0] ex_i, // exception from a functional unit (e.g.: ld/st exception) input logic [CVA6Cfg.NrWbPorts-1:0] wt_valid_i, // data in is valid input logic x_we_i, // cvxif we for writeback // RVFI - input [ riscv::VLEN-1:0] lsu_addr_i, - input [ (riscv::XLEN/8)-1:0] lsu_rmask_i, - input [ (riscv::XLEN/8)-1:0] lsu_wmask_i, - input [ariane_pkg::TRANS_ID_BITS-1:0] lsu_addr_trans_id_i, - input riscv::xlen_t rs1_forwarding_i, - input riscv::xlen_t rs2_forwarding_i + input [ CVA6Cfg.VLEN-1:0] lsu_addr_i, + input [ (CVA6Cfg.XLEN/8)-1:0] lsu_rmask_i, + input [ (CVA6Cfg.XLEN/8)-1:0] lsu_wmask_i, + input [CVA6Cfg.TRANS_ID_BITS-1:0] lsu_addr_trans_id_i, + input logic [CVA6Cfg.XLEN-1:0] rs1_forwarding_i, + input logic [CVA6Cfg.XLEN-1:0] rs2_forwarding_i ); localparam int unsigned BITS_ENTRIES = $clog2(NR_ENTRIES); @@ -78,7 +81,7 @@ module scoreboard #( typedef struct packed { logic issued; // this bit indicates whether we issued this instruction e.g.: if it is valid logic is_rd_fpr_flag; // redundant meta info, added for speed - ariane_pkg::scoreboard_entry_t sbe; // this is the score board entry we will send to ex + scoreboard_entry_t sbe; 
// this is the score board entry we will send to ex } sb_mem_t; sb_mem_t [NR_ENTRIES-1:0] mem_q, mem_n; @@ -94,7 +97,7 @@ module scoreboard #( assign sb_full_o = issue_full; - ariane_pkg::scoreboard_entry_t decoded_instr; + scoreboard_entry_t decoded_instr; always_comb begin decoded_instr = decoded_instr_i; if (IsRVFI) begin @@ -309,7 +312,7 @@ module scoreboard #( // ---------------------------------- // read operand interface: same logic as register file logic [NR_ENTRIES+CVA6Cfg.NrWbPorts-1:0] rs1_fwd_req, rs2_fwd_req, rs3_fwd_req; - logic [NR_ENTRIES+CVA6Cfg.NrWbPorts-1:0][riscv::XLEN-1:0] rs_data; + logic [NR_ENTRIES+CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.XLEN-1:0] rs_data; logic rs1_valid, rs2_valid, rs3_valid; // WB ports have higher prio than entries @@ -353,7 +356,7 @@ module scoreboard #( // this implicitly gives higher prio to WB ports rr_arb_tree #( .NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(riscv::XLEN), + .DataWidth(CVA6Cfg.XLEN), .ExtPrio(1'b1), .AxiVldRdy(1'b1) ) i_sel_rs1 ( @@ -372,7 +375,7 @@ module scoreboard #( rr_arb_tree #( .NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(riscv::XLEN), + .DataWidth(CVA6Cfg.XLEN), .ExtPrio(1'b1), .AxiVldRdy(1'b1) ) i_sel_rs2 ( @@ -389,11 +392,11 @@ module scoreboard #( .idx_o () ); - riscv::xlen_t rs3; + logic [CVA6Cfg.XLEN-1:0] rs3; rr_arb_tree #( .NumIn(NR_ENTRIES + CVA6Cfg.NrWbPorts), - .DataWidth(riscv::XLEN), + .DataWidth(CVA6Cfg.XLEN), .ExtPrio(1'b1), .AxiVldRdy(1'b1) ) i_sel_rs3 ( @@ -411,7 +414,7 @@ module scoreboard #( ); if (CVA6Cfg.NrRgprPorts == 3) begin : gen_gp_three_port - assign rs3_o = rs3[riscv::XLEN-1:0]; + assign rs3_o = rs3[CVA6Cfg.XLEN-1:0]; end else begin : gen_fp_three_port assign rs3_o = rs3[CVA6Cfg.FLen-1:0]; end diff --git a/core/serdiv.sv b/core/serdiv.sv index 244ee975dc..494a7514bd 100644 --- a/core/serdiv.sv +++ b/core/serdiv.sv @@ -25,7 +25,7 @@ module serdiv input logic clk_i, input logic rst_ni, // input IF - input logic [TRANS_ID_BITS-1:0] id_i, + input logic 
[CVA6Cfg.TRANS_ID_BITS-1:0] id_i, input logic [WIDTH-1:0] op_a_i, input logic [WIDTH-1:0] op_b_i, input logic [1:0] opcode_i, // 0: udiv, 2: urem, 1: div, 3: rem @@ -36,7 +36,7 @@ module serdiv // output IF output logic out_vld_o, input logic out_rdy_i, - output logic [TRANS_ID_BITS-1:0] id_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] id_o, output logic [WIDTH-1:0] res_o ); @@ -58,7 +58,7 @@ module serdiv logic op_b_zero, op_b_zero_q, op_b_zero_d; logic op_b_neg_one, op_b_neg_one_q, op_b_neg_one_d; - logic [TRANS_ID_BITS-1:0] id_q, id_d; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] id_q, id_d; logic rem_sel_d, rem_sel_q; logic comp_inv_d, comp_inv_q; diff --git a/core/store_buffer.sv b/core/store_buffer.sv index 5567382781..7b237ee7d2 100644 --- a/core/store_buffer.sv +++ b/core/store_buffer.sv @@ -17,7 +17,9 @@ module store_buffer import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -38,10 +40,10 @@ module store_buffer input logic valid_i, // this is a valid store input logic valid_without_flush_i, // just tell if the address is valid which we are current putting and do not take any further action - input logic [riscv::PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue - output [riscv::PLEN-1:0] mem_paddr_o, - input riscv::xlen_t data_i, // data which is placed in the queue - input logic [(riscv::XLEN/8)-1:0] be_i, // byte enable in + input logic [CVA6Cfg.PLEN-1:0] paddr_i, // physical address of store which needs to be placed in the queue + output [CVA6Cfg.PLEN-1:0] mem_paddr_o, + input logic [CVA6Cfg.XLEN-1:0] data_i, // data which is placed in the queue + input logic [(CVA6Cfg.XLEN/8)-1:0] be_i, // byte enable in input logic [1:0] data_size_i, // type of 
request we are making (e.g.: bytes to write) // D$ interface @@ -53,9 +55,9 @@ module store_buffer // 1. Speculative queue // 2. Commit queue which is non-speculative, e.g.: the store will definitely happen. struct packed { - logic [riscv::PLEN-1:0] address; - riscv::xlen_t data; - logic [(riscv::XLEN/8)-1:0] be; + logic [CVA6Cfg.PLEN-1:0] address; + logic [CVA6Cfg.XLEN-1:0] data; + logic [(CVA6Cfg.XLEN/8)-1:0] be; logic [1:0] data_size; logic valid; // this entry is valid, we need this for checking if the address offset matches } @@ -139,11 +141,11 @@ module store_buffer // we do not require an acknowledgement for writes, thus we do not need to identify uniquely the responses assign req_port_o.data_id = '0; // those signals can directly be output to the memory - assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_INDEX_WIDTH-1:0]; + assign req_port_o.address_index = commit_queue_q[commit_read_pointer_q].address[CVA6Cfg.DCACHE_INDEX_WIDTH-1:0]; // if we got a new request we already saved the tag from the previous cycle - assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[ariane_pkg::DCACHE_TAG_WIDTH + - ariane_pkg::DCACHE_INDEX_WIDTH-1 : - ariane_pkg::DCACHE_INDEX_WIDTH]; + assign req_port_o.address_tag = commit_queue_q[commit_read_pointer_q].address[CVA6Cfg.DCACHE_TAG_WIDTH + + CVA6Cfg.DCACHE_INDEX_WIDTH-1 : + CVA6Cfg.DCACHE_INDEX_WIDTH]; assign req_port_o.data_wdata = commit_queue_q[commit_read_pointer_q].data; assign req_port_o.data_be = commit_queue_q[commit_read_pointer_q].be; assign req_port_o.data_size = commit_queue_q[commit_read_pointer_q].data_size; diff --git a/core/store_unit.sv b/core/store_unit.sv index 8ec6c68aca..0550710f64 100644 --- a/core/store_unit.sv +++ b/core/store_unit.sv @@ -16,7 +16,11 @@ module store_unit import ariane_pkg::*; #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty + parameter config_pkg::cva6_cfg_t CVA6Cfg = 
config_pkg::cva6_cfg_empty, + parameter type exception_t = logic, + parameter type dcache_req_i_t = logic, + parameter type dcache_req_o_t = logic, + parameter type lsu_ctrl_t = logic ) ( input logic clk_i, // Clock input logic rst_ni, // Asynchronous reset active low @@ -33,14 +37,14 @@ module store_unit input logic amo_valid_commit_i, // store unit output port output logic valid_o, - output logic [TRANS_ID_BITS-1:0] trans_id_o, - output riscv::xlen_t result_o, + output logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_o, + output logic [CVA6Cfg.XLEN-1:0] result_o, output exception_t ex_o, // MMU -> Address Translation output logic translation_req_o, // request address translation - output logic [riscv::VLEN-1:0] vaddr_o, // virtual address out - output [riscv::PLEN-1:0] mem_paddr_o, - input logic [riscv::PLEN-1:0] paddr_i, // physical address in + output logic [CVA6Cfg.VLEN-1:0] vaddr_o, // virtual address out + output [CVA6Cfg.PLEN-1:0] mem_paddr_o, + input logic [CVA6Cfg.PLEN-1:0] paddr_i, // physical address in input exception_t ex_i, input logic dtlb_hit_i, // will be one in the same cycle translation_req was asserted if it hits // address checker @@ -70,12 +74,12 @@ module store_unit logic instr_is_amo; assign instr_is_amo = is_amo(lsu_ctrl_i.operation); // keep the data and the byte enable for the second cycle (after address translation) - riscv::xlen_t st_data_n, st_data_q; - logic [(riscv::XLEN/8)-1:0] st_be_n, st_be_q; + logic [CVA6Cfg.XLEN-1:0] st_data_n, st_data_q; + logic [(CVA6Cfg.XLEN/8)-1:0] st_be_n, st_be_q; logic [1:0] st_data_size_n, st_data_size_q; amo_t amo_op_d, amo_op_q; - logic [TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; + logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_n, trans_id_q; // output assignments assign vaddr_o = lsu_ctrl_i.vaddr; // virtual address @@ -186,9 +190,26 @@ module store_unit // re-align the write data to comply with the address offset always_comb begin st_be_n = lsu_ctrl_i.be; + // don't shift the data if we are going to perform 
an AMO as we still need to operate on this data - st_data_n = (CVA6Cfg.RVA && instr_is_amo) ? lsu_ctrl_i.data[riscv::XLEN-1:0] : - data_align(lsu_ctrl_i.vaddr[2:0], lsu_ctrl_i.data); + if (CVA6Cfg.RVA && instr_is_amo) begin + st_data_n = lsu_ctrl_i.data[CVA6Cfg.XLEN-1:0]; + end else begin + st_data_n = '0; + // align data to address e.g.: shift data to be naturally 64 + // Set addr[2] to 1'b0 when 32bits + case ({(lsu_ctrl_i.vaddr[2] && CVA6Cfg.IS_XLEN64), lsu_ctrl_i.vaddr[1:0]}) + 3'b000: st_data_n[CVA6Cfg.XLEN-1:0] = {lsu_ctrl_i.data[CVA6Cfg.XLEN-1:0]}; + 3'b001: st_data_n[CVA6Cfg.XLEN-1:0] = {lsu_ctrl_i.data[CVA6Cfg.XLEN-9:0], lsu_ctrl_i.data[CVA6Cfg.XLEN-1:CVA6Cfg.XLEN-8]}; + 3'b010: st_data_n[CVA6Cfg.XLEN-1:0] = {lsu_ctrl_i.data[CVA6Cfg.XLEN-17:0], lsu_ctrl_i.data[CVA6Cfg.XLEN-1:CVA6Cfg.XLEN-16]}; + 3'b011: st_data_n[CVA6Cfg.XLEN-1:0] = {lsu_ctrl_i.data[CVA6Cfg.XLEN-25:0], lsu_ctrl_i.data[CVA6Cfg.XLEN-1:CVA6Cfg.XLEN-24]}; + 3'b100: st_data_n = {lsu_ctrl_i.data[31:0], lsu_ctrl_i.data[63:32]}; + 3'b101: st_data_n = {lsu_ctrl_i.data[23:0], lsu_ctrl_i.data[63:24]}; + 3'b110: st_data_n = {lsu_ctrl_i.data[15:0], lsu_ctrl_i.data[63:16]}; + 3'b111: st_data_n = {lsu_ctrl_i.data[7:0], lsu_ctrl_i.data[63:8]}; + endcase + end + st_data_size_n = extract_transfer_size(lsu_ctrl_i.operation); // save AMO op for next cycle if(CVA6Cfg.RVA) begin @@ -224,7 +245,9 @@ module store_unit // Store Queue // --------------- store_buffer #( - .CVA6Cfg(CVA6Cfg) + .CVA6Cfg(CVA6Cfg), + .dcache_req_i_t(dcache_req_i_t), + .dcache_req_o_t(dcache_req_o_t) ) store_buffer_i ( .clk_i, .rst_ni, diff --git a/corev_apu/clint/clint.sv b/corev_apu/clint/clint.sv index e76f96d8f9..2d4dc0536c 100644 --- a/corev_apu/clint/clint.sv +++ b/corev_apu/clint/clint.sv @@ -17,6 +17,7 @@ // constant frequency, and the platform must provide a mechanism for determining the timebase of mtime (device tree). 
module clint #( + parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg, parameter int unsigned AXI_ADDR_WIDTH = 64, parameter int unsigned AXI_DATA_WIDTH = 64, parameter int unsigned AXI_ID_WIDTH = 10, @@ -101,7 +102,7 @@ module clint #( end [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin - if (riscv::XLEN == 32) begin + if (CVA6Cfg.XLEN == 32) begin if (be[3:0] == 4'hf) mtimecmp_n[$unsigned(address[AddrSelWidth-1+3:3])][31:0] = wdata[31:0]; else @@ -113,7 +114,7 @@ module clint #( end [MTIME_BASE:MTIME_BASE+4]: begin - if (riscv::XLEN == 32) begin + if (CVA6Cfg.XLEN == 32) begin if (address[2:0] == 3'h0) mtime_n[31:0] = wdata[31:0]; else begin @@ -136,14 +137,14 @@ module clint #( if (en && !we) begin case (register_address) inside [MSIP_BASE:MSIP_BASE+4*NR_CORES]: begin - if (riscv::XLEN == 32) + if (CVA6Cfg.XLEN == 32) rdata[31:0] = msip_q[$unsigned(address[AddrSelWidth-1+2:2])]; else rdata = msip_q[$unsigned(address[AddrSelWidth-1+2:2])]; end [MTIMECMP_BASE:MTIMECMP_BASE+8*NR_CORES]: begin - if (riscv::XLEN == 32) begin + if (CVA6Cfg.XLEN == 32) begin if (address[2:0] == 3'h0) rdata[31:0] = mtimecmp_q[$unsigned(address[AddrSelWidth-1+3:3])][31:0]; else begin @@ -157,7 +158,7 @@ module clint #( end [MTIME_BASE:MTIME_BASE+4]: begin - if (riscv::XLEN == 32) begin + if (CVA6Cfg.XLEN == 32) begin if (address[2:0] == 3'h0) rdata[31:0] = mtime_q[31:0]; else begin diff --git a/corev_apu/openpiton/riscv_peripherals.sv b/corev_apu/openpiton/riscv_peripherals.sv index cdca4109b1..6730178af1 100644 --- a/corev_apu/openpiton/riscv_peripherals.sv +++ b/corev_apu/openpiton/riscv_peripherals.sv @@ -465,6 +465,7 @@ module riscv_peripherals #( ariane_axi::resp_t clint_axi_resp; clint #( + .CVA6Cfg ( CVA6Cfg ), .AXI_ADDR_WIDTH ( AxiAddrWidth ), .AXI_DATA_WIDTH ( AxiDataWidth ), .AXI_ID_WIDTH ( AxiIdWidth ), diff --git a/corev_apu/src/ariane.sv b/corev_apu/src/ariane.sv index e45fc1df49..25704b6c23 100644 --- a/corev_apu/src/ariane.sv +++ b/corev_apu/src/ariane.sv 
@@ -29,8 +29,8 @@ module ariane import ariane_pkg::*; #( input logic clk_i, input logic rst_ni, // Core ID, Cluster ID and boot address are considered more or less static - input logic [riscv::VLEN-1:0] boot_addr_i, // reset boot address - input logic [riscv::XLEN-1:0] hart_id_i, // hart id in a multicore environment (reflected in a CSR) + input logic [CVA6Cfg.VLEN-1:0] boot_addr_i, // reset boot address + input logic [CVA6Cfg.XLEN-1:0] hart_id_i, // hart id in a multicore environment (reflected in a CSR) // Interrupt inputs input logic [1:0] irq_i, // level sensitive IR lines, mip & sip (async) diff --git a/corev_apu/tb/ariane_tb.sv b/corev_apu/tb/ariane_tb.sv index 67b7ea8580..d5004c47f2 100644 --- a/corev_apu/tb/ariane_tb.sv +++ b/corev_apu/tb/ariane_tb.sv @@ -29,7 +29,8 @@ import "DPI-C" context function void read_section(input longint address, inout b module ariane_tb; // cva6 configuration - localparam config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg; + TODO + localparam config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::build_config(cva6_config_pkg::cva6_cfg); localparam bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace); localparam type rvfi_instr_t = struct packed { logic [config_pkg::NRET-1:0] valid; diff --git a/corev_apu/tb/ariane_testharness.sv b/corev_apu/tb/ariane_testharness.sv index 5bfa842819..252a9fef3e 100644 --- a/corev_apu/tb/ariane_testharness.sv +++ b/corev_apu/tb/ariane_testharness.sv @@ -16,36 +16,36 @@ `include "axi/assign.svh" module ariane_testharness #( - parameter config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg, + parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::build_config(cva6_config_pkg::cva6_cfg), parameter bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace), parameter type rvfi_instr_t = struct packed { logic [config_pkg::NRET-1:0] valid; logic [config_pkg::NRET*64-1:0] order; logic [config_pkg::NRET*config_pkg::ILEN-1:0] insn; logic [config_pkg::NRET-1:0] trap; - logic 
[config_pkg::NRET*riscv::XLEN-1:0] cause; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] cause; logic [config_pkg::NRET-1:0] halt; logic [config_pkg::NRET-1:0] intr; logic [config_pkg::NRET*2-1:0] mode; logic [config_pkg::NRET*2-1:0] ixl; logic [config_pkg::NRET*5-1:0] rs1_addr; logic [config_pkg::NRET*5-1:0] rs2_addr; - logic [config_pkg::NRET*riscv::XLEN-1:0] rs1_rdata; - logic [config_pkg::NRET*riscv::XLEN-1:0] rs2_rdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] rs1_rdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] rs2_rdata; logic [config_pkg::NRET*5-1:0] rd_addr; - logic [config_pkg::NRET*riscv::XLEN-1:0] rd_wdata; - logic [config_pkg::NRET*riscv::XLEN-1:0] pc_rdata; - logic [config_pkg::NRET*riscv::XLEN-1:0] pc_wdata; - logic [config_pkg::NRET*riscv::VLEN-1:0] mem_addr; - logic [config_pkg::NRET*riscv::PLEN-1:0] mem_paddr; - logic [config_pkg::NRET*(riscv::XLEN/8)-1:0] mem_rmask; - logic [config_pkg::NRET*(riscv::XLEN/8)-1:0] mem_wmask; - logic [config_pkg::NRET*riscv::XLEN-1:0] mem_rdata; - logic [config_pkg::NRET*riscv::XLEN-1:0] mem_wdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] rd_wdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] pc_rdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] pc_wdata; + logic [config_pkg::NRET*CVA6Cfg.VLEN-1:0] mem_addr; + logic [config_pkg::NRET*CVA6Cfg.PLEN-1:0] mem_paddr; + logic [config_pkg::NRET*(CVA6Cfg.XLEN/8)-1:0] mem_rmask; + logic [config_pkg::NRET*(CVA6Cfg.XLEN/8)-1:0] mem_wmask; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] mem_rdata; + logic [config_pkg::NRET*CVA6Cfg.XLEN-1:0] mem_wdata; }, // - parameter int unsigned AXI_USER_WIDTH = ariane_pkg::AXI_USER_WIDTH, - parameter int unsigned AXI_USER_EN = ariane_pkg::AXI_USER_EN, + parameter int unsigned AXI_USER_WIDTH = CVA6Cfg.AXI_USER_WIDTH, + parameter int unsigned AXI_USER_EN = CVA6Cfg.AXI_USER_EN, parameter int unsigned AXI_ADDRESS_WIDTH = 64, parameter int unsigned AXI_DATA_WIDTH = 64, parameter bit InclSimDTM = 1'b1, @@ -134,7 +134,7 @@ module 
ariane_testharness #( initial begin if (!$value$plusargs("jtag_rbb_enable=%b", jtag_enable)) jtag_enable = 'h0; if ($test$plusargs("debug_disable")) debug_enable = 'h0; else debug_enable = 'h1; - if (riscv::XLEN != 32 & riscv::XLEN != 64) $error("XLEN different from 32 and 64"); + if (CVA6Cfg.XLEN != 32 & CVA6Cfg.XLEN != 64) $error("XLEN different from 32 and 64"); end // debug if MUX @@ -552,6 +552,7 @@ module ariane_testharness #( ariane_axi_soc::resp_slv_t axi_clint_resp; clint #( + .CVA6Cfg ( CVA6Cfg ), .AXI_ADDR_WIDTH ( AXI_ADDRESS_WIDTH ), .AXI_DATA_WIDTH ( AXI_DATA_WIDTH ), .AXI_ID_WIDTH ( ariane_axi_soc::IdWidthSlave ), diff --git a/corev_apu/tb/rvfi_tracer.sv b/corev_apu/tb/rvfi_tracer.sv index 75f68beb57..4c8642b520 100644 --- a/corev_apu/tb/rvfi_tracer.sv +++ b/corev_apu/tb/rvfi_tracer.sv @@ -21,7 +21,7 @@ module rvfi_tracer #( output logic[31:0] end_of_test_o ); - logic[riscv::PLEN-1:0] TOHOST_ADDR; + logic[CVA6Cfg.PLEN-1:0] TOHOST_ADDR; int f; int unsigned SIM_FINISH; initial begin @@ -47,7 +47,7 @@ module rvfi_tracer #( always_ff @(posedge clk_i) begin end_of_test_q = (rst_ni && (end_of_test_d[0] == 1'b1)) ? 
end_of_test_d : 0; for (int i = 0; i < CVA6Cfg.NrCommitPorts; i++) begin - pc64 = {{riscv::XLEN-riscv::VLEN{rvfi_i[i].pc_rdata[riscv::VLEN-1]}}, rvfi_i[i].pc_rdata}; + pc64 = {{CVA6Cfg.XLEN-CVA6Cfg.VLEN{rvfi_i[i].pc_rdata[CVA6Cfg.VLEN-1]}}, rvfi_i[i].pc_rdata}; // print the instruction information if the instruction is valid or a trap is taken if (rvfi_i[i].valid) begin // Instruction information @@ -71,8 +71,8 @@ module rvfi_tracer #( (rvfi_i[i].insn[6:0] == 7'b1010011 && rvfi_i[i].insn[31:26] != 6'b111000 && rvfi_i[i].insn[31:26] != 6'b101000 && rvfi_i[i].insn[31:26] != 6'b110000) || - (rvfi_i[i].insn[0] == 1'b0 && ((rvfi_i[i].insn[15:13] == 3'b001 && riscv::XLEN == 64) || - (rvfi_i[i].insn[15:13] == 3'b011 && riscv::XLEN == 32) ))) begin + (rvfi_i[i].insn[0] == 1'b0 && ((rvfi_i[i].insn[15:13] == 3'b001 && CVA6Cfg.XLEN == 64) || + (rvfi_i[i].insn[15:13] == 3'b011 && CVA6Cfg.XLEN == 32) ))) begin $fwrite(f, " f%d 0x%h", rvfi_i[i].rd_addr, rvfi_i[i].rd_wdata); end else if (rvfi_i[i].rd_addr != 0) begin $fwrite(f, " x%d 0x%h", rvfi_i[i].rd_addr, rvfi_i[i].rd_wdata); diff --git a/vendor/pulp-platform/common_cells/src/fifo_v3.sv b/vendor/pulp-platform/common_cells/src/fifo_v3.sv index 11b77e0258..eacc8840dc 100644 --- a/vendor/pulp-platform/common_cells/src/fifo_v3.sv +++ b/vendor/pulp-platform/common_cells/src/fifo_v3.sv @@ -15,6 +15,7 @@ module fifo_v3 #( parameter int unsigned DATA_WIDTH = 32, // default data width if the fifo is of type logic parameter int unsigned DEPTH = 8, // depth can be arbitrary from 0 to 2**32 parameter type dtype = logic [DATA_WIDTH-1:0], + parameter bit FPGA_EN = 1'b0, // DO NOT OVERWRITE THIS PARAMETER parameter int unsigned ADDR_DEPTH = (DEPTH > 1) ? 
$clog2(DEPTH) : 1 )( @@ -70,7 +71,7 @@ module fifo_v3 #( read_pointer_n = read_pointer_q; write_pointer_n = write_pointer_q; status_cnt_n = status_cnt_q; - if (ariane_pkg::FPGA_EN) begin + if (FPGA_EN) begin fifo_ram_we = '0; fifo_ram_read_address = read_pointer_q; fifo_ram_write_address = '0; @@ -84,7 +85,7 @@ module fifo_v3 #( // push a new element to the queue if (push_i && ~full_o) begin - if (ariane_pkg::FPGA_EN) begin + if (FPGA_EN) begin fifo_ram_we = 1'b1; fifo_ram_write_address = write_pointer_q; fifo_ram_wdata = data_i; @@ -149,7 +150,7 @@ module fifo_v3 #( end end - if (ariane_pkg::FPGA_EN) begin : gen_fpga_queue + if (FPGA_EN) begin : gen_fpga_queue AsyncDpRam #( .ADDR_WIDTH (ADDR_DEPTH), .DATA_DEPTH (DEPTH), diff --git a/verif/tb/uvmt/uvmt_cva6_tb.sv b/verif/tb/uvmt/uvmt_cva6_tb.sv index 0c89a510e6..de18dfe8ed 100644 --- a/verif/tb/uvmt/uvmt_cva6_tb.sv +++ b/verif/tb/uvmt/uvmt_cva6_tb.sv @@ -31,7 +31,7 @@ module uvmt_cva6_tb; import uvme_cva6_pkg::*; // CVA6 config - localparam config_pkg::cva6_cfg_t CVA6Cfg = cva6_config_pkg::cva6_cfg; + localparam config_pkg::cva6_cfg_t CVA6Cfg = build_config(cva6_config_pkg::cva6_cfg); localparam bit IsRVFI = bit'(cva6_config_pkg::CVA6ConfigRvfiTrace); localparam type rvfi_instr_t = struct packed { logic [config_pkg::NRET-1:0] valid;