From 4da2edaa6ba69aea8c1cc0e6bba50851ed72218e Mon Sep 17 00:00:00 2001
From: Li Qianruo
Date: Mon, 9 Sep 2024 16:05:01 +0800
Subject: [PATCH] cpu-o3, mem-ruby: Enhance goldenmem

Previously, golden memory was checked only when an instruction commits;
if the golden value changed between the cache access and the commit,
difftest would throw a spurious error.

Two changes are made:
1. Check each L1D refill cache block against golden memory.
2. When a load accesses the DCache, check the loaded data against golden
   memory and save the golden value in the DynInst. At commit, if the
   value does not match the NEMU value, fall back to comparing against
   the instruction-local golden value.

Change-Id: I7bfee53a622d7f0513b457c531df49c1c5e32035
---
 configs/ruby/CHI_config.py                    |  7 +++---
 src/cpu/base.cc                               | 13 ++++++++--
 src/cpu/base.hh                               |  3 +++
 src/cpu/o3/commit.cc                          |  1 +
 src/cpu/o3/dyn_inst.hh                        |  9 +++++++
 src/cpu/o3/lsq.cc                             | 19 +++++++++++++--
 src/cpu/o3/lsq.hh                             |  4 ++++
 src/cpu/o3/lsq_unit.cc                        | 24 +++++++++++++++++++
 src/cpu/o3/lsq_unit.hh                        |  2 ++
 src/cpu/utils.hh                              | 12 ++++++++++
 src/mem/port.hh                               | 10 ++++++++
 src/mem/protocol/functional.cc                | 11 +++++++++
 src/mem/protocol/functional.hh                |  4 ++++
 src/mem/ruby/protocol/RubySlicc_Types.sm      |  1 +
 .../ruby/protocol/chi/CHI-cache-actions.sm    |  3 +++
 src/mem/ruby/protocol/chi/CHI-cache.sm        |  2 +-
 src/mem/ruby/system/RubyPort.cc               | 11 ++++++++-
 src/mem/ruby/system/RubyPort.hh               |  4 ++++
 src/mem/ruby/system/Sequencer.cc              | 23 +++++++++++++++++-
 src/mem/ruby/system/Sequencer.hh              |  4 ++++
 src/mem/ruby/system/Sequencer.py              |  1 +
 21 files changed, 158 insertions(+), 10 deletions(-)

diff --git a/configs/ruby/CHI_config.py b/configs/ruby/CHI_config.py
index 9df852f83c..1a77239712 100644
--- a/configs/ruby/CHI_config.py
+++ b/configs/ruby/CHI_config.py
@@ -232,7 +232,7 @@ class CHI_L1Controller(CHI_Cache_Controller):
     Default parameters for a L1 Cache controller
     """
 
-    def __init__(self, ruby_system, sequencer, cache, prefetcher, is_dcache=False):
+    def __init__(self, ruby_system, sequencer, cache, prefetcher, is_dcache=False, enable_difftest=False):
         super().__init__(ruby_system)
         self.sequencer = sequencer
         self.cache = cache
@@ -256,6 +256,7 @@ def __init__(self, ruby_system, sequencer, cache, prefetcher, is_dcache=False):
         self.dealloc_backinv_unique = False
         self.dealloc_backinv_shared = False
         self.is_dcache = is_dcache
+        self.enable_difftest = enable_difftest
         # Some reasonable default TBE params
         self.number_of_TBEs = 32+8
         self.number_of_repl_TBEs = 16
@@ -501,7 +502,7 @@ def __init__(
                 version=Versions.getSeqId(), ruby_system=ruby_system
             )
             cpu.data_sequencer = RubySequencer(
-                version=Versions.getSeqId(), ruby_system=ruby_system
+                version=Versions.getSeqId(), ruby_system=ruby_system, is_data_sequencer=True
             )
 
             self._seqs.append(
@@ -531,7 +532,7 @@ def __init__(
             )
 
             cpu.l1d = CHI_L1Controller(
-                ruby_system, cpu.data_sequencer, l1d_cache, l1d_pf, is_dcache=True
+                ruby_system, cpu.data_sequencer, l1d_cache, l1d_pf, is_dcache=True, enable_difftest=True
             )
 
             cpu.inst_sequencer.dcache = NULL
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
index 22ef037400..43ecfbecbf 100644
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -1149,7 +1149,7 @@ BaseCPU::diffWithNEMU(ThreadID tid, InstSeqNum seq)
             _goldenMemManager->inPmem(diffInfo.physEffAddr)) {
             DPRINTF(Diff, "Difference on %s instr found in multicore mode, check in golden memory\n", diffInfo.inst->isLoad() ?
                     "load" : "amo");
-            uint8_t *golden_ptr = (uint8_t *)_goldenMemManager->guestToHost(diffInfo.physEffAddr);
+            uint8_t *golden_ptr = diffInfo.goldenValue;
 
             // a lambda function to sync memory and register from golden results to ref
             auto sync_mem_reg = [&]() {
@@ -1478,5 +1478,14 @@ BaseCPU::setExceptionGuideExecInfo(uint64_t exception_num, uint64_t mtval,
     // diffAllStates->proxy->update_config(&diffAllStates->diff.dynamic_config);
 }
 
-
+void
+BaseCPU::checkL1DRefill(Addr paddr, const uint8_t* refill_data, size_t size) {
+    assert(size == 64);
+    if (system->multiCore()) {
+        uint8_t *golden_ptr = (uint8_t *)_goldenMemManager->guestToHost(paddr);
+        if (memcmp(golden_ptr, refill_data, size)) {
+            panic("Refill data differs from golden memory at addr %#lx, size %d\n", paddr, size);
+        }
+    }
+}
 } // namespace gem5
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
index 8c17f900c5..28cae56a33 100644
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -736,6 +736,7 @@ class BaseCPU : public ClockedObject
         bool curInstStrictOrdered{false};
         gem5::Addr physEffAddr;
         gem5::Addr effSize;
+        uint8_t *goldenValue;
         uint64_t amoOldGoldenValue;
         // Register address causing difftest error
         bool errorRegsValue[96];// 32 regs + 32fprs +32 vprs
@@ -797,6 +798,8 @@ class BaseCPU : public ClockedObject
     uint8_t *getGoldenMemPtr() { return goldenMemPtr; }
 
     gem5::GoldenGloablMem *goldenMemManager() { return _goldenMemManager; }
+
+    void checkL1DRefill(Addr paddr, const uint8_t *refill_data, size_t size);
 };
 
 } // namespace gem5
diff --git a/src/cpu/o3/commit.cc b/src/cpu/o3/commit.cc
index 6dc1c1ef21..74f12f4fec 100644
--- a/src/cpu/o3/commit.cc
+++ b/src/cpu/o3/commit.cc
@@ -1335,6 +1335,7 @@ Commit::diffInst(ThreadID tid, const DynInstPtr &inst)
 {
         inst->strictlyOrdered();
     cpu->diffInfo.physEffAddr = inst->physEffAddr;
    cpu->diffInfo.effSize = inst->effSize;
+    cpu->diffInfo.goldenValue = inst->getGolden();
     cpu->difftestStep(tid, inst->seqNum);
 }
diff --git a/src/cpu/o3/dyn_inst.hh b/src/cpu/o3/dyn_inst.hh
index 587ad468e0..3dae156eb1 100644
--- a/src/cpu/o3/dyn_inst.hh
+++ b/src/cpu/o3/dyn_inst.hh
@@ -387,6 +387,9 @@ class DynInst : public ExecContext, public RefCounted
     ssize_t sqIdx = -1;
     typename LSQUnit::SQIterator sqIt;
 
+    /** If load data comes from the cache, it must match golden memory. */
+    uint8_t goldenData[8] = {0};
+
     int pf_source = -1; // if load cache line is prefetched
     /////////////////////// TLB Miss //////////////////////
     /**
@@ -1402,6 +1405,12 @@ class DynInst : public ExecContext, public RefCounted
     {
         return pc->as().branching();
     }
+
+    /** Save the golden value observed when the load accessed the cache. */
+    void setGolden(uint8_t *golden) { memcpy(goldenData, golden, effSize); }
+
+    /** Return the saved golden value. */
+    uint8_t *getGolden() { return goldenData; }
 };
 
 } // namespace o3
diff --git a/src/cpu/o3/lsq.cc b/src/cpu/o3/lsq.cc
index 5158dff935..d26f5903b2 100644
--- a/src/cpu/o3/lsq.cc
+++ b/src/cpu/o3/lsq.cc
@@ -61,6 +61,7 @@
 #include "debug/Schedule.hh"
 #include "debug/StoreBuffer.hh"
 #include "debug/Writeback.hh"
+#include "mem/packet_access.hh"
 #include "params/BaseO3CPU.hh"
 
 namespace gem5
@@ -517,6 +518,11 @@ LSQ::recvFunctionalCustomSignal(PacketPtr pkt, int sig)
     iewStage->loadCancel(request->instruction());
 }
 
+void*
+LSQ::getCPUPtr() {
+    return (void *) cpu;
+}
+
 int
 LSQ::getCount()
 {
@@ -1147,7 +1153,8 @@ LSQ::LSQRequest::LSQRequest(
     _state(State::NotIssued),
     _port(*port), _inst(inst), _data(nullptr),
     _res(nullptr), _addr(0), _size(0), _flags(0),
-    _numOutstandingPackets(0), _amo_op(nullptr)
+    _numOutstandingPackets(0), _amo_op(nullptr),
+    _sbufferBypass(false)
 {
     flags.set(Flag::IsLoad, isLoad);
     if (_inst) {
@@ -1172,7 +1179,8 @@ LSQ::LSQRequest::LSQRequest(
     _flags(flags_),
     _numOutstandingPackets(0),
     _amo_op(std::move(amo_op)),
-    _hasStaleTranslation(stale_translation)
+    _hasStaleTranslation(stale_translation),
+    _sbufferBypass(false)
 {
     flags.set(Flag::IsLoad, isLoad);
     if (_inst) {
@@ -1235,6 +1243,7 @@ LSQ::LSQRequest::forward()
 {
     if (!isLoad() || !needWBToRegister() || forwardPackets.empty()) return;
     DPRINTF(StoreBuffer, "sbuffer forward data\n");
+    _sbufferBypass = true;
     for (auto& p : forwardPackets) {
         _inst->memData[p.idx] = p.byte;
     }
@@ -1614,6 +1623,12 @@ LSQ::DcachePort::recvFunctionalCustomSignal(PacketPtr pkt, int sig)
     lsq->recvFunctionalCustomSignal(pkt, sig);
 }
 
+void*
+LSQ::DcachePort::recvGetCPUPtr()
+{
+    return (void *) (lsq->cpu);
+}
+
 void
 LSQ::DcachePort::recvReqRetry()
 {
diff --git a/src/cpu/o3/lsq.hh b/src/cpu/o3/lsq.hh
index 1bfb50d963..fc9a66c04f 100644
--- a/src/cpu/o3/lsq.hh
+++ b/src/cpu/o3/lsq.hh
@@ -101,6 +101,7 @@ class LSQ
         virtual bool recvTimingResp(PacketPtr pkt);
         virtual void recvTimingSnoopReq(PacketPtr pkt);
         virtual void recvFunctionalCustomSignal(PacketPtr pkt, int sig);
+        virtual void* recvGetCPUPtr();
 
         virtual void
         recvFunctionalSnoop(PacketPtr pkt)
@@ -260,6 +261,7 @@ class LSQ
         uint32_t _numOutstandingPackets;
         AtomicOpFunctorPtr _amo_op;
         bool _hasStaleTranslation;
+        bool _sbufferBypass;
 
         struct FWDPacket
         {
@@ -917,6 +919,8 @@ class LSQ
 
     void recvFunctionalCustomSignal(PacketPtr pkt, int sig);
 
+    void* getCPUPtr();
+
     Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
                       unsigned int size, Addr addr, Request::Flags flags,
                       uint64_t *res, AtomicOpFunctorPtr amo_op,
diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc
index 9f93533641..290c5384ad 100644
--- a/src/cpu/o3/lsq_unit.cc
+++ b/src/cpu/o3/lsq_unit.cc
@@ -52,6 +52,7 @@
 #include "cpu/o3/dyn_inst.hh"
 #include "cpu/o3/limits.hh"
 #include "cpu/o3/lsq.hh"
+#include "cpu/utils.hh"
 #include "debug/Activity.hh"
 #include "debug/Diff.hh"
 #include "debug/HtmCpu.hh"
@@ -60,6 +61,7 @@
 #include "debug/O3PipeView.hh"
 #include "debug/StoreBuffer.hh"
 #include "mem/packet.hh"
+#include "mem/packet_access.hh"
 #include "mem/request.hh"
 
 namespace gem5
@@ -243,6 +245,26 @@ LSQUnit::completeDataAccess(PacketPtr pkt)
     assert(!cpu->switchedOut());
 
     if (!inst->isSquashed()) {
+        if (inst->isLoad() || inst->isAtomic()) {
+            Addr addr = pkt->getAddr();
+            auto [enable_diff, diff_all_states] = cpu->getDiffAllStates();
+            if (system->multiCore() && enable_diff && !request->_sbufferBypass &&
+                cpu->goldenMemManager()->inPmem(addr)) {
+                // Check the loaded data against golden memory
+                uint8_t *golden_data = (uint8_t *)cpu->goldenMemManager()->guestToHost(addr);
+                uint8_t *loaded_data = pkt->getPtr<uint8_t>();
+                size_t size = pkt->getSize();
+                if (memcmp(golden_data, loaded_data, size) == 0) {
+                    assert(size == inst->effSize);
+                    inst->setGolden(golden_data);
+                } else {
+                    panic("Data error at addr %#lx, size %d. %s\n",
+                          addr, size,
+                          goldenDiffStr(loaded_data, golden_data, size).c_str());
+                }
+            }
+        }
+
         if (request->needWBToRegister()) {
             // Only loads, store conditionals and atomics perform the writeback
             // after receving the response from the memory
@@ -306,6 +328,8 @@ LSQUnit::init(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params,
 
     DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);
 
+    system = params.system;
+
     depCheckShift = params.LSQDepCheckShift;
     checkLoads = params.LSQCheckLoads;
     needsTSO = params.needsTSO;
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
index 49fdaf236f..090fbe0b44 100644
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -556,6 +556,8 @@ class LSQUnit
     BaseMMU *getMMUPtr();
 
   private:
+    System *system;
+
     /** Pointer to the CPU. */
     CPU *cpu;
 
diff --git a/src/cpu/utils.hh b/src/cpu/utils.hh
index 2ca2508e73..030876e282 100644
--- a/src/cpu/utils.hh
+++ b/src/cpu/utils.hh
@@ -94,6 +94,18 @@ isAnyActiveElement(const std::vector<bool>::const_iterator& it_start,
     return (it_tmp != it_end);
 }
 
+inline std::string
+goldenDiffStr(uint8_t *dut_ptr, uint8_t *golden_ptr, size_t size) {
+    assert(size <= 8);
+    uint64_t dut_value = 0;
+    uint64_t golden_value = 0;
+    memcpy(&dut_value, dut_ptr, size);
+    memcpy(&golden_value, golden_ptr, size);
+    std::stringstream ss;
+    ss << std::hex << "Dut value: " << dut_value << ", golden value: " << golden_value << " ";
+    return ss.str();
+}
+
 } // namespace gem5
 
 #endif // __CPU_UTILS_HH__
diff --git a/src/mem/port.hh b/src/mem/port.hh
index 65ccaf4384..44ab6617a7 100644
--- a/src/mem/port.hh
+++ b/src/mem/port.hh
@@ -325,6 +325,16 @@ class ResponsePort : public Port, public AtomicResponseProtocol,
         }
     }
 
+    void* sendGetCPUPtr()
+    {
+        try {
+            warn("Peer is %s\n", _requestPort->name());
+            return FunctionalResponseProtocol::sendGetCPUPtr(_requestPort);
+        } catch (UnboundPortException) {
+            reportUnbound();
+        }
+    }
+
   public:
 
     /* The atomic protocol. */
diff --git a/src/mem/protocol/functional.cc b/src/mem/protocol/functional.cc
index 1d01926081..44bbccda2a 100644
--- a/src/mem/protocol/functional.cc
+++ b/src/mem/protocol/functional.cc
@@ -59,6 +59,12 @@ void FunctionalRequestProtocol::sendFunctionalCustomSignal(
     return peer->recvFunctionalCustomSignal(pkt, sig);
 }
 
+void* FunctionalRequestProtocol::recvGetCPUPtr()
+{
+    panic("recvGetCPUPtr not implemented\n");
+    return nullptr;
+}
+
 /* The response protocol. */
 
 void
@@ -75,4 +81,9 @@ void FunctionalResponseProtocol::sendFunctionalCustomSignal(
     return peer->recvFunctionalCustomSignal(pkt, sig);
 }
 
+void* FunctionalResponseProtocol::sendGetCPUPtr(FunctionalRequestProtocol *peer) const
+{
+    return peer->recvGetCPUPtr();
+}
+
 } // namespace gem5
diff --git a/src/mem/protocol/functional.hh b/src/mem/protocol/functional.hh
index 439fb448c8..7bfbd2895b 100644
--- a/src/mem/protocol/functional.hh
+++ b/src/mem/protocol/functional.hh
@@ -73,6 +73,8 @@ class FunctionalRequestProtocol
      * Receive a functional custom signals
      */
     virtual void recvFunctionalCustomSignal(PacketPtr pkt, int sig) {};
+
+    virtual void* recvGetCPUPtr();
 };
 
 class FunctionalResponseProtocol
@@ -96,6 +98,8 @@ class FunctionalResponseProtocol
     void sendFunctionalCustomSignal(FunctionalRequestProtocol *peer,
                                     PacketPtr pkt, int sig) const;
 
+    void* sendGetCPUPtr(FunctionalRequestProtocol *peer) const;
+
     /**
      * Receive a functional custom signals
      */
diff --git a/src/mem/ruby/protocol/RubySlicc_Types.sm b/src/mem/ruby/protocol/RubySlicc_Types.sm
index 881b8b8b76..c51316a76d 100644
--- a/src/mem/ruby/protocol/RubySlicc_Types.sm
+++ b/src/mem/ruby/protocol/RubySlicc_Types.sm
@@ -123,6 +123,7 @@ structure (Sequencer, external = "yes") {
 
                      Cycles, Cycles, Cycles);
   void notifyMissCallback(Addr, bool, bool);
+  void checkL1DRefill(Addr, DataBlock, WriteMask);
   void TBEFullCancel(Addr);
   void writeCallback(Addr, DataBlock);
 
diff --git a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
index cb55052602..9c6df898af 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache-actions.sm
@@ -2157,6 +2157,9 @@ action(Receive_ReqDataResp, desc="") {
         tbe.dataBlkValid.clear();
       }
       tbe.dataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask);
+      if (is_dcache && enable_difftest) {
+        sequencer.checkL1DRefill(tbe.addr, in_msg.dataBlk, in_msg.bitMask);
+      }
       assert(tbe.dataBlkValid.isOverlap(in_msg.bitMask) == false);
       tbe.dataBlkValid.orMask(in_msg.bitMask);
     }
diff --git a/src/mem/ruby/protocol/chi/CHI-cache.sm b/src/mem/ruby/protocol/chi/CHI-cache.sm
index a10ca4e5b5..44c71f8a0a 100644
--- a/src/mem/ruby/protocol/chi/CHI-cache.sm
+++ b/src/mem/ruby/protocol/chi/CHI-cache.sm
@@ -179,7 +179,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
 
   bool use_prefetcher, default="false";
   bool is_dcache := "False";
-
+  bool enable_difftest := "False";
 
   // Message Queues
   // Interface to the network
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
index 0b6c458168..0e086b3208 100644
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -46,6 +46,7 @@
 #include "debug/Config.hh"
 #include "debug/Drain.hh"
 #include "debug/Ruby.hh"
+#include "mem/packet_access.hh"
 #include "mem/ruby/protocol/AccessPermission.hh"
 #include "mem/ruby/slicc_interface/AbstractController.hh"
 #include "mem/simple_mem.hh"
@@ -69,7 +70,9 @@ RubyPort::RubyPort(const Params &p)
                       p.ruby_system->getAccessBackingStore(), -1,
                       p.no_retry_on_stall),
       gotAddrRanges(p.port_interrupt_out_port_connection_count),
-      m_isCPUSequencer(p.is_cpu_sequencer)
+      m_isCPUSequencer(p.is_cpu_sequencer),
+      m_isDataSequencer(p.is_data_sequencer),
+      cpu(nullptr)
 {
     assert(m_version != -1);
 
@@ -285,6 +288,12 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt)
         }
     }
 
+    // Lazily record the CPU behind this data sequencer the first time a
+    // request is received, so that the sequencer can reach it later.
+    if (ruby_port->m_isDataSequencer && !ruby_port->cpu) {
+        ruby_port->cpu = (BaseCPU *)(this->sendGetCPUPtr());
+    }
+
     // Save the port in the sender state object to be used later to
     // route the response
     pkt->pushSenderState(new SenderState(this));
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
index 308f9e9393..1af553e2a4 100644
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -45,6 +45,7 @@
 #include <cassert>
 #include <string>
 
+#include "cpu/base.hh"
 #include "mem/ruby/common/MachineID.hh"
 #include "mem/ruby/network/MessageBuffer.hh"
 #include "mem/ruby/protocol/RequestStatus.hh"
@@ -234,6 +235,9 @@ class RubyPort : public ClockedObject
     std::vector<MemResponsePort *> retryList;
 
     bool m_isCPUSequencer;
+  public:
+    bool m_isDataSequencer;
+    BaseCPU* cpu;
 };
 
 } // namespace ruby
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
index dffb5bb34d..8f6213b70c 100644
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -45,6 +45,7 @@
 #include "base/compiler.hh"
 #include "base/logging.hh"
 #include "base/str.hh"
+#include "cpu/base.hh"
 #include "cpu/testers/rubytest/RubyTester.hh"
 #include "debug/LLSC.hh"
 #include "debug/MemoryAccess.hh"
@@ -78,7 +79,8 @@ Sequencer::Sequencer(const Params &p)
     : RubyPort(p),
       stat(this),
       m_IncompleteTimes(MachineType_NUM),
-      deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check")
+      deadlockCheckEvent([this]{ wakeup(); }, "Sequencer deadlock check"),
+      isDataSequencer(p.is_data_sequencer)
 {
     m_outstanding_count = 0;
 
@@ -660,6 +662,25 @@ Sequencer::notifyMissCallback(Addr address, bool is_upgrade, bool is_busy)
 
     return;
 }
+
+void
+Sequencer::checkL1DRefill(Addr address, const DataBlock& data, WriteMask mask) {
+    if (!m_isDataSequencer) {
+        return;
+    }
+
+    if (!mask.isFull()) {
+        panic("Currently partial L1D refills are not supported");
+    }
+
+    assert(address == makeLineAddress(address));
+    DPRINTF(RubySequencer, "Checking dut refill with golden for addr %#x\n", address);
+
+    size_t block_size = RubySystem::getBlockSizeBytes();
+
+    cpu->checkL1DRefill(address, data.getData(getOffset(address), block_size), block_size);
+}
+
 void
 Sequencer::TBEFullCancel(Addr address)
 {
diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh
index 0ed88d247d..71a93b6c8b 100644
--- a/src/mem/ruby/system/Sequencer.hh
+++ b/src/mem/ruby/system/Sequencer.hh
@@ -129,6 +129,8 @@ class Sequencer : public RubyPort
 
     void notifyMissCallback(Addr address, bool is_upgrade, bool is_snoop);
 
+    void checkL1DRefill(Addr address, const DataBlock& data, WriteMask mask);
+
     void TBEFullCancel(Addr address);
 
     void atomicCallback(Addr address,
@@ -322,6 +324,8 @@ class Sequencer : public RubyPort
 
     EventFunctionWrapper deadlockCheckEvent;
 
+    bool isDataSequencer;
+
     // support for LL/SC
 
     /**
diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py
index e3a13c87f8..b00214086a 100644
--- a/src/mem/ruby/system/Sequencer.py
+++ b/src/mem/ruby/system/Sequencer.py
@@ -88,6 +88,7 @@ class RubyPort(ClockedObject):
     support_data_reqs = Param.Bool(True, "data cache requests supported")
     support_inst_reqs = Param.Bool(True, "inst cache requests supported")
     is_cpu_sequencer = Param.Bool(True, "connected to a cpu")
+    is_data_sequencer = Param.Bool(False, "is this sequencer for the dcache?")


class RubyPortProxy(RubyPort):
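
For context, the check-and-fallback flow described in the commit message can be
summarized with the minimal standalone sketch below. This is an illustration
only, not code from the patch: GoldenMem, Inst, checkAtAccess and checkAtCommit
are hypothetical stand-ins for the golden-memory manager, the DynInst golden
snapshot, the LSQ completion-time check, and the commit-time difftest
comparison.

#include <cstddef>
#include <cstdint>
#include <cstring>

// Hypothetical stand-in for the golden-memory manager: guest physical
// addresses map directly onto a host backing store.
struct GoldenMem {
    const uint8_t *base;
    const uint8_t *hostPtr(uint64_t paddr) const { return base + paddr; }
};

// Hypothetical stand-in for the per-instruction state kept in the DynInst.
struct Inst {
    uint64_t paddr;
    size_t   size;             // <= 8 bytes, as assumed by goldenDiffStr()
    uint8_t  loadedData[8];    // what the DCache returned
    uint8_t  goldenData[8];    // golden snapshot taken at access time
};

// Cache-access time (LSQUnit::completeDataAccess in the patch): data that
// really came from the cache must match golden memory, and the matching
// value is stashed in the instruction. Data forwarded from the store
// buffer is skipped in the patch (_sbufferBypass) and is omitted here.
bool checkAtAccess(const GoldenMem &gm, Inst &inst)
{
    const uint8_t *golden = gm.hostPtr(inst.paddr);
    if (std::memcmp(golden, inst.loadedData, inst.size) != 0)
        return false;                              // genuine data error
    std::memcpy(inst.goldenData, golden, inst.size);
    return true;
}

// Commit time (BaseCPU::diffWithNEMU in the patch): if NEMU disagrees,
// compare against the value that was golden when the load executed,
// because another core may have rewritten golden memory since then.
bool checkAtCommit(const Inst &inst, const uint8_t *nemuValue)
{
    if (std::memcmp(nemuValue, inst.loadedData, inst.size) == 0)
        return true;                               // NEMU already agrees
    return std::memcmp(inst.goldenData, inst.loadedData, inst.size) == 0;
}

The per-instruction snapshot is what makes the commit-time check robust: a
value that matched golden memory when the load left the cache may legitimately
differ from current golden memory by the time the instruction commits.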