Skip to content

Commit

Permalink
cpu-o3, mem-ruby: Enhance goldenmem
Browse files Browse the repository at this point in the history
Previously, golden mem was checked only when an inst committed. If the golden
value changed between the cache access and commit, difftest would throw an error.
Two changes are made:
1. Check L1D refill cache block with golden mem
2. When a load accesses the DCache, check its data against golden mem and save
   the golden value in the DynInst. At commit, if the value doesn't match the
   NEMU value, compare it against the inst-local golden value instead

Change-Id: I7bfee53a622d7f0513b457c531df49c1c5e32035
  • Loading branch information
notlqr committed Sep 10, 2024
1 parent 35cd1dc commit 4da2eda
Show file tree
Hide file tree
Showing 21 changed files with 158 additions and 10 deletions.
7 changes: 4 additions & 3 deletions configs/ruby/CHI_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,7 @@ class CHI_L1Controller(CHI_Cache_Controller):
Default parameters for a L1 Cache controller
"""

def __init__(self, ruby_system, sequencer, cache, prefetcher, is_dcache=False):
def __init__(self, ruby_system, sequencer, cache, prefetcher, is_dcache=False, enable_difftest=False):
super().__init__(ruby_system)
self.sequencer = sequencer
self.cache = cache
Expand All @@ -256,6 +256,7 @@ def __init__(self, ruby_system, sequencer, cache, prefetcher, is_dcache=False):
self.dealloc_backinv_unique = False
self.dealloc_backinv_shared = False
self.is_dcache = is_dcache
self.enable_difftest = enable_difftest
# Some reasonable default TBE params
self.number_of_TBEs = 32+8
self.number_of_repl_TBEs = 16
Expand Down Expand Up @@ -501,7 +502,7 @@ def __init__(
version=Versions.getSeqId(), ruby_system=ruby_system
)
cpu.data_sequencer = RubySequencer(
version=Versions.getSeqId(), ruby_system=ruby_system
version=Versions.getSeqId(), ruby_system=ruby_system, is_data_sequencer=True
)

self._seqs.append(
Expand Down Expand Up @@ -531,7 +532,7 @@ def __init__(
)

cpu.l1d = CHI_L1Controller(
ruby_system, cpu.data_sequencer, l1d_cache, l1d_pf, is_dcache=True
ruby_system, cpu.data_sequencer, l1d_cache, l1d_pf, is_dcache=True, enable_difftest = True
)

cpu.inst_sequencer.dcache = NULL
Expand Down
13 changes: 11 additions & 2 deletions src/cpu/base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1149,7 +1149,7 @@ BaseCPU::diffWithNEMU(ThreadID tid, InstSeqNum seq)
_goldenMemManager->inPmem(diffInfo.physEffAddr)) {
DPRINTF(Diff, "Difference on %s instr found in multicore mode, check in golden memory\n",
diffInfo.inst->isLoad() ? "load" : "amo");
uint8_t *golden_ptr = (uint8_t *)_goldenMemManager->guestToHost(diffInfo.physEffAddr);
uint8_t *golden_ptr = diffInfo.goldenValue;

// a lambda function to sync memory and register from golden results to ref
auto sync_mem_reg = [&]() {
Expand Down Expand Up @@ -1478,5 +1478,14 @@ BaseCPU::setExceptionGuideExecInfo(uint64_t exception_num, uint64_t mtval,
// diffAllStates->proxy->update_config(&diffAllStates->diff.dynamic_config);
}


/**
 * Compare a cache block that is being refilled into the L1D with the
 * corresponding bytes in golden memory and panic on any mismatch.
 *
 * The check only runs in multi-core mode. It is skipped when no golden
 * memory manager is attached or when the address lies outside the range
 * tracked by golden memory, mirroring the inPmem() guard used by the other
 * golden-memory comparisons.
 *
 * @param paddr Physical address of the refilled block.
 * @param refill_data Pointer to the refilled bytes.
 * @param size Number of bytes in the block; must be 64.
 */
void
BaseCPU::checkL1DRefill(Addr paddr, const uint8_t* refill_data, size_t size) {
    assert(size == 64);
    if (!system->multiCore()) {
        return;
    }

    // Never translate an address golden memory does not back: guestToHost()
    // on such an address would yield a pointer we must not read through.
    if (!_goldenMemManager || !_goldenMemManager->inPmem(paddr)) {
        return;
    }

    const uint8_t *golden_ptr = (uint8_t *)_goldenMemManager->guestToHost(paddr);
    if (memcmp(golden_ptr, refill_data, size)) {
        panic("Refill data diff with Golden addr %#lx with size %d\n", paddr, size);
    }
}
} // namespace gem5
3 changes: 3 additions & 0 deletions src/cpu/base.hh
Original file line number Diff line number Diff line change
Expand Up @@ -736,6 +736,7 @@ class BaseCPU : public ClockedObject
bool curInstStrictOrdered{false};
gem5::Addr physEffAddr;
gem5::Addr effSize;
uint8_t *goldenValue;
uint64_t amoOldGoldenValue;
// Register address causing difftest error
bool errorRegsValue[96];// 32 regs + 32fprs +32 vprs
Expand Down Expand Up @@ -797,6 +798,8 @@ class BaseCPU : public ClockedObject
uint8_t *getGoldenMemPtr() { return goldenMemPtr; }

gem5::GoldenGloablMem *goldenMemManager() { return _goldenMemManager; }

void checkL1DRefill(Addr paddr, const uint8_t *refill_data, size_t size);
};

} // namespace gem5
Expand Down
1 change: 1 addition & 0 deletions src/cpu/o3/commit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1335,6 +1335,7 @@ Commit::diffInst(ThreadID tid, const DynInstPtr &inst) {
inst->strictlyOrdered();
cpu->diffInfo.physEffAddr = inst->physEffAddr;
cpu->diffInfo.effSize = inst->effSize;
cpu->diffInfo.goldenValue = inst->getGolden();
cpu->difftestStep(tid, inst->seqNum);
}

Expand Down
9 changes: 9 additions & 0 deletions src/cpu/o3/dyn_inst.hh
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,9 @@ class DynInst : public ExecContext, public RefCounted
ssize_t sqIdx = -1;
typename LSQUnit::SQIterator sqIt;

/**
 * Instruction-local copy of the golden-memory bytes for this access.
 * Filled by setGolden() (which copies effSize bytes) and handed out by
 * getGolden(). The buffer holds at most 8 bytes and starts zeroed.
 */
uint8_t goldenData[8] = {0};

int pf_source = -1; // if load cache line is prefetched
/////////////////////// TLB Miss //////////////////////
/**
Expand Down Expand Up @@ -1402,6 +1405,12 @@ class DynInst : public ExecContext, public RefCounted
{
return pc->as<RiscvISA::PCState>().branching();
}

/**
 * Save the golden-memory value observed for this access.
 *
 * Copies effSize bytes from @p golden into goldenData. goldenData is a
 * fixed-size buffer, so the copy is clamped to its capacity: an access
 * wider than the buffer keeps only its leading bytes instead of writing
 * past the end of the array.
 *
 * @param golden Pointer to the golden bytes for this access.
 */
void
setGolden(uint8_t *golden)
{
    const size_t capacity = sizeof(goldenData);
    const size_t requested = static_cast<size_t>(effSize);
    const size_t len = requested < capacity ? requested : capacity;
    memcpy(goldenData, golden, len);
}

/** Return a pointer to the first byte of the stored golden value. */
uint8_t *
getGolden()
{
    return &goldenData[0];
}
};

} // namespace o3
Expand Down
19 changes: 17 additions & 2 deletions src/cpu/o3/lsq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
#include "debug/Schedule.hh"
#include "debug/StoreBuffer.hh"
#include "debug/Writeback.hh"
#include "mem/packet_access.hh"
#include "params/BaseO3CPU.hh"

namespace gem5
Expand Down Expand Up @@ -517,6 +518,11 @@ LSQ::recvFunctionalCustomSignal(PacketPtr pkt, int sig)
iewStage->loadCancel(request->instruction());
}

/** Hand out the CPU this LSQ belongs to as an untyped pointer. */
void *
LSQ::getCPUPtr()
{
    void *const owner = cpu;
    return owner;
}

int
LSQ::getCount()
{
Expand Down Expand Up @@ -1147,7 +1153,8 @@ LSQ::LSQRequest::LSQRequest(
_state(State::NotIssued),
_port(*port), _inst(inst), _data(nullptr),
_res(nullptr), _addr(0), _size(0), _flags(0),
_numOutstandingPackets(0), _amo_op(nullptr)
_numOutstandingPackets(0), _amo_op(nullptr),
_sbufferBypass(false)
{
flags.set(Flag::IsLoad, isLoad);
if (_inst) {
Expand All @@ -1172,7 +1179,8 @@ LSQ::LSQRequest::LSQRequest(
_flags(flags_),
_numOutstandingPackets(0),
_amo_op(std::move(amo_op)),
_hasStaleTranslation(stale_translation)
_hasStaleTranslation(stale_translation),
_sbufferBypass(false)
{
flags.set(Flag::IsLoad, isLoad);
if (_inst) {
Expand Down Expand Up @@ -1235,6 +1243,7 @@ LSQ::LSQRequest::forward()
{
if (!isLoad() || !needWBToRegister() || forwardPackets.empty()) return;
DPRINTF(StoreBuffer, "sbuffer forward data\n");
_sbufferBypass = true;
for (auto& p : forwardPackets)
{
_inst->memData[p.idx] = p.byte;
Expand Down Expand Up @@ -1614,6 +1623,12 @@ LSQ::DcachePort::recvFunctionalCustomSignal(PacketPtr pkt, int sig)
lsq->recvFunctionalCustomSignal(pkt, sig);
}

/**
 * Answer a CPU-pointer query arriving on the dcache port by returning
 * the CPU of the LSQ behind this port, as an untyped pointer.
 */
void *
LSQ::DcachePort::recvGetCPUPtr()
{
    void *const owner = lsq->cpu;
    return owner;
}

void
LSQ::DcachePort::recvReqRetry()
{
Expand Down
4 changes: 4 additions & 0 deletions src/cpu/o3/lsq.hh
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class LSQ
virtual bool recvTimingResp(PacketPtr pkt);
virtual void recvTimingSnoopReq(PacketPtr pkt);
virtual void recvFunctionalCustomSignal(PacketPtr pkt, int sig);
virtual void* recvGetCPUPtr();

virtual void
recvFunctionalSnoop(PacketPtr pkt)
Expand Down Expand Up @@ -260,6 +261,7 @@ class LSQ
uint32_t _numOutstandingPackets;
AtomicOpFunctorPtr _amo_op;
bool _hasStaleTranslation;
bool _sbufferBypass;

struct FWDPacket
{
Expand Down Expand Up @@ -917,6 +919,8 @@ class LSQ

void recvFunctionalCustomSignal(PacketPtr pkt, int sig);

void* getCPUPtr();

Fault pushRequest(const DynInstPtr& inst, bool isLoad, uint8_t *data,
unsigned int size, Addr addr, Request::Flags flags,
uint64_t *res, AtomicOpFunctorPtr amo_op,
Expand Down
24 changes: 24 additions & 0 deletions src/cpu/o3/lsq_unit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include "cpu/o3/dyn_inst.hh"
#include "cpu/o3/limits.hh"
#include "cpu/o3/lsq.hh"
#include "cpu/utils.hh"
#include "debug/Activity.hh"
#include "debug/Diff.hh"
#include "debug/HtmCpu.hh"
Expand All @@ -60,6 +61,7 @@
#include "debug/O3PipeView.hh"
#include "debug/StoreBuffer.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "mem/request.hh"

namespace gem5
Expand Down Expand Up @@ -243,6 +245,26 @@ LSQUnit::completeDataAccess(PacketPtr pkt)

assert(!cpu->switchedOut());
if (!inst->isSquashed()) {
if (inst->isLoad() || inst->isAtomic()) {
Addr addr = pkt->getAddr();
auto [enable_diff, diff_all_states] = cpu->getDiffAllStates();
if (system->multiCore() && enable_diff && !request->_sbufferBypass &&
cpu->goldenMemManager()->inPmem(addr)) {
// check data with golden mem
uint8_t *golden_data = (uint8_t *)cpu->goldenMemManager()->guestToHost(addr);
uint8_t *loaded_data = pkt->getPtr<uint8_t>();
size_t size = pkt->getSize();
if (memcmp(golden_data, loaded_data, size) == 0) {
assert(size == inst->effSize);
inst->setGolden(golden_data);
} else {
panic("Data error at addr %#lx, size %d. %s\n",
addr, size,
goldenDiffStr(loaded_data, golden_data, size).c_str());
}
}
}

if (request->needWBToRegister()) {
// Only loads, store conditionals and atomics perform the writeback
// after receving the response from the memory
Expand Down Expand Up @@ -306,6 +328,8 @@ LSQUnit::init(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params,

DPRINTF(LSQUnit, "Creating LSQUnit%i object.\n",lsqID);

system = params.system;

depCheckShift = params.LSQDepCheckShift;
checkLoads = params.LSQCheckLoads;
needsTSO = params.needsTSO;
Expand Down
2 changes: 2 additions & 0 deletions src/cpu/o3/lsq_unit.hh
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,8 @@ class LSQUnit
BaseMMU *getMMUPtr();

private:
System *system;

/** Pointer to the CPU. */
CPU *cpu;

Expand Down
12 changes: 12 additions & 0 deletions src/cpu/utils.hh
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,18 @@ isAnyActiveElement(const std::vector<bool>::const_iterator& it_start,
return (it_tmp != it_end);
}

/**
 * Format the DUT and golden values of a mismatching memory access for an
 * error message.
 *
 * Accesses of up to 8 bytes are printed as a single hexadecimal integer
 * (the bytes are copied into a zero-initialised uint64_t, so the result
 * follows host byte order). Wider accesses no longer overflow the 8-byte
 * temporaries; they are printed as a hex byte dump in memory order.
 *
 * @param dut_ptr Bytes produced by the design under test.
 * @param golden_ptr Bytes read from golden memory.
 * @param size Number of bytes to read through each pointer.
 * @return The formatted description, terminated by a single space.
 */
inline std::string
goldenDiffStr(const uint8_t *dut_ptr, const uint8_t *golden_ptr, size_t size)
{
    std::stringstream ss;
    ss << std::hex;

    if (size <= sizeof(uint64_t)) {
        uint64_t dut_value = 0;
        uint64_t golden_value = 0;
        memcpy(&dut_value, dut_ptr, size);
        memcpy(&golden_value, golden_ptr, size);
        ss << "Dut value: " << dut_value << " , golden value: " << golden_value << " ";
        return ss.str();
    }

    // Too wide for one integer: emit two hex digits per byte instead of
    // copying past the end of a uint64_t.
    static const char hex_digits[] = "0123456789abcdef";
    auto append_bytes = [&ss](const uint8_t *bytes, size_t count) {
        for (size_t i = 0; i < count; ++i) {
            ss << hex_digits[bytes[i] >> 4] << hex_digits[bytes[i] & 0xf];
        }
    };

    ss << "Dut bytes: ";
    append_bytes(dut_ptr, size);
    ss << " , golden bytes: ";
    append_bytes(golden_ptr, size);
    ss << " ";
    return ss.str();
}

} // namespace gem5

#endif // __CPU_UTILS_HH__
10 changes: 10 additions & 0 deletions src/mem/port.hh
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,16 @@ class ResponsePort : public Port, public AtomicResponseProtocol,
}
}

/**
 * Ask the request port bound to this response port for its CPU pointer.
 * Logs the peer's name with warn() before forwarding the query.
 *
 * NOTE(review): the catch branch has no return statement, so this relies
 * on reportUnbound() never returning -- confirm.
 *
 * @return Whatever the peer's recvGetCPUPtr() returns.
 */
void *
sendGetCPUPtr()
{
    try {
        warn("Peer is %s\n", _requestPort->name());
        void *const cpu_ptr =
            FunctionalResponseProtocol::sendGetCPUPtr(_requestPort);
        return cpu_ptr;
    } catch (UnboundPortException) {
        reportUnbound();
    }
}

public:
/* The atomic protocol. */

Expand Down
11 changes: 11 additions & 0 deletions src/mem/protocol/functional.cc
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,12 @@ void FunctionalRequestProtocol::sendFunctionalCustomSignal(
return peer->recvFunctionalCustomSignal(pkt, sig);
}

/**
 * Default handler for a CPU-pointer query: panics, because request ports
 * that can answer the query are expected to override this method.
 */
void *
FunctionalRequestProtocol::recvGetCPUPtr()
{
    panic("recvGetCPUPtr not implemented\n");
    return nullptr;
}

/* The response protocol. */

void
Expand All @@ -75,4 +81,9 @@ void FunctionalResponseProtocol::sendFunctionalCustomSignal(
return peer->recvFunctionalCustomSignal(pkt, sig);
}

/**
 * Forward a CPU-pointer query to the given request-side peer.
 *
 * @param peer Request protocol endpoint to query.
 * @return The pointer returned by the peer's recvGetCPUPtr().
 */
void *
FunctionalResponseProtocol::sendGetCPUPtr(FunctionalRequestProtocol *peer) const
{
    void *const cpu_ptr = peer->recvGetCPUPtr();
    return cpu_ptr;
}

} // namespace gem5
4 changes: 4 additions & 0 deletions src/mem/protocol/functional.hh
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ class FunctionalRequestProtocol
* Receive a functional custom signals
*/
virtual void recvFunctionalCustomSignal(PacketPtr pkt, int sig) {};

virtual void* recvGetCPUPtr();
};

class FunctionalResponseProtocol
Expand All @@ -96,6 +98,8 @@ class FunctionalResponseProtocol

void sendFunctionalCustomSignal(FunctionalRequestProtocol *peer, PacketPtr pkt, int sig) const;

void* sendGetCPUPtr(FunctionalRequestProtocol *peer) const;

/**
* Receive a functional custom signals
*/
Expand Down
1 change: 1 addition & 0 deletions src/mem/ruby/protocol/RubySlicc_Types.sm
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ structure (Sequencer, external = "yes") {
Cycles, Cycles, Cycles);

void notifyMissCallback(Addr, bool, bool);
void checkL1DRefill(Addr, DataBlock, WriteMask);
void TBEFullCancel(Addr);

void writeCallback(Addr, DataBlock);
Expand Down
3 changes: 3 additions & 0 deletions src/mem/ruby/protocol/chi/CHI-cache-actions.sm
Original file line number Diff line number Diff line change
Expand Up @@ -2157,6 +2157,9 @@ action(Receive_ReqDataResp, desc="") {
tbe.dataBlkValid.clear();
}
tbe.dataBlk.copyPartial(in_msg.dataBlk, in_msg.bitMask);
if (is_dcache && enable_difftest) {
sequencer.checkL1DRefill(tbe.addr, in_msg.dataBlk, in_msg.bitMask);
}
assert(tbe.dataBlkValid.isOverlap(in_msg.bitMask) == false);
tbe.dataBlkValid.orMask(in_msg.bitMask);
}
Expand Down
2 changes: 1 addition & 1 deletion src/mem/ruby/protocol/chi/CHI-cache.sm
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ machine(MachineType:Cache, "Cache coherency protocol") :
bool use_prefetcher, default="false";

bool is_dcache := "False";

bool enable_difftest := "False";
// Message Queues

// Interface to the network
Expand Down
11 changes: 10 additions & 1 deletion src/mem/ruby/system/RubyPort.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
#include "debug/Config.hh"
#include "debug/Drain.hh"
#include "debug/Ruby.hh"
#include "mem/packet_access.hh"
#include "mem/ruby/protocol/AccessPermission.hh"
#include "mem/ruby/slicc_interface/AbstractController.hh"
#include "mem/simple_mem.hh"
Expand All @@ -69,7 +70,9 @@ RubyPort::RubyPort(const Params &p)
p.ruby_system->getAccessBackingStore(), -1,
p.no_retry_on_stall),
gotAddrRanges(p.port_interrupt_out_port_connection_count),
m_isCPUSequencer(p.is_cpu_sequencer)
m_isCPUSequencer(p.is_cpu_sequencer),
m_isDataSequencer(p.is_data_sequencer),
cpu(nullptr)
{
assert(m_version != -1);

Expand Down Expand Up @@ -285,6 +288,12 @@ RubyPort::MemResponsePort::recvTimingReq(PacketPtr pkt)
}
}

// set cpu here
// when receiving a request, set sequencer(rubyport)'s cpu
if (ruby_port->m_isDataSequencer && !ruby_port->cpu) {
ruby_port->cpu = (BaseCPU *)(this->sendGetCPUPtr());
}

// Save the port in the sender state object to be used later to
// route the response
pkt->pushSenderState(new SenderState(this));
Expand Down
Loading

0 comments on commit 4da2eda

Please sign in to comment.