From 2221b7b2998fadb2974156e9f1f1e3b529d3ba91 Mon Sep 17 00:00:00 2001 From: tastynoob <934348725@qq.com> Date: Mon, 14 Oct 2024 14:58:33 +0800 Subject: [PATCH] cpu-o3: clean and format code --- src/arch/riscv/RiscvCPU.py | 6 +- src/cpu/o3/FUPool.py | 123 ++++++++++++++++++++-------------- src/cpu/o3/FuncUnitConfig.py | 26 +++++--- src/cpu/o3/inst_queue.cc | 8 +++ src/cpu/o3/issue_queue.cc | 125 ++++++++++++++++------------------- src/cpu/o3/issue_queue.hh | 14 ++-- src/cpu/o3/lsq_unit.cc | 15 +++-- src/cpu/o3/lsq_unit.hh | 6 +- 8 files changed, 176 insertions(+), 147 deletions(-) diff --git a/src/arch/riscv/RiscvCPU.py b/src/arch/riscv/RiscvCPU.py index 1a03a0a805..fa2e66c65d 100644 --- a/src/arch/riscv/RiscvCPU.py +++ b/src/arch/riscv/RiscvCPU.py @@ -56,13 +56,12 @@ class RiscvMinorCPU(BaseMinorCPU, RiscvCPU): mmu = RiscvMMU() class XiangshanCore(RiscvO3CPU): - pass + scheduler = KunminghuScheduler() class XiangshanECore(XiangshanCore): fetchWidth = 8 decodeWidth = 4 renameWidth = 4 - wbWidth = 6 numROBEntries = 150 LQEntries = 48 @@ -73,12 +72,12 @@ class XiangshanECore(XiangshanCore): numPhysVecPredRegs = 36 numPhysCCRegs = 0 numPhysRMiscRegs = 40 + scheduler = ECoreScheduler() class XiangshanECore2Read(XiangshanCore): fetchWidth = 8 decodeWidth = 4 renameWidth = 4 - wbWidth = 6 numROBEntries = 150 LQEntries = 48 @@ -89,3 +88,4 @@ class XiangshanECore2Read(XiangshanCore): numPhysVecPredRegs = 36 numPhysCCRegs = 0 numPhysRMiscRegs = 40 + scheduler = ECore2ReadScheduler() diff --git a/src/cpu/o3/FUPool.py b/src/cpu/o3/FUPool.py index cce5e29a29..3b5c069b5c 100644 --- a/src/cpu/o3/FUPool.py +++ b/src/cpu/o3/FUPool.py @@ -85,56 +85,79 @@ class Scheduler(SimObject): specWakeupNetwork = VectorParam.SpecWakeupChannel([], "") xbarWakeup = Param.Bool(False, "use xbar wakeup network, (will override specWakeupNetwork)") -# class DefaultScheduler(Scheduler): -# IQs = [ -# IssueQue(name='intIQ0' , inports=2, size=2*12, oports=[ -# IssuePort(fu=[IntBRU(), IntMisc()]), -# 
IssuePort(fu=[IntBRU(), IntMisc()]) -# ]), -# IssueQue(name='intIQ1' , inports=2, size=2*12, oports=[ -# IssuePort(fu=[IntALU(), IntMult()]), -# IssuePort(fu=[IntALU(), IntMult()]), -# ]), -# IssueQue(name='intIQ2' , inports=2, size=2*12, oports=[ -# IssuePort(fu=[IntALU(), IntDiv()]), -# IssuePort(fu=[IntALU(), IntDiv()]), -# ]), -# IssueQue(name='memIQ0' , inports=6, size=3*16, oports=[ -# IssuePort(fu=[ReadPort()]), -# IssuePort(fu=[ReadPort()]), -# IssuePort(fu=[ReadPort()]), -# ]), -# IssueQue(name='memIQ1' , inports=4, size=2*16, oports=[ -# IssuePort(fu=[WritePort()]), -# IssuePort(fu=[WritePort()]) -# ]), -# IssueQue(name='fpIQ0' , inports=2, size=18, oports=[ -# IssuePort(fu=[FP_ALU(), FP_MISC(), FP_MAC()]) -# ], scheduleToExecDelay=3), -# IssueQue(name='fpIQ1' , inports=2, size=18, oports=[ -# IssuePort(fu=[FP_ALU(), FP_MAC()]) -# ], scheduleToExecDelay=3), -# IssueQue(name='fpIQ2' , inports=2, size=18, oports=[ -# IssuePort(fu=[FP_ALU(), FP_MAC()]) -# ], scheduleToExecDelay=3), -# IssueQue(name='fpIQ3' , inports=2, size=18, oports=[ -# IssuePort(fu=[FP_ALU(), FP_MAC()]) -# ], scheduleToExecDelay=3), -# IssueQue(name='fpIQ4' , inports=2, size=18, oports=[ -# IssuePort(fu=[FP_SLOW()]), -# IssuePort(fu=[FP_SLOW()]) -# ], scheduleToExecDelay=3), -# IssueQue(name='vecIQ0' , inports=5, size=16+16+10, oports=[ -# IssuePort(fu=[SIMD_Unit()]), -# IssuePort(fu=[SIMD_Unit()]), -# IssuePort(fu=[SIMD_Unit()]), -# IssuePort(fu=[SIMD_Unit()]), -# IssuePort(fu=[SIMD_Unit()]) -# ], scheduleToExecDelay=3), -# ] -# intSlotNum = 12 -# fpSlotNum = 12 -# xbarWakeup = True +class ECoreScheduler(Scheduler): + IQs = [ + IssueQue(name='intIQ0' , inports=2, size=2*12, oports=[ + IssuePort(fu=[IntALU(), IntBRU()]), + IssuePort(fu=[IntALU(), IntBRU()]) + ]), + IssueQue(name='intIQ1' , inports=2, size=2*12, oports=[ + IssuePort(fu=[IntALU(), IntBRU()]), + IssuePort(fu=[IntALU(), IntBRU()]) + ]), + IssueQue(name='intIQ2' , inports=2, size=2*12, oports=[ + IssuePort(fu=[IntMult(), 
IntDiv(), IntMisc()]) + ]), + IssueQue(name='memIQ0' , inports=2, size=2*16, oports=[ + IssuePort(fu=[ReadPort()]) + ]), + IssueQue(name='memIQ1' , inports=2, size=2*16, oports=[ + IssuePort(fu=[RdWrPort()]) + ]), + IssueQue(name='fpIQ0' , inports=2, size=18, oports=[ + IssuePort(fu=[FP_ALU(), FP_MAC()]), + IssuePort(fu=[FP_ALU(), FP_MAC()]) + ], scheduleToExecDelay=3), + IssueQue(name='fpIQ1' , inports=2, size=18, oports=[ + IssuePort(fu=[FP_MISC(), FP_SLOW()]) + ], scheduleToExecDelay=3), + IssueQue(name='vecIQ0' , inports=2, size=16, oports=[ + IssuePort(fu=[SIMD_Unit()]), + IssuePort(fu=[SIMD_Unit()]) + ], scheduleToExecDelay=3), + ] + intSlotNum = 12 + fpSlotNum = 12 + xbarWakeup = True + +class ECore2ReadScheduler(Scheduler): + IQs = [ + IssueQue(name='intIQ0' , inports=2, size=2*12, oports=[ + IssuePort(fu=[IntALU(), IntBRU()]), + IssuePort(fu=[IntALU(), IntBRU()]) + ]), + IssueQue(name='intIQ1' , inports=2, size=2*12, oports=[ + IssuePort(fu=[IntALU(), IntBRU()]), + IssuePort(fu=[IntALU(), IntBRU()]) + ]), + IssueQue(name='intIQ2' , inports=2, size=2*12, oports=[ + IssuePort(fu=[IntMult(), IntDiv(), IntMisc()]) + ]), + IssueQue(name='memIQ0' , inports=2, size=2*16, oports=[ + IssuePort(fu=[ReadPort()]), + IssuePort(fu=[ReadPort()]) + ]), + IssueQue(name='memIQ1' , inports=2, size=2*16, oports=[ + IssuePort(fu=[WritePort()]) + ]), + IssueQue(name='fpIQ0' , inports=2, size=18, oports=[ + IssuePort(fu=[FP_ALU(), FP_MAC()]), + IssuePort(fu=[FP_ALU(), FP_MAC()]) + ], scheduleToExecDelay=3), + IssueQue(name='fpIQ1' , inports=2, size=18, oports=[ + IssuePort(fu=[FP_MISC()]) + ], scheduleToExecDelay=3), + IssueQue(name='fpIQ4' , inports=2, size=18, oports=[ + IssuePort(fu=[FP_SLOW()]) + ], scheduleToExecDelay=3), + IssueQue(name='vecIQ0' , inports=2, size=16, oports=[ + IssuePort(fu=[SIMD_Unit()]), + IssuePort(fu=[SIMD_Unit()]) + ], scheduleToExecDelay=3), + ] + intSlotNum = 12 + fpSlotNum = 12 + xbarWakeup = True class KunminghuScheduler(Scheduler): diff --git 
a/src/cpu/o3/FuncUnitConfig.py b/src/cpu/o3/FuncUnitConfig.py index 626daec1fe..d3512513bd 100644 --- a/src/cpu/o3/FuncUnitConfig.py +++ b/src/cpu/o3/FuncUnitConfig.py @@ -159,21 +159,25 @@ class WritePort(FUDesc): class RdWrPort(FUDesc): opList = [ OpDesc(opClass='MemRead', opLat=2), - OpDesc(opClass='MemWrite', opLat=4), - OpDesc(opClass='FloatMemRead'), - OpDesc(opClass='FloatMemWrite'), - OpDesc(opClass='VectorUnitStrideLoad', opLat=2), - OpDesc(opClass='VectorUnitStrideMaskLoad', opLat=2), - OpDesc(opClass='VectorStridedLoad', opLat=2), - OpDesc(opClass='VectorIndexedLoad', opLat=2), - OpDesc(opClass='VectorUnitStrideFaultOnlyFirstLoad', opLat=2), - OpDesc(opClass='VectorWholeRegisterLoad', opLat=2), + OpDesc(opClass='MemWrite', opLat=2), + OpDesc(opClass='FloatMemRead', opLat=2), + OpDesc(opClass='FloatMemWrite', opLat=3), + OpDesc(opClass='VectorUnitStrideLoad', opLat=3), + OpDesc(opClass='VectorSegUnitStrideLoad', opLat=3), + OpDesc(opClass='VectorUnitStrideMaskLoad', opLat=3), + OpDesc(opClass='VectorSegUnitStrideMaskLoad', opLat=3), + OpDesc(opClass='VectorStridedLoad', opLat=3), + OpDesc(opClass='VectorSegStridedLoad', opLat=3), + OpDesc(opClass='VectorIndexedLoad', opLat=3), + OpDesc(opClass='VectorSegIndexedLoad', opLat=3), + OpDesc(opClass='VectorUnitStrideFaultOnlyFirstLoad', opLat=3), + OpDesc(opClass='VectorWholeRegisterLoad', opLat=3), OpDesc(opClass='VectorUnitStrideStore'), + OpDesc(opClass='VectorSegUnitStrideStore'), OpDesc(opClass='VectorUnitStrideMaskStore'), OpDesc(opClass='VectorStridedStore'), OpDesc(opClass='VectorIndexedStore'), - OpDesc(opClass='VectorWholeRegisterStore') - ] + OpDesc(opClass='VectorWholeRegisterStore')] count = 0 class IprPort(FUDesc): diff --git a/src/cpu/o3/inst_queue.cc b/src/cpu/o3/inst_queue.cc index 1df5bc1feb..91fe733a07 100644 --- a/src/cpu/o3/inst_queue.cc +++ b/src/cpu/o3/inst_queue.cc @@ -521,6 +521,7 @@ InstructionQueue::processFUCompletion(const DynInstPtr &inst, int fu_idx) bool 
InstructionQueue::execLatencyCheck(const DynInstPtr& inst, uint32_t& op_latency) { + // Leading zero count auto lzc = [](RegVal val) { for (int i = 0; i < 64; i++) { if (val & (0x1lu << 63)) { @@ -540,14 +541,20 @@ InstructionQueue::execLatencyCheck(const DynInstPtr& inst, uint32_t& op_latency) rs2 = cpu->readArchIntReg(inst->srcRegIdx(1).index(), inst->threadNumber); // rs1 / rs2 : 0x80/0x8 ,delay_ = 4 + // get the leading zero difference between rs1 and rs2 (rs1 > rs2) delay_ = std::max(lzc(std::labs(rs2)) - lzc(std::labs(rs1)), 0); if (rs2 == 1) { + // rs1 / 1 = rs1 op_latency = 6; } else if (rs1 == rs2) { + // rs1 / rs2 = 1 rem 0 op_latency = 8; } else if (lzc(std::labs(rs2)) - lzc(std::labs(rs1)) < 0) { + // if rs2 > rs1 then rs1/rs2 = 0 rem rs1 op_latency = 6; } else { + // base_latency + dynamic_latency + // dynamic_latency determined by delay_ op_latency = 8 + delay_ / 4; } return true; @@ -556,6 +563,7 @@ InstructionQueue::execLatencyCheck(const DynInstPtr& inst, uint32_t& op_latency) inst->threadNumber); rs2 = cpu->readArchFloatReg(inst->srcRegIdx(1).index(), inst->threadNumber); + // for special values, fsqrt/fdiv early finish switch (inst->staticInst->operWid()) { case 32: if (__isnanf(*((float*)(&rs1))) || diff --git a/src/cpu/o3/issue_queue.cc b/src/cpu/o3/issue_queue.cc index 7b6a4d2a53..3ea644c68d 100644 --- a/src/cpu/o3/issue_queue.cc +++ b/src/cpu/o3/issue_queue.cc @@ -29,12 +29,12 @@ #include "sim/eventq.hh" #include "sim/sim_object.hh" -#define POPINST(x) \ - do {\ - if (x->opClass() != FMAMulOp) [[likely]] {\ - assert(instNum != 0);\ - instNum--;\ - }\ +#define POPINST(x) \ + do { \ + if (x->opClass() != FMAMulOp) [[likely]] { \ + assert(instNum != 0); \ + instNum--; \ + } \ } while (0) namespace gem5 @@ -43,9 +43,7 @@ namespace gem5 namespace o3 { -IssuePort::IssuePort(const IssuePortParams &params) - : SimObject(params), - fu(params.fu) +IssuePort::IssuePort(const IssuePortParams& params) : SimObject(params), fu(params.fu) { 
mask.resize(Num_OpClasses, false); for (auto it0 : params.fu) { @@ -56,7 +54,7 @@ IssuePort::IssuePort(const IssuePortParams &params) } bool -IssueQue::select_ploy::operator()(const DynInstPtr& a, const DynInstPtr& b) const +IssueQue::select_policy::operator()(const DynInstPtr& a, const DynInstPtr& b) const { return a->seqNum > b->seqNum; } @@ -97,7 +95,7 @@ IssueQue::IssueQueStats::IssueQueStats(statistics::Group* parent, IssueQue* que, arbFailed.flags(statistics::nozero); } -IssueQue::IssueQue(const IssueQueParams &params) +IssueQue::IssueQue(const IssueQueParams& params) : SimObject(params), inports(params.inports), outports(params.oports.size()), @@ -123,18 +121,15 @@ IssueQue::IssueQue(const IssueQueParams &params) } } } - if (same_fu) { - warn("%s: Use one selector by multiple identical fus\n", iqname); - } if (same_fu) { // we only allocate one ReadyQue + warn("%s: Use one selector by multiple identical fus\n", iqname); auto t = new ReadyQue; readyQs.resize(outports, t); auto& port = params.oports[0]; fuDescs.insert(fuDescs.begin(), port->fu.begin(), port->fu.end()); - } - else { + } else { readyQs.resize(outports, nullptr); for (int i = 0; i < outports; i++) { readyQs[i] = new ReadyQue; @@ -179,7 +174,7 @@ IssueQue::resetDepGraph(int numPhysRegs) void IssueQue::checkScoreboard(const DynInstPtr& inst) { - for (int i=0; i<inst->numSrcRegs(); i++) { + for (int i = 0; i < inst->numSrcRegs(); i++) { auto src = inst->renamedSrcIdx(i); if (src->isFixedMapping()) [[unlikely]] { continue; @@ -209,7 +204,7 @@ void IssueQue::issueToFu() { int size = toFu->size; - for (int i=0; ipop(); if (!inst) { continue; @@ -219,7 +214,7 @@ IssueQue::issueToFu() iqstats->portBusy[inst->issueportid]++; // replay it inst->setInReadyQ(); - readyQclassify[inst->opClass()]->push(inst);// retry + readyQclassify[inst->opClass()]->push(inst); // retry continue; } checkScoreboard(inst); @@ -259,12 +254,12 @@ IssueQue::wakeUpDependents(const DynInstPtr& inst, bool speculative) for (int i = 0; i < 
inst->numDestRegs(); i++) { PhysRegIdPtr dst = inst->renamedDestIdx(i); if (dst->isFixedMapping() || dst->getNumPinnedWritesToComplete() != 1) [[unlikely]] { - continue;; + continue; } - DPRINTF(Schedule, "was %s woken by p%lu [sn %lu]\n", - speculative ? "spec" : "wb", dst->flatIndex(), inst->seqNum); - for (auto& it: subDepGraph[dst->flatIndex()]) { + DPRINTF(Schedule, "was %s woken by p%lu [sn %lu]\n", speculative ? "spec" : "wb", dst->flatIndex(), + inst->seqNum); + for (auto& it : subDepGraph[dst->flatIndex()]) { int srcIdx = it.first; auto& consumer = it.second; if (consumer->readySrcIdx(srcIdx)) { @@ -295,7 +290,7 @@ IssueQue::addIfReady(const DynInstPtr& inst) DPRINTF(Counters, "set readyTick at addIfReady\n"); } - //Add the instruction to the proper ready list. + // Add the instruction to the proper ready list. if (inst->isMemRef()) { if (inst->memDepSolved()) { DPRINTF(Schedule, "memRef Dependency was solved can issue\n"); @@ -318,7 +313,7 @@ void IssueQue::selectInst() { selectQ.clear(); - for (int pi=0;piempty()) { auto top = readyQ->top(); @@ -344,7 +339,7 @@ IssueQue::scheduleInst() { // here is issueStage 0 for (auto& info : selectQ) { - auto& pi = info.first; // port id + auto& pi = info.first; // port id auto& inst = info.second; if (inst->canceled()) { DPRINTF(Schedule, "[sn %ld] was canceled\n", inst->seqNum); @@ -353,7 +348,7 @@ IssueQue::scheduleInst() iqstats->arbFailed++; assert(inst->readyToIssue()); inst->setInReadyQ(); - readyQclassify[inst->opClass()]->push(inst);// retry + readyQclassify[inst->opClass()]->push(inst); // retry } else [[likely]] { DPRINTF(Schedule, "[sn %ld] no conflict, scheduled\n", inst->seqNum); iqstats->portissued[pi]++; @@ -417,13 +412,12 @@ IssueQue::insert(const DynInstPtr& inst) instNumInsert++; } - DPRINTF(Schedule, "[sn %lu] %s insert into %s\n", - inst->seqNum, enums::OpClassStrings[inst->opClass()] ,iqname); + DPRINTF(Schedule, "[sn %lu] %s insert into %s\n", inst->seqNum, 
enums::OpClassStrings[inst->opClass()], iqname); DPRINTF(Schedule, "[sn %lu] instNum++\n", inst->seqNum); inst->issueQue = this; instList.emplace_back(inst); bool addToDepGraph = false; - for (int i=0; i<inst->numSrcRegs(); i++) { + for (int i = 0; i < inst->numSrcRegs(); i++) { auto src = inst->renamedSrcIdx(i); if (!inst->readySrcIdx(i) && !src->isFixedMapping()) { if (scheduler->scoreboard[src->flatIndex()] || scheduler->earlyScoreboard[src->flatIndex()]) { @@ -472,7 +466,7 @@ IssueQue::doCommit(const InstSeqNum seqNum) void IssueQue::doSquash(const InstSeqNum seqNum) { - for (auto it=instList.begin(); it!=instList.end();) { + for (auto it = instList.begin(); it != instList.end();) { if ((*it)->seqNum > seqNum) { (*it)->setSquashedInIQ(); (*it)->setCanCommit(); @@ -514,13 +508,13 @@ IssueQue::doSquash(const InstSeqNum seqNum) Scheduler::Slot::Slot(uint32_t priority, uint32_t demand, const DynInstPtr& inst) : priority(priority), resourceDemand(demand), inst(inst) -{} +{ +} Scheduler::SpecWakeupCompletion::SpecWakeupCompletion(const DynInstPtr& inst, IssueQue* to) - : Event(Stat_Event_Pri, AutoDelete), - inst(inst), - to_issue_queue(to) -{} + : Event(Stat_Event_Pri, AutoDelete), inst(inst), to_issue_queue(to) +{ +} void Scheduler::SpecWakeupCompletion::process() @@ -528,14 +522,14 @@ Scheduler::SpecWakeupCompletion::process() to_issue_queue->wakeUpDependents(inst, true); } -const char * +const char* Scheduler::SpecWakeupCompletion::description() const { return "Spec wakeup completion"; } bool -Scheduler::disp_ploy::operator()(IssueQue* a, IssueQue* b) const +Scheduler::disp_policy::operator()(IssueQue* a, IssueQue* b) const { // bigger/ready first int p0 = a->ready() ? 
a->emptyEntries() : -1; @@ -544,24 +538,21 @@ Scheduler::disp_ploy::operator()(IssueQue* a, IssueQue* b) const } bool -Scheduler::slot_ploy::operator()(const Slot& a, const Slot& b) const +Scheduler::slot_policy::operator()(const Slot& a, const Slot& b) const { // smaller first return a.priority > b.priority; } Scheduler::Scheduler(const SchedulerParams& params) - : SimObject(params), - issueQues(params.IQs), - intSlotNum(params.intSlotNum), - fpSlotNum(params.fpSlotNum) + : SimObject(params), issueQues(params.IQs), intSlotNum(params.intSlotNum), fpSlotNum(params.fpSlotNum) { dispTable.resize(enums::OpClass::Num_OpClass); opExecTimeTable.resize(enums::OpClass::Num_OpClass, 1); opPipelined.resize(enums::OpClass::Num_OpClass, false); boost::dynamic_bitset<> opChecker(enums::Num_OpClass, 0); - for (int i=0; i< issueQues.size(); i++) { + for (int i = 0; i < issueQues.size(); i++) { issueQues[i]->setIQID(i); issueQues[i]->scheduler = this; combinedFus += issueQues[i]->outports; @@ -577,7 +568,7 @@ Scheduler::Scheduler(const SchedulerParams& params) } if (opChecker.count() != enums::Num_OpClass) { - for (int i=0; i IssueQue*{ + auto findIQbyname = [this](std::string name) -> IssueQue* { IssueQue* ret = nullptr; for (auto it : this->issueQues) { if (it->getName().compare(name) == 0) { @@ -653,7 +644,8 @@ Scheduler::tick() } void -Scheduler::issueAndSelect(){ +Scheduler::issueAndSelect() +{ for (auto it : issueQues) { it->issueToFu(); } @@ -718,9 +710,8 @@ Scheduler::full(const DynInstPtr& inst) DynInstPtr Scheduler::getInstByDstReg(RegIndex flatIdx) { - for (auto iq : issueQues) - { - for (auto& inst : iq->instList){ + for (auto iq : issueQues) { + for (auto& inst : iq->instList) { if (inst->numDestRegs() > 0 && inst->renamedDestIdx(0)->flatIndex() == flatIdx) { return inst; } @@ -733,7 +724,7 @@ void Scheduler::addProducer(const DynInstPtr& inst) { DPRINTF(Schedule, "[sn %lu] addProdecer\n", inst->seqNum); - for (int i=0; i<inst->numDestRegs(); i++) { + for (int i = 0; i < 
inst->numDestRegs(); i++) { auto dst = inst->renamedDestIdx(i); if (dst->isFixedMapping()) { continue; @@ -752,6 +743,7 @@ Scheduler::insert(const DynInstPtr& inst) auto& iqs = dispTable[inst->opClass()]; bool inserted = false; + // TODO: align with RTL std::random_shuffle(iqs.begin(), iqs.end()); for (auto iq : iqs) { if (iq->ready()) { @@ -797,14 +789,15 @@ Scheduler::specWakeUpDependents(const DynInstPtr& inst, IssueQue* from_issue_que wakeDelay -= diff; } - DPRINTF(Schedule, "[sn %lu] %s create wakeupEvent to %s, delay %d cycles\n", - inst->seqNum, from_issue_queue->getName(), to->getName(), wakeDelay); + DPRINTF(Schedule, "[sn %lu] %s create wakeupEvent to %s, delay %d cycles\n", inst->seqNum, + from_issue_queue->getName(), to->getName(), wakeDelay); if (wakeDelay == 0) { to->wakeUpDependents(inst, true); for (int i = 0; i < inst->numDestRegs(); i++) { PhysRegIdPtr dst = inst->renamedDestIdx(i); if (dst->isFixedMapping()) [[unlikely]] { - continue;; + continue; + ; } earlyScoreboard[dst->flatIndex()] = true; } @@ -813,8 +806,6 @@ Scheduler::specWakeUpDependents(const DynInstPtr& inst, IssueQue* from_issue_que cpu->schedule(wakeEvent, cpu->clockEdge(Cycles(wakeDelay)) - 1); } } - - } DynInstPtr @@ -841,15 +832,15 @@ Scheduler::insertSlot(const DynInstPtr& inst) if (inst->isFloating()) { fpSlotOccupied += needed; fpSlot.push(Slot(priority, needed, inst)); - } - else if (inst->isInteger()) { + } else if (inst->isInteger()) { intSlotOccupied += needed; intSlot.push(Slot(priority, needed, inst)); } DPRINTF(Schedule, "[sn %lu] insert slot, priority: %u, needed: %u\n", inst->seqNum, priority, needed); } -void Scheduler::loadCancel(const DynInstPtr& inst) +void +Scheduler::loadCancel(const DynInstPtr& inst) { if (inst->canceled()) { return; @@ -865,7 +856,7 @@ void Scheduler::loadCancel(const DynInstPtr& inst) while (!dfs.empty()) { auto top = dfs.top(); dfs.pop(); - for (int i=0; i<top->numDestRegs(); i++) { + for (int i = 0; i < top->numDestRegs(); i++) { auto dst = 
top->renamedDestIdx(i); if (dst->isFixedMapping()) { continue; @@ -877,8 +868,8 @@ void Scheduler::loadCancel(const DynInstPtr& inst) auto& depInst = it.second; if (depInst->readySrcIdx(srcIdx) && depInst->renamedSrcIdx(srcIdx) != cpu->vecOnesPhysRegId) { assert(!depInst->isIssued()); - DPRINTF(Schedule, "cancel [sn %lu], clear src p%d ready\n", - depInst->seqNum, depInst->renamedSrcIdx(srcIdx)->flatIndex()); + DPRINTF(Schedule, "cancel [sn %lu], clear src p%d ready\n", depInst->seqNum, + depInst->renamedSrcIdx(srcIdx)->flatIndex()); depInst->setCancel(); iq->iqstats->canceledInst++; depInst->clearSrcRegReady(srcIdx); @@ -890,9 +881,9 @@ void Scheduler::loadCancel(const DynInstPtr& inst) } for (auto iq : issueQues) { - for (int i=0; i<=iq->getIssueStages(); i++) { + for (int i = 0; i <= iq->getIssueStages(); i++) { int size = iq->inflightIssues[-i].size; - for (int j=0; jinflightIssues[-i].insts[j]; if (inst && inst->canceled()) { inst = nullptr; @@ -906,7 +897,7 @@ void Scheduler::writebackWakeup(const DynInstPtr& inst) { DPRINTF(Schedule, "[sn %lu] was writeback\n", inst->seqNum); - inst->setWriteback();// clear in issueQue + inst->setWriteback(); // clear in issueQue for (int i = 0; i < inst->numDestRegs(); i++) { auto dst = inst->renamedDestIdx(i); if (dst->isFixedMapping()) { @@ -926,7 +917,7 @@ Scheduler::bypassWriteback(const DynInstPtr& inst) inst->issueQue->portBusy[inst->issueportid] = 0; } DPRINTF(Schedule, "[sn %lu] bypass write\n", inst->seqNum); - for (int i=0; i<inst->numDestRegs(); i++) { + for (int i = 0; i < inst->numDestRegs(); i++) { auto dst = inst->renamedDestIdx(i); if (dst->isFixedMapping()) { continue; @@ -1000,7 +991,7 @@ Scheduler::doSquash(const InstSeqNum seqNum) uint32_t Scheduler::getIQInsts() { - uint32_t total=0; + uint32_t total = 0; for (auto iq : issueQues) { total += iq->instNum; } diff --git a/src/cpu/o3/issue_queue.hh b/src/cpu/o3/issue_queue.hh index df1d67e849..efd2f65ff1 100644 --- a/src/cpu/o3/issue_queue.hh +++ 
b/src/cpu/o3/issue_queue.hh @@ -74,11 +74,11 @@ class IssueQue : public SimObject std::vector opPipelined; int IQID = -1; - struct select_ploy + struct select_policy { bool operator()(const DynInstPtr& a, const DynInstPtr& b) const; }; - using ReadyQue = boost::heap::priority_queue>; + using ReadyQue = boost::heap::priority_queue>; using SelectQue = std::vector>; struct IssueStream @@ -192,15 +192,15 @@ class Scheduler : public SimObject CPU* cpu; MemDepUnit *memDepUnit; - struct disp_ploy + struct disp_policy { bool operator()(IssueQue* a, IssueQue* b) const; }; - using DispPloy = std::vector; + using DispPolicy = std::vector; std::vector opExecTimeTable; std::vector opPipelined; - std::vector dispTable; + std::vector dispTable; std::vector issueQues; std::vector> wakeMatrix; uint32_t combinedFus; @@ -218,11 +218,11 @@ class Scheduler : public SimObject DynInstPtr inst; Slot(uint32_t priority, uint32_t demand, const DynInstPtr& inst); }; - struct slot_ploy + struct slot_policy { bool operator()(const Slot& a, const Slot& b) const; }; - using SlotQue = boost::heap::priority_queue>; + using SlotQue = boost::heap::priority_queue>; const uint32_t intSlotNum; const uint32_t fpSlotNum; diff --git a/src/cpu/o3/lsq_unit.cc b/src/cpu/o3/lsq_unit.cc index a5a8ab6693..1e7af2a31d 100644 --- a/src/cpu/o3/lsq_unit.cc +++ b/src/cpu/o3/lsq_unit.cc @@ -96,7 +96,7 @@ StoreBufferEntry::merge(uint64_t offset, uint8_t *datas, uint64_t size) } bool -StoreBufferEntry::coverage(PacketPtr pkt, LSQ::LSQRequest *req) +StoreBufferEntry::recordForward(PacketPtr pkt, LSQ::LSQRequest *req) { int offset = pkt->getAddr() & (validMask.size() - 1); int goffset = pkt->req->getVaddr() - req->mainReq()->getVaddr(); @@ -422,6 +422,7 @@ LSQUnit::LSQUnit(uint32_t lqEntries, uint32_t sqEntries, uint32_t sbufferEntries storeInFlight(false), stats(nullptr) { + // reserve space, we want if sq will be full, sbuffer will start evicting sqFullUpperLimit = sqEntries - 4; sqFullLowerLimit = sqFullUpperLimit - 4; 
assert(sqFullLowerLimit > 0); @@ -1375,14 +1376,14 @@ LSQUnit::storeBufferEvictToCache() } if (storeQueue.size() > sqFullUpperLimit) { - sqwillfull = true; + sqWillFull = true; } else if (storeQueue.size() < sqFullLowerLimit) { - sqwillfull = false; + sqWillFull = false; } if ((storeBuffer.unsentSize() > sbufferEvictThreshold) || (storeBufferWritebackInactive > storeBufferInactiveThreshold) || - (sqwillfull) || + (sqWillFull) || storeBufferFlushing) { if (storeBufferFlushing) { @@ -1391,7 +1392,7 @@ LSQUnit::storeBufferEvictToCache() } else if (storeBuffer.unsentSize() > sbufferEvictThreshold) { stats.sbufferEvictDuetoFull++; DPRINTF(StoreBuffer, "sbuffer has reached threshold\n"); - } else if (sqwillfull) { + } else if (sqWillFull) { stats.sbufferEvictDuetoSQFull++; DPRINTF(StoreBuffer, "sbuffer has reached SQ threshold\n"); } else { @@ -1820,10 +1821,10 @@ LSQUnit::trySendPacket(bool isLoad, PacketPtr data_pkt, bool &bank_conflict) auto entry = storeBuffer.get(pkt->getAddr() & cacheBlockMask); if (entry) { DPRINTF(StoreBuffer, "sbuffer entry[%#x] coverage %s\n", entry->blockPaddr, pkt->print()); - entry->coverage(pkt, request); + entry->recordForward(pkt, request); if (entry->vice) { DPRINTF(StoreBuffer, "sbuffer vice entry coverage\n"); - entry->vice->coverage(pkt, request); + entry->vice->recordForward(pkt, request); } } } diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh index bfd11c4fbd..b42eb60873 100644 --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -103,7 +103,7 @@ class StoreBufferEntry void merge(uint64_t offset, uint8_t* datas, uint64_t size); - bool coverage(PacketPtr pkt, LSQ::LSQRequest* req); + bool recordForward(PacketPtr pkt, LSQ::LSQRequest* req); }; class StoreBuffer @@ -260,10 +260,12 @@ class LSQUnit public: // storeQue -> storeBuffer -> cache const int maxSQoffload = 2; + const int sqFullBufferSize = 4; + int sqFullUpperLimit = 0; int sqFullLowerLimit = 0; bool storeBufferFlushing = false; - bool sqwillfull = false; + 
bool sqWillFull = false; const uint32_t sbufferEvictThreshold = 0; const uint32_t sbufferEntries = 0; StoreBuffer storeBuffer;