Skip to content

Commit

Permalink
Merge pull request #186 from OpenXiangShan/align-lsq
Browse files Browse the repository at this point in the history
Calibrate storeset and store merge buffer
  • Loading branch information
shinezyy authored Oct 16, 2024
2 parents 412f2ce + 0bf9246 commit da01165
Show file tree
Hide file tree
Showing 9 changed files with 414 additions and 267 deletions.
6 changes: 3 additions & 3 deletions src/arch/riscv/RiscvCPU.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,12 @@ class RiscvMinorCPU(BaseMinorCPU, RiscvCPU):
mmu = RiscvMMU()

class XiangshanCore(RiscvO3CPU):
pass
scheduler = KunminghuScheduler()

class XiangshanECore(XiangshanCore):
fetchWidth = 8
decodeWidth = 4
renameWidth = 4
wbWidth = 6

numROBEntries = 150
LQEntries = 48
Expand All @@ -73,12 +72,12 @@ class XiangshanECore(XiangshanCore):
numPhysVecPredRegs = 36
numPhysCCRegs = 0
numPhysRMiscRegs = 40
scheduler = ECoreScheduler()

class XiangshanECore2Read(XiangshanCore):
fetchWidth = 8
decodeWidth = 4
renameWidth = 4
wbWidth = 6

numROBEntries = 150
LQEntries = 48
Expand All @@ -89,3 +88,4 @@ class XiangshanECore2Read(XiangshanCore):
numPhysVecPredRegs = 36
numPhysCCRegs = 0
numPhysRMiscRegs = 40
scheduler = ECore2ReadScheduler()
12 changes: 6 additions & 6 deletions src/cpu/o3/BaseO3CPU.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,8 +157,8 @@ def support_take_over(cls):
SQEntries = Param.Unsigned(64, "Number of store queue entries")

SbufferEntries = Param.Unsigned(16, "Number of store buffer entries")
SbufferEvictThreshold = Param.Unsigned(12, "store buffer eviction threshold")
storeBufferInactiveThreshold = Param.Unsigned(100, "store buffer writeback timeout threshold")
SbufferEvictThreshold = Param.Unsigned(8, "store buffer eviction threshold")
storeBufferInactiveThreshold = Param.Unsigned(800, "store buffer writeback timeout threshold")

LSQDepCheckShift = Param.Unsigned(0,
"Number of places to shift addr before check")
Expand All @@ -168,11 +168,11 @@ def support_take_over(cls):
store_set_clear_period = Param.Unsigned(250000,
"Number of load/store insts before the dep predictor "
"should be invalidated")
LFSTSize = Param.Unsigned(256, "Last fetched store table size")
LFSTSize = Param.Unsigned(2048, "Last fetched store table size")
store_set_clear_thres = Param.Unsigned(1048576,"")
LFSTEntrySize = Param.Unsigned(4,"The number of store table inst in every entry of LFST can contain")
SSITSize = Param.Unsigned(1024, "Store set ID table size")
BankConflictCheck = Param.Bool(True,"open Bank conflict check")
SSITSize = Param.Unsigned(8192, "Store set ID table size")
BankConflictCheck = Param.Bool(True, "open Bank conflict check")


numRobs = Param.Unsigned(1, "Number of Reorder Buffers");
Expand All @@ -191,7 +191,7 @@ def support_take_over(cls):
numPhysRMiscRegs = Param.Unsigned(40, "Number of physical renameable misc registers")

numDQEntries = Param.Unsigned(18, "Number of entries in the dispQue")
numROBEntries = Param.Unsigned(256, "Number of reorder buffer entries")
numROBEntries = Param.Unsigned(320, "Number of reorder buffer entries")

smtNumFetchingThreads = Param.Unsigned(1, "SMT Number of Fetching Threads")
smtFetchPolicy = Param.SMTFetchPolicy('RoundRobin', "SMT Fetch policy")
Expand Down
123 changes: 73 additions & 50 deletions src/cpu/o3/FUPool.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,56 +85,79 @@ class Scheduler(SimObject):
specWakeupNetwork = VectorParam.SpecWakeupChannel([], "")
xbarWakeup = Param.Bool(False, "use xbar wakeup network, (will override specWakeupNetwork)")

# class DefaultScheduler(Scheduler):
# IQs = [
# IssueQue(name='intIQ0' , inports=2, size=2*12, oports=[
# IssuePort(fu=[IntBRU(), IntMisc()]),
# IssuePort(fu=[IntBRU(), IntMisc()])
# ]),
# IssueQue(name='intIQ1' , inports=2, size=2*12, oports=[
# IssuePort(fu=[IntALU(), IntMult()]),
# IssuePort(fu=[IntALU(), IntMult()]),
# ]),
# IssueQue(name='intIQ2' , inports=2, size=2*12, oports=[
# IssuePort(fu=[IntALU(), IntDiv()]),
# IssuePort(fu=[IntALU(), IntDiv()]),
# ]),
# IssueQue(name='memIQ0' , inports=6, size=3*16, oports=[
# IssuePort(fu=[ReadPort()]),
# IssuePort(fu=[ReadPort()]),
# IssuePort(fu=[ReadPort()]),
# ]),
# IssueQue(name='memIQ1' , inports=4, size=2*16, oports=[
# IssuePort(fu=[WritePort()]),
# IssuePort(fu=[WritePort()])
# ]),
# IssueQue(name='fpIQ0' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_ALU(), FP_MISC(), FP_MAC()])
# ], scheduleToExecDelay=3),
# IssueQue(name='fpIQ1' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_ALU(), FP_MAC()])
# ], scheduleToExecDelay=3),
# IssueQue(name='fpIQ2' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_ALU(), FP_MAC()])
# ], scheduleToExecDelay=3),
# IssueQue(name='fpIQ3' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_ALU(), FP_MAC()])
# ], scheduleToExecDelay=3),
# IssueQue(name='fpIQ4' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_SLOW()]),
# IssuePort(fu=[FP_SLOW()])
# ], scheduleToExecDelay=3),
# IssueQue(name='vecIQ0' , inports=5, size=16+16+10, oports=[
# IssuePort(fu=[SIMD_Unit()]),
# IssuePort(fu=[SIMD_Unit()]),
# IssuePort(fu=[SIMD_Unit()]),
# IssuePort(fu=[SIMD_Unit()]),
# IssuePort(fu=[SIMD_Unit()])
# ], scheduleToExecDelay=3),
# ]
# intSlotNum = 12
# fpSlotNum = 12
# xbarWakeup = True
# Scheduler configuration declaring eight issue queues: intIQ0-2, memIQ0-1,
# fpIQ0-1 and vecIQ0. Each IssueQue gives its number of input ports
# (inports), its size, and a list of output IssuePorts; each IssuePort lists
# the functional-unit descriptions it can issue to.
# Used as the `scheduler` of XiangshanECore in src/arch/riscv/RiscvCPU.py.
class ECoreScheduler(Scheduler):
IQs = [
# intIQ0 / intIQ1: two issue ports each, every port offering IntALU + IntBRU.
IssueQue(name='intIQ0' , inports=2, size=2*12, oports=[
IssuePort(fu=[IntALU(), IntBRU()]),
IssuePort(fu=[IntALU(), IntBRU()])
]),
IssueQue(name='intIQ1' , inports=2, size=2*12, oports=[
IssuePort(fu=[IntALU(), IntBRU()]),
IssuePort(fu=[IntALU(), IntBRU()])
]),
# intIQ2: a single port shared by IntMult, IntDiv and IntMisc.
IssueQue(name='intIQ2' , inports=2, size=2*12, oports=[
IssuePort(fu=[IntMult(), IntDiv(), IntMisc()])
]),
# memIQ0: one port backed by ReadPort only.
IssueQue(name='memIQ0' , inports=2, size=2*16, oports=[
IssuePort(fu=[ReadPort()])
]),
# memIQ1: one port backed by RdWrPort (the FU description that lists both
# read and write op classes in FuncUnitConfig.py).
IssueQue(name='memIQ1' , inports=2, size=2*16, oports=[
IssuePort(fu=[RdWrPort()])
]),
# The FP and vector queues below pass scheduleToExecDelay=3 explicitly; the
# integer and memory queues above leave it at IssueQue's default.
IssueQue(name='fpIQ0' , inports=2, size=18, oports=[
IssuePort(fu=[FP_ALU(), FP_MAC()]),
IssuePort(fu=[FP_ALU(), FP_MAC()])
], scheduleToExecDelay=3),
# fpIQ1: one port shared by FP_MISC and FP_SLOW.
IssueQue(name='fpIQ1' , inports=2, size=18, oports=[
IssuePort(fu=[FP_MISC(), FP_SLOW()])
], scheduleToExecDelay=3),
IssueQue(name='vecIQ0' , inports=2, size=16, oports=[
IssuePort(fu=[SIMD_Unit()]),
IssuePort(fu=[SIMD_Unit()])
], scheduleToExecDelay=3),
]
# NOTE(review): the meaning of the slot counts is defined by Scheduler, which
# is not fully visible here — confirm units before changing them.
intSlotNum = 12
fpSlotNum = 12
# Use the xbar wakeup network; per the Scheduler parameter description this
# overrides specWakeupNetwork.
xbarWakeup = True

# Scheduler configuration declaring nine issue queues. Compared with
# ECoreScheduler in this file: memIQ0 has two ReadPort issue ports instead of
# one, memIQ1 uses WritePort instead of RdWrPort, and FP_MISC / FP_SLOW are
# placed in two separate single-port queues instead of sharing one.
# Used as the `scheduler` of XiangshanECore2Read in src/arch/riscv/RiscvCPU.py.
class ECore2ReadScheduler(Scheduler):
IQs = [
# intIQ0 / intIQ1: two issue ports each, every port offering IntALU + IntBRU.
IssueQue(name='intIQ0' , inports=2, size=2*12, oports=[
IssuePort(fu=[IntALU(), IntBRU()]),
IssuePort(fu=[IntALU(), IntBRU()])
]),
IssueQue(name='intIQ1' , inports=2, size=2*12, oports=[
IssuePort(fu=[IntALU(), IntBRU()]),
IssuePort(fu=[IntALU(), IntBRU()])
]),
# intIQ2: a single port shared by IntMult, IntDiv and IntMisc.
IssueQue(name='intIQ2' , inports=2, size=2*12, oports=[
IssuePort(fu=[IntMult(), IntDiv(), IntMisc()])
]),
# memIQ0: two ports, each backed by ReadPort.
IssueQue(name='memIQ0' , inports=2, size=2*16, oports=[
IssuePort(fu=[ReadPort()]),
IssuePort(fu=[ReadPort()])
]),
# memIQ1: one port backed by WritePort.
IssueQue(name='memIQ1' , inports=2, size=2*16, oports=[
IssuePort(fu=[WritePort()])
]),
# The FP and vector queues below pass scheduleToExecDelay=3 explicitly; the
# integer and memory queues above leave it at IssueQue's default.
IssueQue(name='fpIQ0' , inports=2, size=18, oports=[
IssuePort(fu=[FP_ALU(), FP_MAC()]),
IssuePort(fu=[FP_ALU(), FP_MAC()])
], scheduleToExecDelay=3),
IssueQue(name='fpIQ1' , inports=2, size=18, oports=[
IssuePort(fu=[FP_MISC()])
], scheduleToExecDelay=3),
# NOTE(review): this queue is named 'fpIQ4' although no fpIQ2/fpIQ3 exist in
# this scheduler — confirm whether the gap is intentional before renaming,
# since the name is a runtime string that other code or output may rely on.
IssueQue(name='fpIQ4' , inports=2, size=18, oports=[
IssuePort(fu=[FP_SLOW()])
], scheduleToExecDelay=3),
IssueQue(name='vecIQ0' , inports=2, size=16, oports=[
IssuePort(fu=[SIMD_Unit()]),
IssuePort(fu=[SIMD_Unit()])
], scheduleToExecDelay=3),
]
# NOTE(review): the meaning of the slot counts is defined by Scheduler, which
# is not fully visible here — confirm units before changing them.
intSlotNum = 12
fpSlotNum = 12
# Use the xbar wakeup network; per the Scheduler parameter description this
# overrides specWakeupNetwork.
xbarWakeup = True


class KunminghuScheduler(Scheduler):
Expand Down
30 changes: 17 additions & 13 deletions src/cpu/o3/FuncUnitConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ class ReadPort(FUDesc):
OpDesc(opClass='VectorWholeRegisterLoad', opLat=3)]

class WritePort(FUDesc):
opList = [ OpDesc(opClass='MemWrite', opLat=3),
OpDesc(opClass='FloatMemWrite', opLat=4),
opList = [ OpDesc(opClass='MemWrite', opLat=2),
OpDesc(opClass='FloatMemWrite', opLat=3),
OpDesc(opClass='VectorUnitStrideStore'),
OpDesc(opClass='VectorSegUnitStrideStore'),
OpDesc(opClass='VectorUnitStrideMaskStore'),
Expand All @@ -159,21 +159,25 @@ class WritePort(FUDesc):

class RdWrPort(FUDesc):
opList = [ OpDesc(opClass='MemRead', opLat=2),
OpDesc(opClass='MemWrite', opLat=4),
OpDesc(opClass='FloatMemRead'),
OpDesc(opClass='FloatMemWrite'),
OpDesc(opClass='VectorUnitStrideLoad', opLat=2),
OpDesc(opClass='VectorUnitStrideMaskLoad', opLat=2),
OpDesc(opClass='VectorStridedLoad', opLat=2),
OpDesc(opClass='VectorIndexedLoad', opLat=2),
OpDesc(opClass='VectorUnitStrideFaultOnlyFirstLoad', opLat=2),
OpDesc(opClass='VectorWholeRegisterLoad', opLat=2),
OpDesc(opClass='MemWrite', opLat=2),
OpDesc(opClass='FloatMemRead', opLat=2),
OpDesc(opClass='FloatMemWrite', opLat=3),
OpDesc(opClass='VectorUnitStrideLoad', opLat=3),
OpDesc(opClass='VectorSegUnitStrideLoad', opLat=3),
OpDesc(opClass='VectorUnitStrideMaskLoad', opLat=3),
OpDesc(opClass='VectorSegUnitStrideMaskLoad', opLat=3),
OpDesc(opClass='VectorStridedLoad', opLat=3),
OpDesc(opClass='VectorSegStridedLoad', opLat=3),
OpDesc(opClass='VectorIndexedLoad', opLat=3),
OpDesc(opClass='VectorSegIndexedLoad', opLat=3),
OpDesc(opClass='VectorUnitStrideFaultOnlyFirstLoad', opLat=3),
OpDesc(opClass='VectorWholeRegisterLoad', opLat=3),
OpDesc(opClass='VectorUnitStrideStore'),
OpDesc(opClass='VectorSegUnitStrideStore'),
OpDesc(opClass='VectorUnitStrideMaskStore'),
OpDesc(opClass='VectorStridedStore'),
OpDesc(opClass='VectorIndexedStore'),
OpDesc(opClass='VectorWholeRegisterStore')
]
OpDesc(opClass='VectorWholeRegisterStore')]
count = 0

class IprPort(FUDesc):
Expand Down
3 changes: 1 addition & 2 deletions src/cpu/o3/issue_queue.hh
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,6 @@ class IssueQue : public SimObject
public:
inline void clearBusy(uint32_t pi) { portBusy.at(pi) = 0; }


IssueQue(const IssueQueParams &params);
void setIQID(int id) { IQID = id; }
void setCPU(CPU* cpu);
Expand All @@ -147,7 +146,7 @@ class IssueQue : public SimObject
void tick();
bool full();
bool ready();
int emptyEntries() const { return instNum; }
int emptyEntries() const { return iqsize - instNum; }
void insert(const DynInstPtr& inst);
void insertNonSpec(const DynInstPtr& inst);

Expand Down
12 changes: 8 additions & 4 deletions src/cpu/o3/lsq.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ LSQ::LSQ(CPU *cpu_ptr, IEW *iew_ptr, const BaseO3CPUParams &params)
_cacheBlocked(false),
cacheStorePorts(params.cacheStorePorts), usedStorePorts(0),
cacheLoadPorts(params.cacheLoadPorts), usedLoadPorts(0),lastConflictCheckTick(0),
recentlyloadAddr(16),
enableBankConflictCheck(params.BankConflictCheck),
waitingForStaleTranslation(false),
staleTranslationWaitTxnId(0),
Expand Down Expand Up @@ -210,9 +211,13 @@ LSQ::bankConflictedCheck(Addr vaddr)
if (l1dBankAddresses.size() == 0) {
l1dBankAddresses.push_back(bankNum(vaddr));
} else {
if (recentlyloadAddr.contains(vaddr)) {
return false;
}
auto bank_it = std::find(l1dBankAddresses.begin(), l1dBankAddresses.end(), bankNum(vaddr));
if (bank_it == l1dBankAddresses.end()) {
l1dBankAddresses.push_back(bankNum(vaddr));
recentlyloadAddr.insert(vaddr, {});
} else {
now_bank_conflict = true;
}
Expand Down Expand Up @@ -1317,8 +1322,8 @@ LSQ::SbufferRequest::recvTimingResp(PacketPtr pkt)
{
// Dump inst num, request addr, and packet addr
DPRINTF(StoreBuffer,
"Sbuffer Req::recvTimingResp: entry[%#x] sbuffer index: %lu\n",
_packets[0]->getAddr(), this->sbuffer_index);
"Sbuffer Req::recvTimingResp: entry[%#x]\n",
_packets[0]->getAddr());
assert(_numOutstandingPackets == 1);
flags.set(Flag::Complete);
assert(pkt == _packets.front());
Expand Down Expand Up @@ -1484,8 +1489,7 @@ LSQ::SbufferRequest::sendPacketToCache()
{
assert(_numOutstandingPackets == 0);
bool success = _port.sbufferSendPacket(_packets.at(0));
DPRINTF(StoreBuffer, "Sbuffer Req::sendPacketToCache: entry[%#x] sbuffer index: %lu\n", _packets[0]->getAddr(),
this->sbuffer_index);
DPRINTF(StoreBuffer, "Sbuffer Req::sendPacketToCache: entry[%#x]\n", _packets[0]->getAddr());
if (success) {
_numOutstandingPackets = 1;
}
Expand Down
7 changes: 6 additions & 1 deletion src/cpu/o3/lsq.hh
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@
#include <queue>
#include <vector>

#include <boost/compute/detail/lru_cache.hpp>

#include "arch/generic/mmu.hh"
#include "arch/generic/tlb.hh"
#include "base/flags.hh"
Expand All @@ -72,6 +74,7 @@ namespace o3
class CPU;
class IEW;
class LSQUnit;
class StoreBufferEntry;

class LSQ
{
Expand Down Expand Up @@ -691,7 +694,7 @@ class LSQ
{
CPU* cpu;
public:
uint64_t sbuffer_index=-1;
StoreBufferEntry* sbuffer_entry=nullptr;
SbufferRequest(CPU* cpu, LSQUnit* port, Addr blockpaddr, uint8_t* data);

void addReq(Addr blockVaddr, Addr blockPaddr, const std::vector<bool> byteEnable);
Expand Down Expand Up @@ -972,6 +975,8 @@ class LSQ
Tick lastConflictCheckTick;

std::vector<int64_t> l1dBankAddresses;
struct NullStruct {};
boost::compute::detail::lru_cache<uint64_t, NullStruct> recentlyloadAddr;

bool enableBankConflictCheck;

Expand Down
Loading

0 comments on commit da01165

Please sign in to comment.