Skip to content

Commit

Permalink
cpu-o3: clean and format code
Browse files Browse the repository at this point in the history
  • Loading branch information
tastynoob committed Oct 14, 2024
1 parent 9ba66be commit 2221b7b
Show file tree
Hide file tree
Showing 8 changed files with 176 additions and 147 deletions.
6 changes: 3 additions & 3 deletions src/arch/riscv/RiscvCPU.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,12 @@ class RiscvMinorCPU(BaseMinorCPU, RiscvCPU):
mmu = RiscvMMU()

class XiangshanCore(RiscvO3CPU):
pass
scheduler = KunminghuScheduler()

class XiangshanECore(XiangshanCore):
fetchWidth = 8
decodeWidth = 4
renameWidth = 4
wbWidth = 6

numROBEntries = 150
LQEntries = 48
Expand All @@ -73,12 +72,12 @@ class XiangshanECore(XiangshanCore):
numPhysVecPredRegs = 36
numPhysCCRegs = 0
numPhysRMiscRegs = 40
scheduler = ECoreScheduler()

class XiangshanECore2Read(XiangshanCore):
fetchWidth = 8
decodeWidth = 4
renameWidth = 4
wbWidth = 6

numROBEntries = 150
LQEntries = 48
Expand All @@ -89,3 +88,4 @@ class XiangshanECore2Read(XiangshanCore):
numPhysVecPredRegs = 36
numPhysCCRegs = 0
numPhysRMiscRegs = 40
scheduler = ECore2ReadScheduler()
123 changes: 73 additions & 50 deletions src/cpu/o3/FUPool.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,56 +85,79 @@ class Scheduler(SimObject):
specWakeupNetwork = VectorParam.SpecWakeupChannel([], "")
xbarWakeup = Param.Bool(False, "use xbar wakeup network, (will override specWakeupNetwork)")

# class DefaultScheduler(Scheduler):
# IQs = [
# IssueQue(name='intIQ0' , inports=2, size=2*12, oports=[
# IssuePort(fu=[IntBRU(), IntMisc()]),
# IssuePort(fu=[IntBRU(), IntMisc()])
# ]),
# IssueQue(name='intIQ1' , inports=2, size=2*12, oports=[
# IssuePort(fu=[IntALU(), IntMult()]),
# IssuePort(fu=[IntALU(), IntMult()]),
# ]),
# IssueQue(name='intIQ2' , inports=2, size=2*12, oports=[
# IssuePort(fu=[IntALU(), IntDiv()]),
# IssuePort(fu=[IntALU(), IntDiv()]),
# ]),
# IssueQue(name='memIQ0' , inports=6, size=3*16, oports=[
# IssuePort(fu=[ReadPort()]),
# IssuePort(fu=[ReadPort()]),
# IssuePort(fu=[ReadPort()]),
# ]),
# IssueQue(name='memIQ1' , inports=4, size=2*16, oports=[
# IssuePort(fu=[WritePort()]),
# IssuePort(fu=[WritePort()])
# ]),
# IssueQue(name='fpIQ0' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_ALU(), FP_MISC(), FP_MAC()])
# ], scheduleToExecDelay=3),
# IssueQue(name='fpIQ1' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_ALU(), FP_MAC()])
# ], scheduleToExecDelay=3),
# IssueQue(name='fpIQ2' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_ALU(), FP_MAC()])
# ], scheduleToExecDelay=3),
# IssueQue(name='fpIQ3' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_ALU(), FP_MAC()])
# ], scheduleToExecDelay=3),
# IssueQue(name='fpIQ4' , inports=2, size=18, oports=[
# IssuePort(fu=[FP_SLOW()]),
# IssuePort(fu=[FP_SLOW()])
# ], scheduleToExecDelay=3),
# IssueQue(name='vecIQ0' , inports=5, size=16+16+10, oports=[
# IssuePort(fu=[SIMD_Unit()]),
# IssuePort(fu=[SIMD_Unit()]),
# IssuePort(fu=[SIMD_Unit()]),
# IssuePort(fu=[SIMD_Unit()]),
# IssuePort(fu=[SIMD_Unit()])
# ], scheduleToExecDelay=3),
# ]
# intSlotNum = 12
# fpSlotNum = 12
# xbarWakeup = True
class ECoreScheduler(Scheduler):
    # Scheduler configuration made of eight issue queues: three integer,
    # two memory, two floating-point and one vector queue. Every queue
    # accepts two instructions per cycle through its inports.
    IQs = [
        # Integer queues 0 and 1: two issue ports each, every port backed by
        # an ALU and a branch unit.
        IssueQue(
            name='intIQ0',
            inports=2,
            size=2 * 12,
            oports=[
                IssuePort(fu=[IntALU(), IntBRU()]),
                IssuePort(fu=[IntALU(), IntBRU()]),
            ],
        ),
        IssueQue(
            name='intIQ1',
            inports=2,
            size=2 * 12,
            oports=[
                IssuePort(fu=[IntALU(), IntBRU()]),
                IssuePort(fu=[IntALU(), IntBRU()]),
            ],
        ),
        # Integer queue 2: a single port shared by multiply, divide and
        # miscellaneous integer units.
        IssueQue(
            name='intIQ2',
            inports=2,
            size=2 * 12,
            oports=[
                IssuePort(fu=[IntMult(), IntDiv(), IntMisc()]),
            ],
        ),
        # Memory queues: one read-only port and one combined read/write port.
        IssueQue(
            name='memIQ0',
            inports=2,
            size=2 * 16,
            oports=[
                IssuePort(fu=[ReadPort()]),
            ],
        ),
        IssueQue(
            name='memIQ1',
            inports=2,
            size=2 * 16,
            oports=[
                IssuePort(fu=[RdWrPort()]),
            ],
        ),
        # Floating-point and vector queues all set scheduleToExecDelay=3.
        IssueQue(
            name='fpIQ0',
            inports=2,
            size=18,
            oports=[
                IssuePort(fu=[FP_ALU(), FP_MAC()]),
                IssuePort(fu=[FP_ALU(), FP_MAC()]),
            ],
            scheduleToExecDelay=3,
        ),
        IssueQue(
            name='fpIQ1',
            inports=2,
            size=18,
            oports=[
                IssuePort(fu=[FP_MISC(), FP_SLOW()]),
            ],
            scheduleToExecDelay=3,
        ),
        IssueQue(
            name='vecIQ0',
            inports=2,
            size=16,
            oports=[
                IssuePort(fu=[SIMD_Unit()]),
                IssuePort(fu=[SIMD_Unit()]),
            ],
            scheduleToExecDelay=3,
        ),
    ]
    intSlotNum = 12
    fpSlotNum = 12
    # Use the xbar wakeup network; per the Scheduler parameter description
    # this overrides specWakeupNetwork.
    xbarWakeup = True

class ECore2ReadScheduler(Scheduler):
    # Scheduler configuration made of nine issue queues: three integer,
    # two memory, three floating-point and one vector queue. The memory side
    # has two read ports (memIQ0) plus a separate write port (memIQ1).
    IQs = [
        # Integer queues 0 and 1: two issue ports each, every port backed by
        # an ALU and a branch unit.
        IssueQue(
            name='intIQ0',
            inports=2,
            size=2 * 12,
            oports=[
                IssuePort(fu=[IntALU(), IntBRU()]),
                IssuePort(fu=[IntALU(), IntBRU()]),
            ],
        ),
        IssueQue(
            name='intIQ1',
            inports=2,
            size=2 * 12,
            oports=[
                IssuePort(fu=[IntALU(), IntBRU()]),
                IssuePort(fu=[IntALU(), IntBRU()]),
            ],
        ),
        # Integer queue 2: a single port shared by multiply, divide and
        # miscellaneous integer units.
        IssueQue(
            name='intIQ2',
            inports=2,
            size=2 * 12,
            oports=[
                IssuePort(fu=[IntMult(), IntDiv(), IntMisc()]),
            ],
        ),
        # Memory queues: two read ports in one queue, one write port in the
        # other.
        IssueQue(
            name='memIQ0',
            inports=2,
            size=2 * 16,
            oports=[
                IssuePort(fu=[ReadPort()]),
                IssuePort(fu=[ReadPort()]),
            ],
        ),
        IssueQue(
            name='memIQ1',
            inports=2,
            size=2 * 16,
            oports=[
                IssuePort(fu=[WritePort()]),
            ],
        ),
        # Floating-point and vector queues all set scheduleToExecDelay=3.
        IssueQue(
            name='fpIQ0',
            inports=2,
            size=18,
            oports=[
                IssuePort(fu=[FP_ALU(), FP_MAC()]),
                IssuePort(fu=[FP_ALU(), FP_MAC()]),
            ],
            scheduleToExecDelay=3,
        ),
        IssueQue(
            name='fpIQ1',
            inports=2,
            size=18,
            oports=[
                IssuePort(fu=[FP_MISC()]),
            ],
            scheduleToExecDelay=3,
        ),
        # NOTE(review): the queue name jumps from fpIQ1 to fpIQ4; presumably
        # inherited from an earlier layout -- confirm whether the name is
        # relied on elsewhere before renumbering.
        IssueQue(
            name='fpIQ4',
            inports=2,
            size=18,
            oports=[
                IssuePort(fu=[FP_SLOW()]),
            ],
            scheduleToExecDelay=3,
        ),
        IssueQue(
            name='vecIQ0',
            inports=2,
            size=16,
            oports=[
                IssuePort(fu=[SIMD_Unit()]),
                IssuePort(fu=[SIMD_Unit()]),
            ],
            scheduleToExecDelay=3,
        ),
    ]
    intSlotNum = 12
    fpSlotNum = 12
    # Use the xbar wakeup network; per the Scheduler parameter description
    # this overrides specWakeupNetwork.
    xbarWakeup = True


class KunminghuScheduler(Scheduler):
Expand Down
26 changes: 15 additions & 11 deletions src/cpu/o3/FuncUnitConfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,21 +159,25 @@ class WritePort(FUDesc):

class RdWrPort(FUDesc):
opList = [ OpDesc(opClass='MemRead', opLat=2),
OpDesc(opClass='MemWrite', opLat=4),
OpDesc(opClass='FloatMemRead'),
OpDesc(opClass='FloatMemWrite'),
OpDesc(opClass='VectorUnitStrideLoad', opLat=2),
OpDesc(opClass='VectorUnitStrideMaskLoad', opLat=2),
OpDesc(opClass='VectorStridedLoad', opLat=2),
OpDesc(opClass='VectorIndexedLoad', opLat=2),
OpDesc(opClass='VectorUnitStrideFaultOnlyFirstLoad', opLat=2),
OpDesc(opClass='VectorWholeRegisterLoad', opLat=2),
OpDesc(opClass='MemWrite', opLat=2),
OpDesc(opClass='FloatMemRead', opLat=2),
OpDesc(opClass='FloatMemWrite', opLat=3),
OpDesc(opClass='VectorUnitStrideLoad', opLat=3),
OpDesc(opClass='VectorSegUnitStrideLoad', opLat=3),
OpDesc(opClass='VectorUnitStrideMaskLoad', opLat=3),
OpDesc(opClass='VectorSegUnitStrideMaskLoad', opLat=3),
OpDesc(opClass='VectorStridedLoad', opLat=3),
OpDesc(opClass='VectorSegStridedLoad', opLat=3),
OpDesc(opClass='VectorIndexedLoad', opLat=3),
OpDesc(opClass='VectorSegIndexedLoad', opLat=3),
OpDesc(opClass='VectorUnitStrideFaultOnlyFirstLoad', opLat=3),
OpDesc(opClass='VectorWholeRegisterLoad', opLat=3),
OpDesc(opClass='VectorUnitStrideStore'),
OpDesc(opClass='VectorSegUnitStrideStore'),
OpDesc(opClass='VectorUnitStrideMaskStore'),
OpDesc(opClass='VectorStridedStore'),
OpDesc(opClass='VectorIndexedStore'),
OpDesc(opClass='VectorWholeRegisterStore')
]
OpDesc(opClass='VectorWholeRegisterStore')]
count = 0

class IprPort(FUDesc):
Expand Down
8 changes: 8 additions & 0 deletions src/cpu/o3/inst_queue.cc
Original file line number Diff line number Diff line change
Expand Up @@ -521,6 +521,7 @@ InstructionQueue::processFUCompletion(const DynInstPtr &inst, int fu_idx)
bool
InstructionQueue::execLatencyCheck(const DynInstPtr& inst, uint32_t& op_latency)
{
// Leading zero count
auto lzc = [](RegVal val) {
for (int i = 0; i < 64; i++) {
if (val & (0x1lu << 63)) {
Expand All @@ -540,14 +541,20 @@ InstructionQueue::execLatencyCheck(const DynInstPtr& inst, uint32_t& op_latency)
rs2 = cpu->readArchIntReg(inst->srcRegIdx(1).index(),
inst->threadNumber);
// rs1 / rs2 : 0x80/0x8 ,delay_ = 4
// get the leading zero difference between rs1 and rs2 (rs1 > rs2)
delay_ = std::max(lzc(std::labs(rs2)) - lzc(std::labs(rs1)), 0);
if (rs2 == 1) {
// rs1 / 1 = rs1
op_latency = 6;
} else if (rs1 == rs2) {
// rs1 / rs2 = 1 rem 0
op_latency = 8;
} else if (lzc(std::labs(rs2)) - lzc(std::labs(rs1)) < 0) {
// if rs2 > rs1 then rs1/rs2 = 0 rem rs1
op_latency = 6;
} else {
// base_latency + dynamic_latency
// dynamic_latency determined by delay_
op_latency = 8 + delay_ / 4;
}
return true;
Expand All @@ -556,6 +563,7 @@ InstructionQueue::execLatencyCheck(const DynInstPtr& inst, uint32_t& op_latency)
inst->threadNumber);
rs2 = cpu->readArchFloatReg(inst->srcRegIdx(1).index(),
inst->threadNumber);
// for special values, fsqrt/fdiv early finish
switch (inst->staticInst->operWid()) {
case 32:
if (__isnanf(*((float*)(&rs1))) ||
Expand Down
Loading

0 comments on commit 2221b7b

Please sign in to comment.