diff --git a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm index 6c07416a2d..6112f38c7f 100644 --- a/src/mem/ruby/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/ruby/protocol/GPU_VIPER-TCC.sm @@ -126,6 +126,7 @@ machine(MachineType:TCC, "TCC Cache") void unset_tbe(); void wakeUpAllBuffers(); void wakeUpBuffers(Addr a); + void wakeUpAllBuffers(Addr a); MachineID mapAddressToMachine(Addr addr, MachineType mtype); @@ -569,6 +570,14 @@ machine(MachineType:TCC, "TCC Cache") probeNetwork_in.dequeue(clockEdge()); } + action(st_stallAndWaitRequest, "st", desc="Stall and wait on the address") { + stall_and_wait(coreRequestNetwork_in, address); + } + + action(wada_wakeUpAllDependentsAddr, "wada", desc="Wake up any requests waiting for this address") { + wakeUpAllBuffers(address); + } + action(z_stall, "z", desc="stall") { // built-in } @@ -606,13 +615,22 @@ machine(MachineType:TCC, "TCC Cache") // they can cause a resource stall deadlock! transition(WI, {RdBlk, WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} { - z_stall; + // by putting the stalled requests in a buffer, we reduce resource contention + // since they won't try again every cycle and will instead only try again once + // woken up + st_stallAndWaitRequest; } transition(A, {RdBlk, WrVicBlk, WrVicBlkBack}) { //TagArrayRead} { - z_stall; + // by putting the stalled requests in a buffer, we reduce resource contention + // since they won't try again every cycle and will instead only try again once + // woken up + st_stallAndWaitRequest; } transition(IV, {WrVicBlk, Atomic, WrVicBlkBack}) { //TagArrayRead} { - z_stall; + // by putting the stalled requests in a buffer, we reduce resource contention + // since they won't try again every cycle and will instead only try again once + // woken up + st_stallAndWaitRequest; } transition({M, V}, RdBlk) {TagArrayRead, DataArrayRead} { p_profileHit; @@ -660,9 +678,10 @@ transition(I, Atomic, A) {TagArrayRead} { transition(A, Atomic) { 
p_profileMiss; - at_atomicThrough; - ina_incrementNumAtomics; - p_popRequestQueue; + // by putting the stalled requests in a buffer, we reduce resource contention + // since they won't try again every cycle and will instead only try again once + // woken up + st_stallAndWaitRequest; } transition({M, W}, Atomic, WI) {TagArrayRead} { @@ -750,6 +769,7 @@ transition(I, Atomic, A) {TagArrayRead} { wcb_writeCacheBlock; sdr_sendDataResponse; pr_popResponseQueue; + wada_wakeUpAllDependentsAddr; dt_deallocateTBE; } @@ -762,6 +782,7 @@ transition(I, Atomic, A) {TagArrayRead} { transition(A, AtomicDone, I) {TagArrayRead, TagArrayWrite} { dt_deallocateTBE; + wada_wakeUpAllDependentsAddr; ptr_popTriggerQueue; } diff --git a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm index 4d24891b51..0138db36ac 100644 --- a/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/ruby/protocol/MOESI_AMD_Base-dir.sm @@ -1092,15 +1092,15 @@ machine(MachineType:Directory, "AMD Baseline protocol") stall_and_wait(dmaRequestQueue_in, address); } - action(wa_wakeUpDependents, "wa", desc="Wake up any requests waiting for this address") { + action(wad_wakeUpDependents, "wad", desc="Wake up any requests waiting for this address") { wakeUpBuffers(address); } - action(wa_wakeUpAllDependents, "waa", desc="Wake up any requests waiting for this region") { + action(wa_wakeUpAllDependents, "wa", desc="Wake up any requests waiting for this region") { wakeUpAllBuffers(); } - action(wa_wakeUpAllDependentsAddr, "waaa", desc="Wake up any requests waiting for this address") { + action(wada_wakeUpAllDependentsAddr, "wada", desc="Wake up any requests waiting for this address") { wakeUpAllBuffers(address); } @@ -1206,7 +1206,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") d_writeDataToMemory; al_allocateL3Block; pr_profileL3HitMiss; //Must come after al_allocateL3Block and before dt_deallocateTBE - wa_wakeUpDependents; + wad_wakeUpDependents; 
dt_deallocateTBE; pr_popResponseQueue; } @@ -1232,12 +1232,12 @@ machine(MachineType:Directory, "AMD Baseline protocol") } transition({B}, CoreUnblock, U) { - wa_wakeUpAllDependentsAddr; + wada_wakeUpAllDependentsAddr; pu_popUnblockQueue; } transition(B, UnblockWriteThrough, U) { - wa_wakeUpDependents; + wada_wakeUpAllDependentsAddr; pt_popTriggerQueue; } @@ -1280,7 +1280,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") transition(BDR_M, MemData, U) { mt_writeMemDataToTBE; dd_sendResponseDmaData; - wa_wakeUpAllDependentsAddr; + wada_wakeUpAllDependentsAddr; dt_deallocateTBE; pm_popMemQueue; } @@ -1365,7 +1365,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") transition(BDW_P, ProbeAcksComplete, U) { // Check for pending requests from the core we put to sleep while waiting // for a response - wa_wakeUpAllDependentsAddr; + wada_wakeUpAllDependentsAddr; dt_deallocateTBE; pt_popTriggerQueue; } @@ -1374,7 +1374,7 @@ machine(MachineType:Directory, "AMD Baseline protocol") dd_sendResponseDmaData; // Check for pending requests from the core we put to sleep while waiting // for a response - wa_wakeUpAllDependentsAddr; + wada_wakeUpAllDependentsAddr; dt_deallocateTBE; pt_popTriggerQueue; }