diff --git a/.jenkins/build_test_run.sh b/.jenkins/build_test_run.sh index 22e48655c9..b76867a2fe 100644 --- a/.jenkins/build_test_run.sh +++ b/.jenkins/build_test_run.sh @@ -55,6 +55,8 @@ test () { } # Run default program with and without specified configuration +# Space needed before 'retired' and 'cycles' to differentiate from other +# subfields. E.g., 'branches.retired' run () { cd "$SIMENG_INSTALL" || exit @@ -62,16 +64,16 @@ run () { echo "Simulation without configuration file argument:" cat run echo "" - compare_outputs "$(grep "retired:" run | rev | cut -d ' ' -f1 | rev)" "6721" "retired instructions" - compare_outputs "$(grep "cycles:" run | rev | cut -d ' ' -f1 | rev)" "6721" "simulated cycles" + compare_outputs "$(grep " retired:" run | rev | cut -d ' ' -f1 | rev)" "6721" "retired instructions" + compare_outputs "$(grep " cycles:" run | rev | cut -d ' ' -f1 | rev)" "6721" "simulated cycles" echo "" ./bin/simeng "$SIMENG_TOP"/configs/tx2.yaml > run echo "Simulation with configuration file argument:" cat run echo "" - compare_outputs "$(grep "retired:" run | rev | cut -d ' ' -f1 | rev)" "6724" "retired instructions" - compare_outputs "$(grep "cycles:" run | rev | cut -d ' ' -f1 | rev)" "8612" "simulated cycles" + compare_outputs "$(grep " retired:" run | rev | cut -d ' ' -f1 | rev)" "6724" "retired instructions" + compare_outputs "$(grep " cycles:" run | rev | cut -d ' ' -f1 | rev)" "7867" "simulated cycles" echo "" } diff --git a/docs/sphinx/developer/components/branchPred.rst b/docs/sphinx/developer/components/branchPred.rst index ba91fe98d9..6a03c85129 100644 --- a/docs/sphinx/developer/components/branchPred.rst +++ b/docs/sphinx/developer/components/branchPred.rst @@ -3,7 +3,7 @@ Branch prediction SimEng's fetch unit is supplied with an instance of the abstract ``BranchPredictor`` class to enable speculative execution. -Access to the ``BranchPredictor`` is supported through the ``predict``, ``update``, and ``flush`` functions. 
``predict`` provides a branch prediction, both target and direction, ``update`` updates an instructions' prediction, and ``flush`` provides optional algorithm specific flushing functionality. +Access to the ``BranchPredictor`` is supported through the ``predict``, ``update``, and ``flush`` functions. ``predict`` provides a branch prediction, both target and direction, for a branch instruction. ``update`` updates the branch predictor's prediction mechanism on the actual outcome of a branch. ``flush`` provides algorithm specific flushing functionality. The ``predict`` function is passed an instruction address, branch type, and a possible known target. The branch type argument currently supports the following types: @@ -17,13 +17,15 @@ The usage of these parameters within a branch predictor's ``predict`` function i The ``update`` function is passed the branch outcome, the instruction address, and the branch type. From this information, any algorithms or branch structures may be updated. +The state of the branch predictor when ``predict`` is called on a branch is stored in the ``ftq`` to be used by the ``update`` function. For instance, the perceptron predictor stores the globalHistory and confidence for each prediction, but future predictors may store alternative state. The ``ftq`` is a queue that has an entry for each in-flight branch. A single entry is added to the back of the ftq on ``predict``, and a single entry is removed from the front of the queue on ``update`` and from the back of the queue on ``flush``. + Generic Predictor ----------------- The algorithm(s) held within a ``BranchPredictor`` class instance can be model-specific, however, SimEng provides a ``GenericPredictor`` which contains the following logic. Global History - For indexing relevant prediction structures, a global history can be utilised. The global history value uses n-bits to store the n most recent branch direction outcomes, with the left-most bit being the oldest. 
+ For indexing relevant prediction structures, a global history can be utilised. The global history value stores the n most recent branch direction outcomes in an unsigned integer, with the least-significant bit being the most recent branch direction. The global history is speculatively updated on ``predict``, and is corrected if needed on ``update`` and ``flush``. To facilitate this speculative updating, and rolling-back on correction, for a global history of n the branch predictor keeps track of the 2n most recent branch outcomes. Valid values for Global History are 1-32. Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with an n-bit saturating counter for an associated direction. The indexing of this structure uses the lower bits of an instruction address XOR'ed with the current global branch history value. @@ -41,7 +43,7 @@ Perceptron Predictor The ``PerceptronPredictor`` has the same overall structure as the ``GenericPredictor`` but replaces the saturating counter as a means for direction prediction with a perceptron. The ``PerceptronPredictor`` contains the following logic. Global History - For indexing relevant prediction structures and for retrieving a direction from the perceptrons, a global history can be utilised. The global history value uses n-bits to store the n most recent branch direction outcomes, with the left-most bit being the oldest. + For indexing relevant prediction structures, a global history can be utilised. The global history value stores the n most recent branch direction outcomes in an unsigned integer, with the least-significant bit being the most recent branch direction. The global history is speculatively updated on ``predict``, and is corrected if needed on ``update`` and ``flush``. To facilitate this speculative updating, and rolling-back on correction, for a global history of n the branch predictor keeps track of the 2n most recent branch outcomes. Valid values for Global History are 1-32. 
Branch Target Buffer (BTB) For each entry, the BTB stores the most recent target along with a perceptron for an associated direction. The indexing of this structure uses the lower, non-zero bits of an instruction address XOR'ed with the current global branch history value. diff --git a/docs/sphinx/user/running_simeng.rst b/docs/sphinx/user/running_simeng.rst index 92a9682032..0a97fc50fe 100644 --- a/docs/sphinx/user/running_simeng.rst +++ b/docs/sphinx/user/running_simeng.rst @@ -26,7 +26,8 @@ Exit Clause The reason why the simulation has halted. Most commonly this is due to the invoking of the ``exit()`` system call by the workload under simulation. Statistics - A selection of simulation statistics describing the emergent simulated PMU-style hardware events. + A selection of simulation statistics describing the emergent simulated PMU-style hardware events. With respect to branch statistics, the misprediction rate + is calculated as branches mispredicted / branches retired. All non-workload outputs from SimEng are prefixed with a tag of the format ``[SimEng:Object]`` (e.g. ``[SimEng:ExceptionHandler]``). If the output came from the root of the framework, the ``Object`` field is omitted. diff --git a/src/include/simeng/BranchPredictor.hh b/src/include/simeng/BranchPredictor.hh deleted file mode 100644 index 8e2ddd0797..0000000000 --- a/src/include/simeng/BranchPredictor.hh +++ /dev/null @@ -1,65 +0,0 @@ -#pragma once - -#include -#include - -namespace simeng { - -/** The types of branches recognised. */ -enum class BranchType { - Conditional = 0, - LoopClosing, - Return, - SubroutineCall, - Unconditional, - Unknown -}; - -/** A branch result prediction for an instruction. */ -struct BranchPrediction { - /** Whether the branch will be taken. */ - bool taken; - - /** The branch instruction's target address. If `taken = false`, the value - * will be ignored. */ - uint64_t target; - - /** Check for equality of two branch predictions . 
*/ - bool operator==(const BranchPrediction& other) { - if ((taken == other.taken) && (target == other.target)) - return true; - else - return false; - } - - /** Check for inequality of two branch predictions . */ - bool operator!=(const BranchPrediction& other) { - if ((taken != other.taken) || (target != other.target)) - return true; - else - return false; - } -}; - -/** An abstract branch predictor interface. */ -class BranchPredictor { - public: - virtual ~BranchPredictor(){}; - - /** Generate a branch prediction for the specified instruction address with a - * branch type and possible known branch offset. */ - virtual BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset) = 0; - - /** Provide branch results to update the prediction model for the specified - * instruction address. */ - virtual void update(uint64_t address, bool taken, uint64_t targetAddress, - BranchType type) = 0; - - /** Provides flushing behaviour for the implemented branch prediction schemes - * via the instruction address. 
- */ - virtual void flush(uint64_t address) = 0; -}; - -} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/CoreInstance.hh b/src/include/simeng/CoreInstance.hh index 2dc064fa50..64e2f9e1f5 100644 --- a/src/include/simeng/CoreInstance.hh +++ b/src/include/simeng/CoreInstance.hh @@ -2,15 +2,15 @@ #include -#include "simeng/AlwaysNotTakenPredictor.hh" #include "simeng/Core.hh" #include "simeng/Elf.hh" -#include "simeng/GenericPredictor.hh" -#include "simeng/PerceptronPredictor.hh" #include "simeng/SpecialFileDirGen.hh" #include "simeng/arch/Architecture.hh" #include "simeng/arch/aarch64/Architecture.hh" #include "simeng/arch/riscv/Architecture.hh" +#include "simeng/branchpredictors/AlwaysNotTakenPredictor.hh" +#include "simeng/branchpredictors/GenericPredictor.hh" +#include "simeng/branchpredictors/PerceptronPredictor.hh" #include "simeng/config/SimInfo.hh" #include "simeng/kernel/Linux.hh" #include "simeng/memory/FixedLatencyMemoryInterface.hh" diff --git a/src/include/simeng/Instruction.hh b/src/include/simeng/Instruction.hh index 0fbaa932d5..b5a4e33e3b 100644 --- a/src/include/simeng/Instruction.hh +++ b/src/include/simeng/Instruction.hh @@ -3,9 +3,9 @@ #include #include "capstone/capstone.h" -#include "simeng/BranchPredictor.hh" #include "simeng/Register.hh" #include "simeng/RegisterValue.hh" +#include "simeng/branchpredictors/BranchPrediction.hh" #include "simeng/memory/MemoryInterface.hh" #include "simeng/span.hh" @@ -154,10 +154,10 @@ class Instruction { bool wasBranchMispredicted() const { assert(executed_ && "Branch misprediction check requires instruction to have executed"); - // Flag as mispredicted if taken state was wrongly predicted, or taken and - // predicted target is wrong - return (branchTaken_ != prediction_.taken || - (branchTaken_ && (prediction_.target != branchAddress_))); + // Flag as mispredicted if taken state was wrongly predicted, or taken + // and predicted target is wrong + return ((branchTaken_ != 
prediction_.isTaken) || + (prediction_.target != branchAddress_)); } /** Check whether an exception has been encountered while processing this @@ -178,7 +178,7 @@ class Instruction { * executing it. */ uint16_t getStallCycles() const { return stallCycles_; } - /** Retrieve the number of cycles this instruction will take to be prcoessed + /** Retrieve the number of cycles this instruction will take to be processed * by the LSQ. */ uint16_t getLSQLatency() const { return lsqExecutionLatency_; } diff --git a/src/include/simeng/arch/Architecture.hh b/src/include/simeng/arch/Architecture.hh index 70782c079c..aa293d6f5f 100644 --- a/src/include/simeng/arch/Architecture.hh +++ b/src/include/simeng/arch/Architecture.hh @@ -3,10 +3,10 @@ #include #include -#include "simeng/BranchPredictor.hh" #include "simeng/Core.hh" #include "simeng/Instruction.hh" #include "simeng/arch/ProcessStateChange.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/kernel/Linux.hh" #include "simeng/memory/MemoryInterface.hh" diff --git a/src/include/simeng/arch/aarch64/Instruction.hh b/src/include/simeng/arch/aarch64/Instruction.hh index 315a555a00..76e74d7eb7 100644 --- a/src/include/simeng/arch/aarch64/Instruction.hh +++ b/src/include/simeng/arch/aarch64/Instruction.hh @@ -3,10 +3,10 @@ #include #include -#include "simeng/BranchPredictor.hh" #include "simeng/Instruction.hh" #include "simeng/arch/aarch64/InstructionGroups.hh" #include "simeng/arch/aarch64/operandContainer.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" struct cs_arm64_op; diff --git a/src/include/simeng/arch/riscv/Instruction.hh b/src/include/simeng/arch/riscv/Instruction.hh index 888900ba18..9e707449c6 100644 --- a/src/include/simeng/arch/riscv/Instruction.hh +++ b/src/include/simeng/arch/riscv/Instruction.hh @@ -5,9 +5,9 @@ #include #include -#include "simeng/BranchPredictor.hh" #include "simeng/Instruction.hh" #include "simeng/arch/riscv/InstructionGroups.hh" +#include 
"simeng/branchpredictors/BranchPredictor.hh" namespace simeng { namespace arch { diff --git a/src/include/simeng/AlwaysNotTakenPredictor.hh b/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh similarity index 56% rename from src/include/simeng/AlwaysNotTakenPredictor.hh rename to src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh index 7ec8027d4b..382a495420 100644 --- a/src/include/simeng/AlwaysNotTakenPredictor.hh +++ b/src/include/simeng/branchpredictors/AlwaysNotTakenPredictor.hh @@ -1,11 +1,11 @@ #pragma once -#include "simeng/BranchPredictor.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" namespace simeng { -/** An "Always Not Taken" branch predictor; predicts all branches as not taken. - */ +/** An "Always Not Taken" branch predictor; predicts all branches as not + * taken. */ class AlwaysNotTakenPredictor : public BranchPredictor { public: /** Generate a branch prediction for the specified instruction address; will @@ -13,14 +13,18 @@ class AlwaysNotTakenPredictor : public BranchPredictor { BranchPrediction predict(uint64_t address, BranchType type, int64_t knownOffset) override; - /** Provide branch results to update the prediction model for the specified - * instruction address. As this model is static, this does nothing. */ - void update(uint64_t address, bool taken, uint64_t targetAddress, - BranchType type) override; + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. */ + void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) override; /** Provide flush logic for branch prediction scheme. As there's no flush * logic for an always taken predictor, this does nothing. 
*/ void flush(uint64_t address) override; + + private: }; } // namespace simeng diff --git a/src/include/simeng/branchpredictors/BranchPrediction.hh b/src/include/simeng/branchpredictors/BranchPrediction.hh new file mode 100644 index 0000000000..aac7de52ea --- /dev/null +++ b/src/include/simeng/branchpredictors/BranchPrediction.hh @@ -0,0 +1,37 @@ +#pragma once + +#include + +namespace simeng { + +/** The types of branches recognised. */ +enum class BranchType { + Conditional = 0, + LoopClosing, + Return, + SubroutineCall, + Unconditional, + Unknown +}; + +/** A branch result prediction for an instruction. */ +struct BranchPrediction { + /** Whether the branch will be taken. */ + bool isTaken; + + /** The branch instruction's target address. If `isTaken == false`, the value + * will be ignored. */ + uint64_t target; + + /** Check for equality of two branch predictions . */ + bool operator==(const BranchPrediction& other) { + return ((isTaken == other.isTaken) && (target == other.target)); + } + + /** Check for inequality of two branch predictions . */ + bool operator!=(const BranchPrediction& other) { + return ((isTaken != other.isTaken) || (target != other.target)); + } +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/branchpredictors/BranchPredictor.hh b/src/include/simeng/branchpredictors/BranchPredictor.hh new file mode 100644 index 0000000000..7779fe0703 --- /dev/null +++ b/src/include/simeng/branchpredictors/BranchPredictor.hh @@ -0,0 +1,97 @@ +#pragma once + +#include +#include + +#include "simeng/Instruction.hh" +#include "simeng/branchpredictors/BranchPrediction.hh" +#include "simeng/pipeline/PipelineBuffer.hh" + +namespace simeng { + +/** An abstract branch predictor interface. */ +class BranchPredictor { + public: + virtual ~BranchPredictor(){}; + + /** Generate a branch prediction for the supplied instruction address, a + * branch type, and a known branch offset. 
Returns a branch direction and
+   * branch target address. */
+  virtual BranchPrediction predict(uint64_t address, BranchType type,
+                                   int64_t knownOffset) = 0;
+
+  /** Updates appropriate predictor model objects based on the address, type and
+   * outcome of the branch instruction. Update must be called on
+   * branches in program order. To check this, instructionId is also passed
+   * to this function. */
+  virtual void update(uint64_t address, bool isTaken, uint64_t targetAddress,
+                      BranchType type, uint64_t instructionId) = 0;
+
+  /** Provides flushing behaviour for the implemented branch prediction schemes
+   * via the instruction address. Branches must be flushed in reverse
+   * program order (though, if a block of n instructions is being flushed at
+   * once, the exact order that the individual instructions within this block
+   * are flushed does not matter so long as they are all flushed). */
+  virtual void flush(uint64_t address) = 0;
+
+  /**
+   * Flushes every branch held in a PipelineBuffer of microOps. For each slot
+   * the tail entry is examined first, then the head entry, each through its
+   * own read-only reference so that the buffer's contents are never written;
+   * every non-null branch microOp has its address passed to `flush`.
+   */
+  void flushBranchesInBufferFromSelf(
+      pipeline::PipelineBuffer>& buffer) {
+    for (size_t slot = 0; slot < buffer.getWidth(); slot++) {
+      const auto& tailUop = buffer.getTailSlots()[slot];
+      if (tailUop != nullptr && tailUop->isBranch()) {
+        flush(tailUop->getInstructionAddress());
+      }
+
+      const auto& headUop = buffer.getHeadSlots()[slot];
+      if (headUop != nullptr && headUop->isBranch()) {
+        flush(headUop->getInstructionAddress());
+      }
+    }
+  }
+
+  /**
+   * Flushes every branch held in a PipelineBuffer of macroOps. For each slot
+   * the tail macroOp is examined first, then the head macroOp, each through
+   * its own read-only reference so that the buffer's contents are never
+   * written; every branch microOp found has its address passed to `flush`.
+   */
+  void flushBranchesInBufferFromSelf(
+      pipeline::PipelineBuffer>>&
+          buffer) {
+    for (size_t slot = 0; slot < buffer.getWidth(); slot++) {
+      const auto& tailMacroOp = buffer.getTailSlots()[slot];
+      for (const auto& uop : tailMacroOp) {
+        if (uop->isBranch()) {
+          flush(uop->getInstructionAddress());
+        }
+      }
+      const auto& headMacroOp = buffer.getHeadSlots()[slot];
+      for (const auto& uop : headMacroOp) {
+        if (uop->isBranch()) {
+          flush(uop->getInstructionAddress());
+        }
+      }
+    }
+  }
+
+  /** lastUpdatedInstructionId_ is used only in debug mode. Clang throws a
+   * warning (which becomes an error with our cmake flags) for unused
+   * variables. If the [[maybe_unused]] attribute is added to avoid this,
+   * then gcc throws a warning (which becomes an error) because it ignores
+   * this attribute. Therefore, to avoid the above catch 22, this variable is
+   * hidden behind an ifdef such that it is declared only in debug mode; when
+   * it is used. */
+#ifndef NDEBUG
+  /** The Id of the last instruction that update was called on -- used to
+   * ensure that update is called in program order. 
*/ + uint64_t lastUpdatedInstructionId_ = 0; +#endif +}; + +} // namespace simeng \ No newline at end of file diff --git a/src/include/simeng/GenericPredictor.hh b/src/include/simeng/branchpredictors/GenericPredictor.hh similarity index 54% rename from src/include/simeng/GenericPredictor.hh rename to src/include/simeng/branchpredictors/GenericPredictor.hh index 28e7ccbaac..ae1aff6d05 100644 --- a/src/include/simeng/GenericPredictor.hh +++ b/src/include/simeng/branchpredictors/GenericPredictor.hh @@ -1,10 +1,11 @@ #pragma once +#include #include #include #include -#include "simeng/BranchPredictor.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/config/SimInfo.hh" namespace simeng { @@ -27,17 +28,23 @@ class GenericPredictor : public BranchPredictor { ~GenericPredictor(); /** Generate a branch prediction for the supplied instruction address, a - * branch type, and a known branch offset; defaults to 0 meaning offset is not - * known. Returns a branch direction and branch target address. */ + * branch type, and a known branch offset. Returns a branch direction and + * branch target address. */ BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset = 0) override; - - /** Updates appropriate predictor model objects based on the address and - * outcome of the branch instruction. */ - void update(uint64_t address, bool taken, uint64_t targetAddress, - BranchType type) override; - - /** Provides RAS rewinding behaviour. */ + int64_t knownOffset) override; + + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. 
*/ + void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) override; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed). */ void flush(uint64_t address) override; private: @@ -48,19 +55,25 @@ class GenericPredictor : public BranchPredictor { * counter and a branch target. */ std::vector> btb_; - /** The previous BTB index calculated for an address. */ - std::map btbHistory_; + /** Fetch Target Queue containing the direction prediction and previous global + * history state of branches that are currently unresolved */ + std::deque> ftq_; /** The number of bits used to form the saturating counter in a BTB entry. */ uint8_t satCntBits_; - /** A n-bit history of previous branch directions where n is equal to - * globalHistoryLength_. */ + /** An n-bit history of previous branch directions where n is equal to + * globalHistoryLength_. Each bit represents a branch taken (1) or not + * taken (0), with the most recent branch being the least-significant-bit */ uint64_t globalHistory_ = 0; /** The number of previous branch directions recorded globally. */ uint16_t globalHistoryLength_; + /** A bit mask for truncating the global history to the correct size. + * Stored as a member variable to avoid duplicative calculation */ + uint64_t globalHistoryMask_; + /** A return address stack. 
*/ std::deque ras_; diff --git a/src/include/simeng/PerceptronPredictor.hh b/src/include/simeng/branchpredictors/PerceptronPredictor.hh similarity index 60% rename from src/include/simeng/PerceptronPredictor.hh rename to src/include/simeng/branchpredictors/PerceptronPredictor.hh index b76e4dd7e4..d9e05bca52 100644 --- a/src/include/simeng/PerceptronPredictor.hh +++ b/src/include/simeng/branchpredictors/PerceptronPredictor.hh @@ -1,10 +1,11 @@ #pragma once +#include #include #include #include -#include "simeng/BranchPredictor.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/config/SimInfo.hh" namespace simeng { @@ -30,17 +31,23 @@ class PerceptronPredictor : public BranchPredictor { ~PerceptronPredictor(); /** Generate a branch prediction for the supplied instruction address, a - * branch type, and a known branch offset; defaults to 0 meaning offset is not - * known. Returns a branch direction and branch target address. */ + * branch type, and a known branch offset. Returns a branch direction and + * branch target address. */ BranchPrediction predict(uint64_t address, BranchType type, - int64_t knownOffset = 0) override; - - /** Updates appropriate predictor model objects based on the address and - * outcome of the branch instruction. */ - void update(uint64_t address, bool taken, uint64_t targetAddress, - BranchType type) override; - - /** Provides RAS rewinding behaviour. */ + int64_t knownOffset) override; + + /** Updates appropriate predictor model objects based on the address, type and + * outcome of the branch instruction. Update must be called on + * branches in program order. To check this, instructionId is also passed + * to this function. */ + void update(uint64_t address, bool isTaken, uint64_t targetAddress, + BranchType type, uint64_t instructionId) override; + + /** Provides flushing behaviour for the implemented branch prediction schemes + * via the instruction address. 
Branches must be flushed in reverse + * program order (though, if a block of n instructions is being flushed at + * once, the exact order that the individual instructions within this block + * are flushed does not matter so long as they are all flushed). */ void flush(uint64_t address) override; private: @@ -59,16 +66,26 @@ class PerceptronPredictor : public BranchPredictor { * in Jiminez and Lin */ std::vector, uint64_t>> btb_; - /** The previous hashed index for an address. */ - std::map btbHistory_; + /** Fetch Target Queue containing the dot product of the perceptron and the + * global history; and the global history, both at the time of prediction, + * for each of the branch instructions that are currently unresolved. The dot + * product represents the confidence of the perceptron's direction + * prediction and is needed for a correct update when the branch + * instruction is resolved. */ + std::deque> ftq_; /** An n-bit history of previous branch directions where n is equal to - * globalHistoryLength_. */ + * globalHistoryLength_. Each bit represents a branch taken (1) or not + * taken (0), with the most recent branch being the least-significant-bit */ uint64_t globalHistory_ = 0; /** The number of previous branch directions recorded globally. */ uint64_t globalHistoryLength_; + /** A bit mask for truncating the global history to the correct size. 
+ * Stored as a member variable to avoid duplicative calculation */ + uint64_t globalHistoryMask_; + /** The magnitude of the dot product of the perceptron and the global history, * below which the perceptron's weight must be updated */ uint64_t trainingThreshold_; diff --git a/src/include/simeng/models/outoforder/Core.hh b/src/include/simeng/models/outoforder/Core.hh index 2b1b16f8a0..82c3de37cd 100644 --- a/src/include/simeng/models/outoforder/Core.hh +++ b/src/include/simeng/models/outoforder/Core.hh @@ -134,6 +134,9 @@ class Core : public simeng::Core { /** A pointer to the instruction responsible for generating the exception. */ std::shared_ptr exceptionGeneratingInstruction_; + + /** Reference to the current branch predictor */ + BranchPredictor& branchPredictor_; }; } // namespace outoforder diff --git a/src/include/simeng/pipeline/ExecuteUnit.hh b/src/include/simeng/pipeline/ExecuteUnit.hh index 14d8b47e7c..cd11eb23d6 100644 --- a/src/include/simeng/pipeline/ExecuteUnit.hh +++ b/src/include/simeng/pipeline/ExecuteUnit.hh @@ -3,7 +3,6 @@ #include #include -#include "simeng/BranchPredictor.hh" #include "simeng/Instruction.hh" #include "simeng/pipeline/PipelineBuffer.hh" @@ -33,8 +32,7 @@ class ExecuteUnit { std::function&)> handleLoad, std::function&)> handleStore, std::function&)> raiseException, - BranchPredictor& predictor, bool pipelined = true, - const std::vector& blockingGroups = {}); + bool pipelined = true, const std::vector& blockingGroups = {}); /** Tick the execute unit. Places incoming instructions into the pipeline and * executes an instruction that has reached the head of the pipeline, if @@ -56,12 +54,6 @@ class ExecuteUnit { * stall, if applicable. */ void purgeFlushed(); - /** Retrieve the number of branch instructions that have been executed. */ - uint64_t getBranchExecutedCount() const; - - /** Retrieve the number of branch mispredictions. 
*/ - uint64_t getBranchMispredictedCount() const; - /** Retrieve the number of active execution cycles. */ uint64_t getCycles() const; @@ -91,10 +83,6 @@ class ExecuteUnit { /** A function handle called upon exception generation. */ std::function&)> raiseException_; - /** A reference to the branch predictor, for updating with prediction results. - */ - BranchPredictor& predictor_; - /** Whether this unit is pipelined, or if all instructions should stall until * complete. */ bool pipelined_; @@ -129,12 +117,6 @@ class ExecuteUnit { /** The cycle this unit will become unstalled. */ uint64_t stallUntil_ = 0; - /** The number of branch instructions that were executed. */ - uint64_t branchesExecuted_ = 0; - - /** The number of branch mispredictions that were observed. */ - uint64_t branchMispredicts_ = 0; - /** The number of active execution cycles that were observed. */ uint64_t cycles_ = 0; }; diff --git a/src/include/simeng/pipeline/FetchUnit.hh b/src/include/simeng/pipeline/FetchUnit.hh index 09e8bb31f6..0eebb3c6b9 100644 --- a/src/include/simeng/pipeline/FetchUnit.hh +++ b/src/include/simeng/pipeline/FetchUnit.hh @@ -69,6 +69,9 @@ class FetchUnit { /** Clear the loop buffer. */ void flushLoopBuffer(); + /** Retrieve the number of branch instructions that have been fetched. */ + uint64_t getBranchFetchedCount() const; + private: /** An output buffer connecting this unit to the decode unit. */ PipelineBuffer& output_; @@ -118,6 +121,9 @@ class FetchUnit { /** The amount of data currently in the fetch buffer. */ uint16_t bufferedBytes_ = 0; + /** The number of branch instructions that were fetched. */ + uint64_t branchesFetched_ = 0; + /** Let the following PipelineFetchUnitTest derived classes be a friend of * this class to allow proper testing of 'tick' function. 
*/ friend class PipelineFetchUnitTest_invalidMinBytesAtEndOfBuffer_Test; diff --git a/src/include/simeng/pipeline/ReorderBuffer.hh b/src/include/simeng/pipeline/ReorderBuffer.hh index 278a0174ca..2e6e68e37b 100644 --- a/src/include/simeng/pipeline/ReorderBuffer.hh +++ b/src/include/simeng/pipeline/ReorderBuffer.hh @@ -4,6 +4,7 @@ #include #include "simeng/Instruction.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" #include "simeng/pipeline/LoadStoreQueue.hh" #include "simeng/pipeline/RegisterAliasTable.hh" @@ -85,6 +86,12 @@ class ReorderBuffer { /** Get the number of speculated loads which violated load-store ordering. */ uint64_t getViolatingLoadsCount() const; + /** Retrieve the number of branch mispredictions. */ + uint64_t getBranchMispredictedCount() const; + + /** Retrieve the number of retired branches. */ + uint64_t getRetiredBranchesCount() const; + private: /** A reference to the register alias table. */ RegisterAliasTable& rat_; @@ -144,6 +151,12 @@ class ReorderBuffer { /** The number of speculative loads which violated load-store ordering. */ uint64_t loadViolations_ = 0; + + /** The number of branch mispredictions that were observed. 
*/ + uint64_t branchMispredicts_ = 0; + + /** The number of retired branch instructions */ + uint64_t retiredBranches_ = 0; }; } // namespace pipeline diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt index ffabd8bbca..ae659e2338 100644 --- a/src/lib/CMakeLists.txt +++ b/src/lib/CMakeLists.txt @@ -14,6 +14,9 @@ set(SIMENG_SOURCES arch/riscv/Instruction_decode.cc arch/riscv/Instruction_execute.cc arch/riscv/InstructionMetadata.cc + branchpredictors/AlwaysNotTakenPredictor.cc + branchpredictors/GenericPredictor.cc + branchpredictors/PerceptronPredictor.cc config/ModelConfig.cc kernel/Linux.cc kernel/LinuxProcess.cc @@ -35,13 +38,10 @@ set(SIMENG_SOURCES pipeline/RenameUnit.cc pipeline/ReorderBuffer.cc pipeline/WritebackUnit.cc - AlwaysNotTakenPredictor.cc ArchitecturalRegisterFileSet.cc CMakeLists.txt CoreInstance.cc Elf.cc - GenericPredictor.cc - PerceptronPredictor.cc RegisterFileSet.cc RegisterValue.cc SpecialFileDirGen.cc diff --git a/src/lib/arch/aarch64/Instruction.cc b/src/lib/arch/aarch64/Instruction.cc index 13c75117df..e3b697433e 100644 --- a/src/lib/arch/aarch64/Instruction.cc +++ b/src/lib/arch/aarch64/Instruction.cc @@ -108,7 +108,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { if (!isBranch()) { // Instruction isn't a branch; if predicted as taken, it will require a // flush - return {prediction_.taken, instructionAddress_ + 4}; + return {prediction_.isTaken, instructionAddress_ + 4}; } // Not enough information to determine this was a misprediction diff --git a/src/lib/arch/riscv/Instruction.cc b/src/lib/arch/riscv/Instruction.cc index 29c4793c44..c71b581a60 100644 --- a/src/lib/arch/riscv/Instruction.cc +++ b/src/lib/arch/riscv/Instruction.cc @@ -103,7 +103,7 @@ std::tuple Instruction::checkEarlyBranchMisprediction() const { if (!isBranch()) { // Instruction isn't a branch; if predicted as taken, it will require a // flush - return {prediction_.taken, instructionAddress_ + 4}; + return {prediction_.isTaken, 
instructionAddress_ + 4}; } // Not enough information to determine this was a misprediction diff --git a/src/lib/AlwaysNotTakenPredictor.cc b/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc similarity index 73% rename from src/lib/AlwaysNotTakenPredictor.cc rename to src/lib/branchpredictors/AlwaysNotTakenPredictor.cc index b7f33e6c22..f9ccb416bc 100644 --- a/src/lib/AlwaysNotTakenPredictor.cc +++ b/src/lib/branchpredictors/AlwaysNotTakenPredictor.cc @@ -1,15 +1,14 @@ -#include "simeng/AlwaysNotTakenPredictor.hh" +#include "simeng/branchpredictors/AlwaysNotTakenPredictor.hh" namespace simeng { - BranchPrediction AlwaysNotTakenPredictor::predict( [[maybe_unused]] uint64_t address, BranchType type, int64_t knownOffset) { return {false, 0}; } void AlwaysNotTakenPredictor::update(uint64_t address, bool taken, - uint64_t targetAddress, BranchType type) {} + uint64_t targetAddress, BranchType type, + uint64_t instructionId) {} void AlwaysNotTakenPredictor::flush(uint64_t address) {} - } // namespace simeng diff --git a/src/lib/GenericPredictor.cc b/src/lib/branchpredictors/GenericPredictor.cc similarity index 53% rename from src/lib/GenericPredictor.cc rename to src/lib/branchpredictors/GenericPredictor.cc index 7379c661af..fce3fd2b1d 100644 --- a/src/lib/GenericPredictor.cc +++ b/src/lib/branchpredictors/GenericPredictor.cc @@ -1,4 +1,4 @@ -#include "simeng/GenericPredictor.hh" +#include "simeng/branchpredictors/GenericPredictor.hh" #include @@ -20,37 +20,43 @@ GenericPredictor::GenericPredictor(ryml::ConstNodeRef config) ? weaklyTaken : (weaklyTaken - 1); // Create branch prediction structures - btb_ = - std::vector>(1 << btbBits_, {satCntVal, 0}); - // Alter globalHistoryLength_ value to better suit required format in update() - globalHistoryLength_ = (1 << globalHistoryLength_) - 1; + btb_ = std::vector>(1ull << btbBits_, + {satCntVal, 0}); + + // Generate a bitmask that is used to ensure only the relevant number of + // bits are stored in the global history. 
This is two times the + // globalHistoryLength_ to allow rolling back of the speculatively updated + // global history in the event of a misprediction. + globalHistoryMask_ = (1ull << (globalHistoryLength_ * 2)) - 1; } GenericPredictor::~GenericPredictor() { btb_.clear(); ras_.clear(); rasHistory_.clear(); + ftq_.clear(); } BranchPrediction GenericPredictor::predict(uint64_t address, BranchType type, int64_t knownOffset) { - // Get index via an XOR hash between the global history and the lower btbBits_ - // bits of the instruction address - uint64_t hashedIndex = (address & ((1 << btbBits_) - 1)) ^ globalHistory_; - btbHistory_[address] = hashedIndex; + // Get index via an XOR hash between the global history and the instruction + // address. This hash is then ANDed to keep it within bounds of the btb. + // The address is shifted to remove the two least-significant bits as these + // are always 0 in an ISA with 4-byte aligned instructions. + uint64_t hashedIndex = + ((address >> 2) ^ globalHistory_) & ((1ull << btbBits_) - 1); // Get prediction from BTB - bool direction = - btb_[hashedIndex].first < (1 << (satCntBits_ - 1)) ? false : true; + bool direction = btb_[hashedIndex].first >= (1ull << (satCntBits_ - 1)); uint64_t target = (knownOffset != 0) ? 
address + knownOffset : btb_[hashedIndex].second; BranchPrediction prediction = {direction, target}; // Amend prediction based on branch type if (type == BranchType::Unconditional) { - prediction.taken = true; + prediction.isTaken = true; } else if (type == BranchType::Return) { - prediction.taken = true; + prediction.isTaken = true; // Return branches can use the RAS if an entry is available if (ras_.size() > 0) { prediction.target = ras_.back(); @@ -59,7 +65,7 @@ BranchPrediction GenericPredictor::predict(uint64_t address, BranchType type, ras_.pop_back(); } } else if (type == BranchType::SubroutineCall) { - prediction.taken = true; + prediction.isTaken = true; // Subroutine call branches must push their associated return address to RAS if (ras_.size() >= rasSize_) { ras_.pop_front(); @@ -68,30 +74,53 @@ BranchPrediction GenericPredictor::predict(uint64_t address, BranchType type, // Record that this address is a branch-and-link instruction rasHistory_[address] = 0; } else if (type == BranchType::Conditional) { - if (!prediction.taken) prediction.target = address + 4; + if (!prediction.isTaken) prediction.target = address + 4; } + + // Store the hashed index for correct hashing in update() + ftq_.emplace_back(prediction.isTaken, hashedIndex); + + // Speculatively update the global history + globalHistory_ = + ((globalHistory_ << 1) | prediction.isTaken) & globalHistoryMask_; + return prediction; } -void GenericPredictor::update(uint64_t address, bool taken, - uint64_t targetAddress, BranchType type) { - // Get previous index calculated for the instruction address supplied - uint64_t hashedIndex = btbHistory_[address]; +void GenericPredictor::update(uint64_t address, bool isTaken, + uint64_t targetAddress, BranchType type, + uint64_t instructionId) { + // Make sure that this function is called in program order; and then update + // the lastUpdatedInstructionId variable + assert(instructionId >= lastUpdatedInstructionId_ && + (lastUpdatedInstructionId_ = 
instructionId) >= 0 && + "Update not called on branch instructions in program order"); + + // Get previous prediction and index calculated from the FTQ + bool prevPrediction = ftq_.front().first; + uint64_t hashedIndex = ftq_.front().second; + ftq_.pop_front(); // Calculate 2-bit saturating counter value uint8_t satCntVal = btb_[hashedIndex].first; // Only alter value if it would transition to a valid state - if (!((satCntVal == (1 << satCntBits_) - 1) && taken) && - !(satCntVal == 0 && !taken)) { - satCntVal += taken ? 1 : -1; + if (!((satCntVal == (1 << satCntBits_) - 1) && isTaken) && + !(satCntVal == 0 && !isTaken)) { + satCntVal += isTaken ? 1 : -1; } // Update BTB entry - btb_[hashedIndex] = {satCntVal, targetAddress}; + btb_[hashedIndex].first = satCntVal; + if (isTaken) { + btb_[hashedIndex].second = targetAddress; + } - // Update global history value with new direction - globalHistory_ = ((globalHistory_ << 1) | taken) & globalHistoryLength_; - return; + // Update global history if prediction was incorrect + if (prevPrediction != isTaken) { + // Bit-flip the global history bit corresponding to this prediction + // We know how many predictions there have since been by the size of the FTQ + globalHistory_ ^= (1ull << (ftq_.size())); + } } void GenericPredictor::flush(uint64_t address) { @@ -115,6 +144,13 @@ void GenericPredictor::flush(uint64_t address) { } rasHistory_.erase(it); } -} + assert((ftq_.size() > 0) && + "Cannot flush instruction from Branch Predictor " + "when the ftq is empty"); + ftq_.pop_back(); + + // Roll back global history + globalHistory_ >>= 1; +} } // namespace simeng diff --git a/src/lib/PerceptronPredictor.cc b/src/lib/branchpredictors/PerceptronPredictor.cc similarity index 59% rename from src/lib/PerceptronPredictor.cc rename to src/lib/branchpredictors/PerceptronPredictor.cc index 8d1202f8d6..2e517939eb 100644 --- a/src/lib/PerceptronPredictor.cc +++ b/src/lib/branchpredictors/PerceptronPredictor.cc @@ -1,4 +1,4 @@ -#include 
"simeng/PerceptronPredictor.hh" +#include "simeng/branchpredictors/PerceptronPredictor.hh" namespace simeng { @@ -8,8 +8,9 @@ PerceptronPredictor::PerceptronPredictor(ryml::ConstNodeRef config) config["Branch-Predictor"]["Global-History-Length"].as()), rasSize_(config["Branch-Predictor"]["RAS-entries"].as()) { // Build BTB based on config options - uint32_t btbSize = (1 << btbBits_); + uint32_t btbSize = (1ul << btbBits_); btb_.resize(btbSize); + // Initialise perceptron values with 0 for the global history weights, and 1 // for the bias weight; and initialise the target with 0 (i.e., unknown) for (uint32_t i = 0; i < btbSize; i++) { @@ -20,11 +21,18 @@ PerceptronPredictor::PerceptronPredictor(ryml::ConstNodeRef config) // Set up training threshold according to empirically determined formula trainingThreshold_ = (uint64_t)((1.93 * globalHistoryLength_) + 14); + + // Generate a bitmask that is used to ensure only the relevant number of + // bits are stored in the global history. This is two times the + // globalHistoryLength_ to allow rolling back of the speculatively updated + // global history in the event of a misprediction. + globalHistoryMask_ = (1ull << (globalHistoryLength_ * 2)) - 1; } PerceptronPredictor::~PerceptronPredictor() { ras_.clear(); rasHistory_.clear(); + ftq_.clear(); } BranchPrediction PerceptronPredictor::predict(uint64_t address, BranchType type, @@ -32,23 +40,22 @@ BranchPrediction PerceptronPredictor::predict(uint64_t address, BranchType type, // Get the hashed index for the prediction table. XOR the global history with // the non-zero bits of the address, and then keep only the btbBits_ bits of // the output to keep it in bounds of the prediction table. + // The address is shifted to remove the two least-significant bits as these + // are always 0 in an ISA with 4-byte aligned instructions. 
uint64_t hashedIndex = - ((address >> 2) ^ globalHistory_) & ((1 << btbBits_) - 1); - - // Store the global history for correct hashing in update() -- - // needs to be global history and not the hashed index as hashing loses - // information at longer global history lengths - btbHistory_[address] = globalHistory_; + ((address >> 2) ^ globalHistory_) & ((1ull << btbBits_) - 1); // Retrieve the perceptron from the BTB std::vector perceptron = btb_[hashedIndex].first; // Get dot product of perceptron and history int64_t Pout = getDotProduct(perceptron, globalHistory_); + // Determine direction prediction based on its sign bool direction = (Pout >= 0); - // Retrieve target prediction + // If there is a known offset then calculate target accordingly, otherwise + // retrieve the target prediction from the btb. uint64_t target = (knownOffset != 0) ? address + knownOffset : btb_[hashedIndex].second; @@ -56,9 +63,9 @@ BranchPrediction PerceptronPredictor::predict(uint64_t address, BranchType type, // Amend prediction based on branch type if (type == BranchType::Unconditional) { - prediction.taken = true; + prediction.isTaken = true; } else if (type == BranchType::Return) { - prediction.taken = true; + prediction.isTaken = true; // Return branches can use the RAS if an entry is available if (ras_.size() > 0) { prediction.target = ras_.back(); @@ -67,7 +74,7 @@ BranchPrediction PerceptronPredictor::predict(uint64_t address, BranchType type, ras_.pop_back(); } } else if (type == BranchType::SubroutineCall) { - prediction.taken = true; + prediction.isTaken = true; // Subroutine call branches must push their associated return address to RAS if (ras_.size() >= rasSize_) { ras_.pop_front(); @@ -76,35 +83,57 @@ BranchPrediction PerceptronPredictor::predict(uint64_t address, BranchType type, // Record that this address is a branch-and-link instruction rasHistory_[address] = 0; } else if (type == BranchType::Conditional) { - if (!prediction.taken) prediction.target = address + 4; + 
if (!prediction.isTaken) prediction.target = address + 4; } + + // Store the Pout and global history for correct update() -- + // needs to be global history and not the hashed index as hashing loses + // information and the global history is required for updating perceptrons. + ftq_.emplace_back(Pout, globalHistory_); + + // Speculatively update the global history based on the direction + // prediction being made + globalHistory_ = + ((globalHistory_ << 1) | prediction.isTaken) & globalHistoryMask_; + return prediction; } -void PerceptronPredictor::update(uint64_t address, bool taken, - uint64_t targetAddress, BranchType type) { - // Work out hash index - uint64_t prevGlobalHistory = btbHistory_[address]; +void PerceptronPredictor::update(uint64_t address, bool isTaken, + uint64_t targetAddress, BranchType type, + uint64_t instructionId) { + // Make sure that this function is called in program order; and then update + // the lastUpdatedInstructionId variable + assert(instructionId >= lastUpdatedInstructionId_ && + (lastUpdatedInstructionId_ = instructionId) >= 0 && + "Update not called on branch instructions in program order"); + + // Retrieve the previous global history and branch direction prediction from + // the front of the ftq (assumes branches are updated in program order). 
+ int64_t prevPout = ftq_.front().first; + uint64_t prevGlobalHistory = ftq_.front().second; + ftq_.pop_front(); + + // Work out hashed index uint64_t hashedIndex = - ((address >> 2) ^ prevGlobalHistory) & ((1 << btbBits_) - 1); + ((address >> 2) ^ prevGlobalHistory) & ((1ull << btbBits_) - 1); std::vector perceptron = btb_[hashedIndex].first; // Work out the most recent prediction - int64_t Pout = getDotProduct(perceptron, prevGlobalHistory); - bool directionPrediction = (Pout >= 0); + bool directionPrediction = (prevPout >= 0); // Update the perceptron if the prediction was wrong, or the dot product's // magnitude was not greater than the training threshold - if ((directionPrediction != taken) || - (static_cast(std::abs(Pout)) < trainingThreshold_)) { - int8_t t = (taken) ? 1 : -1; + if ((directionPrediction != isTaken) || + (static_cast(std::abs(prevPout)) < trainingThreshold_)) { + int8_t t = (isTaken) ? 1 : -1; for (uint64_t i = 0; i < globalHistoryLength_; i++) { - int8_t xi = - ((prevGlobalHistory & (1 << ((globalHistoryLength_ - 1) - i))) == 0) - ? -1 - : 1; + int8_t xi = ((prevGlobalHistory & + (1ull << ((globalHistoryLength_ - 1) - i))) == 0) + ? 
-1 + : 1; int8_t product_xi_t = xi * t; // Make sure no overflow (+-127) if (!(perceptron[i] == 127 && product_xi_t == 1) && @@ -116,11 +145,14 @@ void PerceptronPredictor::update(uint64_t address, bool taken, } btb_[hashedIndex].first = perceptron; - btb_[hashedIndex].second = targetAddress; + if (isTaken) { + btb_[hashedIndex].second = targetAddress; + } - globalHistory_ = - ((globalHistory_ << 1) | taken) & ((1 << globalHistoryLength_) - 1); - return; + // Update global history if prediction was incorrect + // Bit-flip the global history bit corresponding to this prediction + // We know how many predictions there have since been by the size of the FTQ + if (directionPrediction != isTaken) globalHistory_ ^= (1ull << (ftq_.size())); } void PerceptronPredictor::flush(uint64_t address) { @@ -144,6 +176,14 @@ void PerceptronPredictor::flush(uint64_t address) { } rasHistory_.erase(it); } + + assert((ftq_.size() > 0) && + "Cannot flush instruction from Branch Predictor " + "when the ftq is empty"); + ftq_.pop_back(); + + // Roll back global history + globalHistory_ >>= 1; } int64_t PerceptronPredictor::getDotProduct( @@ -152,7 +192,7 @@ int64_t PerceptronPredictor::getDotProduct( for (uint64_t i = 0; i < globalHistoryLength_; i++) { // Get branch direction for ith entry in the history bool historyTaken = - ((history & (1 << ((globalHistoryLength_ - 1) - i))) != 0); + ((history & (1ull << ((globalHistoryLength_ - 1) - i))) != 0); Pout += historyTaken ? 
perceptron[i] : (0 - perceptron[i]); } return Pout; diff --git a/src/lib/config/ModelConfig.cc b/src/lib/config/ModelConfig.cc index 049c24d04a..6d6152ced4 100644 --- a/src/lib/config/ModelConfig.cc +++ b/src/lib/config/ModelConfig.cc @@ -514,7 +514,7 @@ void ModelConfig::setExpectations(bool isDefault) { expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation(8, "Global-History-Length")); expectations_["Branch-Predictor"]["Global-History-Length"] - .setValueBounds(1, UINT16_MAX); + .setValueBounds(1, 32); expectations_["Branch-Predictor"].addChild( ExpectationNode::createExpectation(8, "RAS-entries")); diff --git a/src/lib/models/inorder/Core.cc b/src/lib/models/inorder/Core.cc index b196d2cf8c..add307f714 100644 --- a/src/lib/models/inorder/Core.cc +++ b/src/lib/models/inorder/Core.cc @@ -30,8 +30,7 @@ Core::Core(memory::MemoryInterface& instructionMemory, [this](auto regs, auto values) { forwardOperands(regs, values); }, [this](auto instruction) { handleLoad(instruction); }, [this](auto instruction) { storeData(instruction); }, - [this](auto instruction) { raiseException(instruction); }, - branchPredictor, false), + [this](auto instruction) { raiseException(instruction); }, false), writebackUnit_(completionSlots_, registerFileSet_, [](auto insnId) {}) { // Query and apply initial state auto state = isa.getInitialState(); @@ -148,23 +147,10 @@ std::map Core::getStats() const { std::ostringstream ipcStr; ipcStr << std::setprecision(2) << ipc; - // Sum up the branch stats reported across the execution units. 
- uint64_t totalBranchesExecuted = 0; - uint64_t totalBranchMispredicts = 0; - totalBranchesExecuted += executeUnit_.getBranchExecutedCount(); - totalBranchMispredicts += executeUnit_.getBranchMispredictedCount(); - auto branchMissRate = 100.0f * static_cast(totalBranchMispredicts) / - static_cast(totalBranchesExecuted); - std::ostringstream branchMissRateStr; - branchMissRateStr << std::setprecision(3) << branchMissRate << "%"; - return {{"cycles", std::to_string(ticks_)}, {"retired", std::to_string(retired)}, {"ipc", ipcStr.str()}, - {"flushes", std::to_string(flushes_)}, - {"branch.executed", std::to_string(totalBranchesExecuted)}, - {"branch.mispredict", std::to_string(totalBranchMispredicts)}, - {"branch.missrate", branchMissRateStr.str()}}; + {"flushes", std::to_string(flushes_)}}; } void Core::raiseException(const std::shared_ptr& instruction) { diff --git a/src/lib/models/outoforder/Core.cc b/src/lib/models/outoforder/Core.cc index 4f7cf0f42d..af920e32aa 100644 --- a/src/lib/models/outoforder/Core.cc +++ b/src/lib/models/outoforder/Core.cc @@ -72,7 +72,8 @@ Core::Core(memory::MemoryInterface& instructionMemory, config["LSQ-L1-Interface"]["Permitted-Stores-Per-Cycle"] .as()), portAllocator_(portAllocator), - commitWidth_(config["Pipeline-Widths"]["Commit"].as()) { + commitWidth_(config["Pipeline-Widths"]["Commit"].as()), + branchPredictor_(branchPredictor) { for (size_t i = 0; i < config["Execution-Units"].num_children(); i++) { // Create vector of blocking groups std::vector blockingGroups = {}; @@ -87,7 +88,7 @@ Core::Core(memory::MemoryInterface& instructionMemory, }, [this](auto uop) { loadStoreQueue_.startLoad(uop); }, [this](auto uop) { loadStoreQueue_.supplyStoreData(uop); }, - [](auto uop) { uop->setCommitReady(); }, branchPredictor, + [](auto uop) { uop->setCommitReady(); }, config["Execution-Units"][i]["Pipelined"].as(), blockingGroups); } // Provide reservation size getter to A64FX port allocator @@ -223,16 +224,12 @@ std::map Core::getStats() 
const { auto backendStalls = dispatchIssueUnit_.getBackendStalls(); auto portBusyStalls = dispatchIssueUnit_.getPortBusyStalls(); - uint64_t totalBranchesExecuted = 0; - uint64_t totalBranchMispredicts = 0; + uint64_t totalBranchesFetched = fetchUnit_.getBranchFetchedCount(); + uint64_t totalBranchesRetired = reorderBuffer_.getRetiredBranchesCount(); + uint64_t totalBranchMispredicts = reorderBuffer_.getBranchMispredictedCount(); - // Sum up the branch stats reported across the execution units. - for (auto& eu : executionUnits_) { - totalBranchesExecuted += eu.getBranchExecutedCount(); - totalBranchMispredicts += eu.getBranchMispredictedCount(); - } - auto branchMissRate = 100.0f * static_cast(totalBranchMispredicts) / - static_cast(totalBranchesExecuted); + auto branchMissRate = 100.0 * static_cast(totalBranchMispredicts) / + static_cast(totalBranchesRetired); std::ostringstream branchMissRateStr; branchMissRateStr << std::setprecision(3) << branchMissRate << "%"; @@ -250,8 +247,9 @@ std::map Core::getStats() const { {"issue.frontendStalls", std::to_string(frontendStalls)}, {"issue.backendStalls", std::to_string(backendStalls)}, {"issue.portBusyStalls", std::to_string(portBusyStalls)}, - {"branch.executed", std::to_string(totalBranchesExecuted)}, - {"branch.mispredict", std::to_string(totalBranchMispredicts)}, + {"branch.fetched", std::to_string(totalBranchesFetched)}, + {"branch.retired", std::to_string(totalBranchesRetired)}, + {"branch.mispredicted", std::to_string(totalBranchMispredicts)}, {"branch.missrate", branchMissRateStr.str()}, {"lsq.loadViolations", std::to_string(reorderBuffer_.getViolatingLoadsCount())}}; @@ -263,12 +261,18 @@ void Core::raiseException(const std::shared_ptr& instruction) { } void Core::handleException() { + // Check for branch instructions in buffer, and flush them from the BP. 
+ // Then empty the buffers + branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); + branchPredictor_.flushBranchesInBufferFromSelf(decodeToRenameBuffer_); decodeToRenameBuffer_.fill(nullptr); decodeToRenameBuffer_.stall(false); + // Instructions in this buffer are already accounted for in the ROB so no + // need to check for branch instructions in this buffer renameToDispatchBuffer_.fill(nullptr); renameToDispatchBuffer_.stall(false); @@ -343,14 +347,20 @@ void Core::flushIfNeeded() { targetAddress = reorderBuffer_.getFlushAddress(); } + // Check for branch instructions in buffer, and flush them from the BP. + // Then empty the buffers fetchUnit_.flushLoopBuffer(); fetchUnit_.updatePC(targetAddress); + branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); + branchPredictor_.flushBranchesInBufferFromSelf(decodeToRenameBuffer_); decodeToRenameBuffer_.fill(nullptr); decodeToRenameBuffer_.stall(false); + // Instructions in this buffer are already accounted for in the ROB so no + // need to check for branch instructions in this buffer renameToDispatchBuffer_.fill(nullptr); renameToDispatchBuffer_.stall(false); @@ -369,8 +379,11 @@ void Core::flushIfNeeded() { // Update PC and wipe Fetch/Decode buffer. targetAddress = decodeUnit_.getFlushAddress(); + // Check for branch instructions in buffer, and flush them from the BP. 
+ // Then empty the buffers fetchUnit_.flushLoopBuffer(); fetchUnit_.updatePC(targetAddress); + branchPredictor_.flushBranchesInBufferFromSelf(fetchToDecodeBuffer_); fetchToDecodeBuffer_.fill({}); fetchToDecodeBuffer_.stall(false); diff --git a/src/lib/pipeline/DecodeUnit.cc b/src/lib/pipeline/DecodeUnit.cc index cd152be942..31df59fa66 100644 --- a/src/lib/pipeline/DecodeUnit.cc +++ b/src/lib/pipeline/DecodeUnit.cc @@ -64,7 +64,7 @@ void DecodeUnit::tick() { if (!uop->isBranch()) { // Non-branch incorrectly predicted as a branch; let the predictor know predictor_.update(uop->getInstructionAddress(), false, pc_, - uop->getBranchType()); + uop->getBranchType(), uop->getInstructionId()); } // Remove macro-operations in microOps_ buffer after macro-operation // decoded in this cycle @@ -93,7 +93,13 @@ bool DecodeUnit::shouldFlush() const { return shouldFlush_; } uint64_t DecodeUnit::getFlushAddress() const { return pc_; } uint64_t DecodeUnit::getEarlyFlushes() const { return earlyFlushes_; } -void DecodeUnit::purgeFlushed() { microOps_.clear(); } +void DecodeUnit::purgeFlushed() { + while (!microOps_.empty()) { + if (microOps_.back()->isBranch()) + predictor_.flush(microOps_.back()->getInstructionAddress()); + microOps_.pop_back(); + } +} } // namespace pipeline } // namespace simeng diff --git a/src/lib/pipeline/ExecuteUnit.cc b/src/lib/pipeline/ExecuteUnit.cc index c87c2e1845..28a04dde28 100644 --- a/src/lib/pipeline/ExecuteUnit.cc +++ b/src/lib/pipeline/ExecuteUnit.cc @@ -13,15 +13,13 @@ ExecuteUnit::ExecuteUnit( std::function&)> handleLoad, std::function&)> handleStore, std::function&)> raiseException, - BranchPredictor& predictor, bool pipelined, - const std::vector& blockingGroups) + bool pipelined, const std::vector& blockingGroups) : input_(input), output_(output), forwardOperands_(forwardOperands), handleLoad_(handleLoad), handleStore_(handleStore), raiseException_(raiseException), - predictor_(predictor), pipelined_(pipelined), blockingGroups_(blockingGroups) 
{} @@ -141,19 +139,10 @@ void ExecuteUnit::execute(std::shared_ptr& uop) { if (uop->isBranch()) { pc_ = uop->getBranchAddress(); - // Update branch predictor with branch results - predictor_.update(uop->getInstructionAddress(), uop->wasBranchTaken(), pc_, - uop->getBranchType()); - - // Update the branch instruction counter - branchesExecuted_++; - if (uop->wasBranchMispredicted()) { // Misprediction; flush the pipeline shouldFlush_ = true; flushAfter_ = uop->getInstructionId(); - // Update the branch misprediction counter - branchMispredicts_++; } } @@ -215,13 +204,6 @@ void ExecuteUnit::purgeFlushed() { } } -uint64_t ExecuteUnit::getBranchExecutedCount() const { - return branchesExecuted_; -} -uint64_t ExecuteUnit::getBranchMispredictedCount() const { - return branchMispredicts_; -} - uint64_t ExecuteUnit::getCycles() const { return cycles_; } bool ExecuteUnit::isEmpty() const { diff --git a/src/lib/pipeline/FetchUnit.cc b/src/lib/pipeline/FetchUnit.cc index 7bc59051e9..ea59f1a071 100644 --- a/src/lib/pipeline/FetchUnit.cc +++ b/src/lib/pipeline/FetchUnit.cc @@ -53,6 +53,16 @@ void FetchUnit::tick() { // Set prediction to recorded value during loop buffer filling if (macroOp[0]->isBranch()) { macroOp[0]->setBranchPrediction(loopBuffer_.front().prediction); + // Calling predict() in order to log the branch in the branch + // predictor. The branch needs to be logged in the branch predictor + // so that the branch predictor has the information needed to update + // itself when the branch instruction is retired. However, we are + // reusing the prediction from the loop buffer, thus we do not + // use the return value from predict(). 
+ branchPredictor_.predict(macroOp[0]->getInstructionAddress(), + macroOp[0]->getBranchType(), + macroOp[0]->getKnownOffset()); + branchesFetched_++; } // Cycle queue by moving front entry to back @@ -147,6 +157,7 @@ void FetchUnit::tick() { if (macroOp[0]->isBranch()) { prediction = branchPredictor_.predict(pc_, macroOp[0]->getBranchType(), macroOp[0]->getKnownOffset()); + branchesFetched_++; macroOp[0]->setBranchPrediction(prediction); } @@ -159,7 +170,7 @@ void FetchUnit::tick() { if (pc_ == loopBoundaryAddress_) { if (macroOp[0]->isBranch() && - !macroOp[0]->getBranchPrediction().taken) { + !macroOp[0]->getBranchPrediction().isTaken) { // loopBoundaryAddress_ has been fetched whilst filling the loop // buffer BUT this is a branch, predicted to branch out of the loop // being buffered. Stop filling the loop buffer and don't supply to @@ -179,7 +190,8 @@ void FetchUnit::tick() { // loopBoundaryAddress_ has been fetched whilst loop buffer is waiting, // start filling Loop Buffer if the branch predictor tells us to // reenter the detected loop - if (macroOp[0]->isBranch() && !macroOp[0]->getBranchPrediction().taken) { + if (macroOp[0]->isBranch() && + !macroOp[0]->getBranchPrediction().isTaken) { // If branch is not taken then we aren't re-entering the detected // loop, therefore Loop Buffer stays idle loopBufferState_ = LoopBufferState::IDLE; @@ -196,12 +208,12 @@ void FetchUnit::tick() { bufferOffset += bytesRead; bufferedBytes_ -= bytesRead; - if (!prediction.taken) { - // Predicted as not taken; increment PC to next instruction - pc_ += bytesRead; - } else { + if (prediction.isTaken) { // Predicted as taken; set PC to predicted target address pc_ = prediction.target; + } else { + // Predicted as not taken; increment PC to next instruction + pc_ += bytesRead; } if (pc_ >= programByteLength_) { @@ -209,7 +221,7 @@ void FetchUnit::tick() { break; } - if (prediction.taken) { + if (prediction.isTaken) { if (slot + 1 < output_.getWidth()) { branchStalls_++; } @@ 
-280,5 +292,7 @@ void FetchUnit::flushLoopBuffer() { loopBoundaryAddress_ = 0; } +uint64_t FetchUnit::getBranchFetchedCount() const { return branchesFetched_; } + } // namespace pipeline } // namespace simeng diff --git a/src/lib/pipeline/ReorderBuffer.cc b/src/lib/pipeline/ReorderBuffer.cc index 32889bf93e..e72e6e79dc 100644 --- a/src/lib/pipeline/ReorderBuffer.cc +++ b/src/lib/pipeline/ReorderBuffer.cc @@ -152,6 +152,18 @@ unsigned int ReorderBuffer::commit(uint64_t maxCommitSize) { 0}; } } + + // If it is a branch, now update the predictor (here to ensure order of + // updates is correct) + if (uop->isBranch()) { + predictor_.update(uop->getInstructionAddress(), uop->wasBranchTaken(), + uop->getBranchAddress(), uop->getBranchType(), + uop->getInstructionId()); + // Update the branches retired and mispredicted counters + retiredBranches_++; + if (uop->wasBranchMispredicted()) branchMispredicts_++; + } + buffer_.pop_front(); } @@ -206,5 +218,12 @@ uint64_t ReorderBuffer::getViolatingLoadsCount() const { return loadViolations_; } +uint64_t ReorderBuffer::getBranchMispredictedCount() const { + return branchMispredicts_; +} + +uint64_t ReorderBuffer::getRetiredBranchesCount() const { + return retiredBranches_; +} } // namespace pipeline } // namespace simeng diff --git a/test/regression/RegressionTest.cc b/test/regression/RegressionTest.cc index d4bf8e3a42..237b0518a3 100644 --- a/test/regression/RegressionTest.cc +++ b/test/regression/RegressionTest.cc @@ -2,8 +2,8 @@ #include -#include "simeng/GenericPredictor.hh" -#include "simeng/PerceptronPredictor.hh" +#include "simeng/branchpredictors/GenericPredictor.hh" +#include "simeng/branchpredictors/PerceptronPredictor.hh" #include "simeng/config/SimInfo.hh" #include "simeng/kernel/Linux.hh" #include "simeng/kernel/LinuxProcess.hh" diff --git a/test/unit/GenericPredictorTest.cc b/test/unit/GenericPredictorTest.cc index c546157021..c7d6011c29 100644 --- a/test/unit/GenericPredictorTest.cc +++ 
b/test/unit/GenericPredictorTest.cc @@ -1,6 +1,6 @@ #include "MockInstruction.hh" #include "gtest/gtest.h" -#include "simeng/GenericPredictor.hh" +#include "simeng/branchpredictors/GenericPredictor.hh" namespace simeng { @@ -24,7 +24,7 @@ TEST_F(GenericPredictorTest, Miss) { "Fallback-Static-Predictor: Always-Taken}}"); auto predictor = simeng::GenericPredictor(); auto prediction = predictor.predict(0, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); simeng::config::SimInfo::addToConfig( "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " @@ -32,9 +32,9 @@ TEST_F(GenericPredictorTest, Miss) { "Fallback-Static-Predictor: Always-Not-Taken}}"); predictor = simeng::GenericPredictor(); prediction = predictor.predict(0, BranchType::Conditional, 0); - EXPECT_FALSE(prediction.taken); + EXPECT_FALSE(prediction.isTaken); prediction = predictor.predict(8, BranchType::Unconditional, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); } // Tests that a GenericPredictor will predict branch-and-link return pairs @@ -46,35 +46,35 @@ TEST_F(GenericPredictorTest, RAS) { "Fallback-Static-Predictor: Always-Taken}}"); auto predictor = simeng::GenericPredictor(); auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 16); prediction = predictor.predict(24, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 32); prediction = predictor.predict(40, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 48); prediction = predictor.predict(56, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 64); prediction = predictor.predict(72, BranchType::SubroutineCall, 8); - 
EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 80); prediction = predictor.predict(84, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 76); prediction = predictor.predict(68, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 60); prediction = predictor.predict(52, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 44); prediction = predictor.predict(36, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 28); prediction = predictor.predict(20, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 12); } @@ -82,18 +82,23 @@ TEST_F(GenericPredictorTest, RAS) { // correctly, when no address aliasing has occurred TEST_F(GenericPredictorTest, Hit) { simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " + "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 5, " "Saturating-Count-Bits: 2, Global-History-Length: 1, RAS-entries: 5, " "Fallback-Static-Predictor: Always-Taken}}"); auto predictor = simeng::GenericPredictor(); - predictor.update(0, true, 16, BranchType::Conditional); - predictor.update(0, true, 16, BranchType::Conditional); - predictor.update(0, true, 16, BranchType::Conditional); - predictor.update(0, true, 16, BranchType::Conditional); - predictor.update(0, false, 16, BranchType::Conditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 0); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 1); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 2); + 
predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 3); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 4); auto prediction = predictor.predict(0, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 16); } @@ -101,60 +106,120 @@ TEST_F(GenericPredictorTest, Hit) { // behaviours of the same branch but in different states of the program TEST_F(GenericPredictorTest, GlobalIndexing) { simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 11, " - "Saturating-Count-Bits: 2, Global-History-Length: 5, RAS-entries: 5, " + "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 10, " + "Saturating-Count-Bits: 2, Global-History-Length: 10, RAS-entries: 5, " "Fallback-Static-Predictor: Always-Not-Taken}}"); auto predictor = simeng::GenericPredictor(); // Spool up first global history pattern - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 0); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 1); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 2); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 3); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 4); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 5); + 
predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 6); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 7); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 8); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 9); // Ensure default behaviour for first encounter - auto prediction = predictor.predict(0x1F, BranchType::Conditional, 0); - EXPECT_FALSE(prediction.taken); - EXPECT_EQ(prediction.target, 0x23); + auto prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_FALSE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); // Set entry in BTB - predictor.update(0x1F, true, 0xAB, BranchType::Conditional); + predictor.update(0x7C, true, 0xAB, BranchType::Conditional, 10); // Spool up second global history pattern - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 16, BranchType::Conditional, 11); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 12); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 13); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 14); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 16, BranchType::Conditional, 15); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 16, BranchType::Conditional, 16); + predictor.predict(0, 
BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 17); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 18); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 19); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 16, BranchType::Conditional, 20); // Ensure default behaviour for re-encounter but with different global history - prediction = predictor.predict(0x1F, BranchType::Conditional, 0); - EXPECT_FALSE(prediction.taken); - EXPECT_EQ(prediction.target, 0x23); + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_FALSE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); // Set entry in BTB - predictor.update(0x1F, true, 0xBA, BranchType::Conditional); + predictor.update(0x7C, true, 0xBA, BranchType::Conditional, 21); // Recreate first global history pattern - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 22); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 23); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 24); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 25); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 26); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 27); + predictor.predict(0, 
BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 28); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 29); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 30); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 31); // Get prediction - prediction = predictor.predict(0x1F, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 0xAB); // Set entry in BTB - predictor.update(0x1F, true, 0xAB, BranchType::Conditional); + predictor.update(0x7C, true, 0xAB, BranchType::Conditional, 32); // Recreate second global history pattern - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 33); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 34); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 35); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 36); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 37); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 16, BranchType::Conditional, 38); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 39); + predictor.predict(0, 
BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 40); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 41); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 16, BranchType::Conditional, 42); // Get prediction - prediction = predictor.predict(0x1F, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 0xBA); - predictor.update(0x1F, true, 0xBA, BranchType::Conditional); + predictor.update(0x7C, true, 0xBA, BranchType::Conditional, 43); } // Test Flush of RAS functionality @@ -166,21 +231,21 @@ TEST_F(GenericPredictorTest, flush) { auto predictor = simeng::GenericPredictor(); // Add some entries to the RAS auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 16); prediction = predictor.predict(24, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 32); prediction = predictor.predict(40, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 48); // Start getting entries from RAS prediction = predictor.predict(52, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 44); prediction = predictor.predict(36, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 28); // Flush address @@ -188,11 +253,61 @@ TEST_F(GenericPredictorTest, flush) { // Continue getting entries from RAS prediction = predictor.predict(20, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); 
EXPECT_EQ(prediction.target, 28); prediction = predictor.predict(16, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 12); } +// Test that update corrects the speculatively updated global history +TEST_F(GenericPredictorTest, speculativeGlobalHistory) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: {Type: Generic, BTB-Tag-Bits: 6, " + "Saturating-Count-Bits: 2, Global-History-Length: 6, RAS-entries: 10, " + "Fallback-Static-Predictor: Always-Taken}}"); + auto predictor = simeng::GenericPredictor(); + BranchPrediction pred; + + // Set up the target prediction for btb entry 000111 to be 65536. No other + // target predictions will be set during this test, so we can confirm that + // we are accessing this btb entry on the basis of this target prediction + pred = predictor.predict(28, BranchType::Conditional, 0); + EXPECT_TRUE(pred.isTaken); // Default behaviour is to predict taken + EXPECT_EQ(pred.target, 0); // Target prediction not yet set + predictor.update(28, true, 65536, BranchType::Conditional, 0); + + // Set up a speculative global history of 111111 on the basis of predictions + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 000011 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 000111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 001111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 011111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 111111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + + // Get prediction for address 224 to access btb entry 000111 + pred = predictor.predict(224, BranchType::Conditional, 0); // GH = 
111111 + // Confirm prediction target is 65536 + EXPECT_EQ(pred.target, 65536); + EXPECT_TRUE(pred.isTaken); + + // Now correct the speculative global history using updates + predictor.update(4, false, 8, BranchType::Conditional, 1); // GH = 011111 + predictor.update(4, false, 8, BranchType::Conditional, 2); // GH = 001111 + predictor.update(4, false, 8, BranchType::Conditional, 3); // GH = 000111 + + // Now a prediction for address 0 should access btb entry 000111 + pred = predictor.predict(0, BranchType::Conditional, 0); + EXPECT_EQ(pred.target, 65536); +} + } // namespace simeng diff --git a/test/unit/MockBranchPredictor.hh b/test/unit/MockBranchPredictor.hh index 05868a6fed..2727e6db51 100644 --- a/test/unit/MockBranchPredictor.hh +++ b/test/unit/MockBranchPredictor.hh @@ -1,7 +1,7 @@ #pragma once #include "gmock/gmock.h" -#include "simeng/BranchPredictor.hh" +#include "simeng/branchpredictors/BranchPredictor.hh" namespace simeng { @@ -10,8 +10,9 @@ class MockBranchPredictor : public BranchPredictor { public: MOCK_METHOD3(predict, BranchPrediction(uint64_t address, BranchType type, int64_t knownTarget)); - MOCK_METHOD4(update, void(uint64_t address, bool taken, - uint64_t targetAddress, BranchType type)); + MOCK_METHOD5(update, + void(uint64_t address, bool taken, uint64_t targetAddress, + BranchType type, uint64_t instructionId)); MOCK_METHOD1(flush, void(uint64_t address)); }; diff --git a/test/unit/PerceptronPredictorTest.cc b/test/unit/PerceptronPredictorTest.cc index b276b3795f..7768ab0ba0 100644 --- a/test/unit/PerceptronPredictorTest.cc +++ b/test/unit/PerceptronPredictorTest.cc @@ -1,6 +1,6 @@ #include "MockInstruction.hh" #include "gtest/gtest.h" -#include "simeng/PerceptronPredictor.hh" +#include "simeng/branchpredictors/PerceptronPredictor.hh" namespace simeng { @@ -23,9 +23,9 @@ TEST_F(PerceptronPredictorTest, Miss) { "Global-History-Length: 10, RAS-entries: 5}}"); auto predictor = simeng::PerceptronPredictor(); auto prediction = 
predictor.predict(0, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); prediction = predictor.predict(8, BranchType::Unconditional, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); } // Tests that the PerceptronPredictor will predict branch-and-link return pairs @@ -36,35 +36,35 @@ TEST_F(PerceptronPredictorTest, RAS) { "Global-History-Length: 10, RAS-entries: 10}}"); auto predictor = simeng::PerceptronPredictor(); auto prediction = predictor.predict(8, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 16); prediction = predictor.predict(24, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 32); prediction = predictor.predict(40, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 48); prediction = predictor.predict(56, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 64); prediction = predictor.predict(72, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 80); prediction = predictor.predict(84, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 76); prediction = predictor.predict(68, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 60); prediction = predictor.predict(52, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 44); prediction = predictor.predict(36, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 28); prediction = 
predictor.predict(20, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 12); } @@ -72,17 +72,22 @@ TEST_F(PerceptronPredictorTest, RAS) { // branch correctly, when no address aliasing has occurred TEST_F(PerceptronPredictorTest, Hit) { simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 11, " + "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 5, " "Global-History-Length: 1, RAS-entries: 5}}"); auto predictor = simeng::PerceptronPredictor(); - predictor.update(0, true, 16, BranchType::Conditional); - predictor.update(0, true, 16, BranchType::Conditional); - predictor.update(0, true, 16, BranchType::Conditional); - predictor.update(0, true, 16, BranchType::Conditional); - predictor.update(0, false, 16, BranchType::Conditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 0); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 1); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 2); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 3); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 16, BranchType::Conditional, 4); auto prediction = predictor.predict(0, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 16); } @@ -90,59 +95,119 @@ TEST_F(PerceptronPredictorTest, Hit) { // behaviours of the same branch but in different states of the program TEST_F(PerceptronPredictorTest, GlobalIndexing) { simeng::config::SimInfo::addToConfig( - "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 11, " - "Global-History-Length: 5, RAS-entries: 5}}"); + "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 10, " + 
"Global-History-Length: 10, RAS-entries: 5}}"); auto predictor = simeng::PerceptronPredictor(); // Spool up first global history pattern - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 0); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 1); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 2); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 3); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 4); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 5); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 6); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 7); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 8); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 9); // Ensure default behaviour for first encounter - auto prediction = predictor.predict(0x1F, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + auto prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 0); // Set entry in BTB - predictor.update(0x1F, false, 0xAB, BranchType::Conditional); + predictor.update(0x7C, false, 0x80, 
BranchType::Conditional, 10); // Spool up second global history pattern - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 11); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 12); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 13); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 14); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 15); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 16); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 17); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 18); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 19); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 20); // Ensure default behaviour for re-encounter but with different global history - prediction = predictor.predict(0x1F, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 0); // Set entry in BTB - predictor.update(0x1F, true, 0xBA, BranchType::Conditional); + predictor.update(0x7C, true, 0xBA, BranchType::Conditional, 21); // Recreate first global 
history pattern - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 22); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 23); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 24); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 25); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 26); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 27); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 28); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 29); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 30); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 31); // Get prediction - prediction = predictor.predict(0x1F, BranchType::Conditional, 0); - EXPECT_FALSE(prediction.taken); - EXPECT_EQ(prediction.target, 0x23); + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_FALSE(prediction.isTaken); + EXPECT_EQ(prediction.target, 0x80); // Set entry in BTB - predictor.update(0x1F, true, 0xAB, BranchType::Conditional); + predictor.update(0x7C, true, 0x80, BranchType::Conditional, 32); // Recreate second global history pattern - predictor.update(0, false, 4, 
BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, true, 4, BranchType::Unconditional); - predictor.update(0, false, 4, BranchType::Unconditional); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 33); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 34); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 35); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 36); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 37); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 38); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 39); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 40); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 4, BranchType::Conditional, 41); + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 42); // Get prediction - prediction = predictor.predict(0x1F, BranchType::Conditional, 0); - EXPECT_TRUE(prediction.taken); + prediction = predictor.predict(0x7C, BranchType::Conditional, 0); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 0xBA); - predictor.update(0x1F, true, 0xBA, BranchType::Conditional); + predictor.update(0x7C, true, 0xBA, BranchType::Conditional, 43); } // Test Flush of RAS functionality @@ -153,21 +218,21 @@ TEST_F(PerceptronPredictorTest, flush) { auto predictor = simeng::PerceptronPredictor(); // Add some entries to the RAS auto prediction = 
predictor.predict(8, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 16); prediction = predictor.predict(24, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 32); prediction = predictor.predict(40, BranchType::SubroutineCall, 8); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 48); // Start getting entries from RAS prediction = predictor.predict(52, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 44); prediction = predictor.predict(36, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 28); // Flush address @@ -175,11 +240,91 @@ TEST_F(PerceptronPredictorTest, flush) { // Continue getting entries from RAS prediction = predictor.predict(20, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 28); prediction = predictor.predict(16, BranchType::Return, 0); - EXPECT_TRUE(prediction.taken); + EXPECT_TRUE(prediction.isTaken); EXPECT_EQ(prediction.target, 12); } +// Test that update corrects the speculatively updated global history +TEST_F(PerceptronPredictorTest, speculativeGlobalHistory) { + simeng::config::SimInfo::addToConfig( + "{Branch-Predictor: {Type: Perceptron, BTB-Tag-Bits: 6, " + "Global-History-Length: 6, RAS-entries: 5}}"); + auto predictor = simeng::PerceptronPredictor(); + BranchPrediction pred; + + // Set up the target prediction for btb entry 000111 to be 65536. No other + // target predictions will be set during this test, so we can confirm that + // we are accessing this btb entry on the basis of this target + // prediction. 
This takes a bit more setting up than the Generic predictor + // as perceptrons are more complicated than saturating counters. + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 0); // GH = 000000 + + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 1); // GH = 000000 + + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, false, 4, BranchType::Conditional, 2); // GH = 000000 + + predictor.predict(28, BranchType::Conditional, 0); + predictor.update(28, true, 65536, BranchType::Conditional, 3); // GH = 000001 + + predictor.predict(24, BranchType::Conditional, 0); + predictor.update(24, true, 65536, BranchType::Conditional, 4); // GH = 000011 + + predictor.predict(16, BranchType::Conditional, 0); + predictor.update(16, true, 65536, BranchType::Conditional, 5); // GH = 000111 + + predictor.predict(0, BranchType::Conditional, 0); + predictor.update(0, true, 65536, BranchType::Conditional, 6); // GH = 001111 + + predictor.predict(32, BranchType::Conditional, 0); + predictor.update(32, true, 65536, BranchType::Conditional, 7); // GH = 011111 + + predictor.predict(96, BranchType::Conditional, 0); + predictor.update(96, true, 65536, BranchType::Conditional, 8); // GH = 111111 + + pred = predictor.predict(224, BranchType::Conditional, 0); + EXPECT_TRUE(pred.isTaken); // Should be set to taken + EXPECT_EQ(pred.target, 65536); // Should be set to 65536 + predictor.update(224, true, 65536, BranchType::Conditional, + 9); // GH = 111111 + + // Set up a speculative global history of 111111 on the basis of predictions + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 111111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 111111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 
111111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 111111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + pred = predictor.predict(4, BranchType::Conditional, 0); // GH = 111111 + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 0); + + // Get prediction for address 224 to access btb entry 000111 + pred = predictor.predict(224, BranchType::Conditional, 0); // GH = 111111 + // Confirm prediction target is 65536 + EXPECT_EQ(pred.target, 65536); + EXPECT_TRUE(pred.isTaken); + + // Now correct the speculative global history using updates + predictor.update(4, false, 8, BranchType::Conditional, 10); // GH = 011111 + predictor.update(4, false, 8, BranchType::Conditional, 11); // GH = 001111 + predictor.update(4, false, 8, BranchType::Conditional, 12); // GH = 000111 + + // Now a prediction for address 0 should access btb entry 000111 + pred = predictor.predict(0, BranchType::Conditional, 0); + EXPECT_TRUE(pred.isTaken); + EXPECT_EQ(pred.target, 65536); +} + } // namespace simeng diff --git a/test/unit/pipeline/DecodeUnitTest.cc b/test/unit/pipeline/DecodeUnitTest.cc index f86dbc0caf..eed1ab60ae 100644 --- a/test/unit/pipeline/DecodeUnitTest.cc +++ b/test/unit/pipeline/DecodeUnitTest.cc @@ -82,7 +82,8 @@ TEST_F(PipelineDecodeUnitTest, Flush) { EXPECT_CALL(*uop, isBranch()).WillOnce(Return(false)); // Check the predictor is updated with the correct instruction address and PC - EXPECT_CALL(predictor, update(2, false, 1, BranchType::Unconditional)); + EXPECT_CALL(predictor, update(2, false, 1, BranchType::Unconditional, + uop->getInstructionId())); decodeUnit.tick(); diff --git a/test/unit/pipeline/ExecuteUnitTest.cc b/test/unit/pipeline/ExecuteUnitTest.cc index 0f82593ff6..3665628b8c 100644 --- a/test/unit/pipeline/ExecuteUnitTest.cc +++ b/test/unit/pipeline/ExecuteUnitTest.cc @@ -35,7 +35,7 @@ class PipelineExecuteUnitTest : public testing::Test { [this](auto instruction) { 
executionHandlers.raiseException(instruction); }, - predictor, true, {3, 4, 5}), + true, {3, 4, 5}), uop(new MockInstruction), secondUop(new MockInstruction), thirdUop(new MockInstruction), @@ -135,12 +135,6 @@ TEST_F(PipelineExecuteUnitTest, ExecuteBranch) { uop->setBranchResults(taken, pc); })); - // Check that the branch predictor was updated with the results - EXPECT_CALL(*uop, getBranchType()).Times(1); - EXPECT_CALL(predictor, - update(insnAddress, taken, pc, BranchType::Unconditional)) - .Times(1); - // Check that empty forwarding call is made EXPECT_CALL(executionHandlers, forwardOperands(IsEmpty(), IsEmpty())) .Times(1); @@ -152,8 +146,6 @@ TEST_F(PipelineExecuteUnitTest, ExecuteBranch) { EXPECT_EQ(executeUnit.shouldFlush(), false); EXPECT_EQ(output.getTailSlots()[0].get(), uop); - EXPECT_EQ(executeUnit.getBranchExecutedCount(), 1); - EXPECT_EQ(executeUnit.getBranchMispredictedCount(), 0); } // Test that an instruction that already encountered an exception will raise it @@ -288,13 +280,6 @@ TEST_F(PipelineExecuteUnitTest, mispredictedBranch) { uop->setBranchResults(taken, pc); })); - // Check that the branch predictor was updated with the results - EXPECT_CALL(*uop, getBranchType()).Times(1); - - EXPECT_CALL(predictor, - update(insnAddress, taken, pc, BranchType::Conditional)) - .Times(1); - // Check that empty forwarding call is made EXPECT_CALL(executionHandlers, forwardOperands(IsEmpty(), IsEmpty())) .Times(1); @@ -306,8 +291,6 @@ TEST_F(PipelineExecuteUnitTest, mispredictedBranch) { EXPECT_EQ(executeUnit.shouldFlush(), true); EXPECT_EQ(output.getTailSlots()[0].get(), uop); - EXPECT_EQ(executeUnit.getBranchExecutedCount(), 1); - EXPECT_EQ(executeUnit.getBranchMispredictedCount(), 1); EXPECT_EQ(executeUnit.getFlushAddress(), pc); EXPECT_EQ(executeUnit.getFlushInsnId(), insnID); } diff --git a/test/unit/pipeline/FetchUnitTest.cc b/test/unit/pipeline/FetchUnitTest.cc index 8ecdc7d88b..90870fb5e2 100644 --- a/test/unit/pipeline/FetchUnitTest.cc +++ 
b/test/unit/pipeline/FetchUnitTest.cc @@ -744,6 +744,72 @@ TEST_P(PipelineFetchUnitTest, invalidMinBytesreadsDontComplete) { } } +// Test that the Fetch unit is correctly tallying the number of branch +// instructions fetched, and that the getBranchFetchedCount getter function +// returns the correct value +TEST_P(PipelineFetchUnitTest, branchesFetchedCountedIncorrectly) { + // Set instructions to be fetched from memory + memory::MemoryReadResult memReadResultA = { + {0x0, blockSize}, RegisterValue(0xFFFF, blockSize), 1}; + span nextBlockA = {&memReadResultA, 1}; + memory::MemoryReadResult memReadResultB = { + {0x10, blockSize}, RegisterValue(0xFFFF, blockSize), 1}; + span nextBlockB = {&memReadResultB, 1}; + EXPECT_CALL(memory, getCompletedReads()).WillRepeatedly(Return(nextBlockA)); + + ON_CALL(isa, getMaxInstructionSize()).WillByDefault(Return(insnMaxSizeBytes)); + + // Set the instructions to be returned from predecode + MacroOp mOp2 = {uopPtr2}; + ON_CALL(isa, predecode(_, _, Gt(0x8), _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp2), Return(4))); + ON_CALL(*uop2, isBranch()).WillByDefault(Return(true)); + MacroOp mOp = {uopPtr}; + ON_CALL(isa, predecode(_, _, Lt(0xC), _)) + .WillByDefault(DoAll(SetArgReferee<3>(mOp), Return(4))); + ON_CALL(*uop, isBranch()).WillByDefault(Return(false)); + EXPECT_CALL(predictor, predict(_, _, _)) + .WillOnce(Return(BranchPrediction({true, 0x0}))); + + // Fetch instructions from data block -- one branch instruction + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Confirm that the correct number of fetched branches has been recorded by + // the Fetch Unit + EXPECT_EQ(fetchUnit.getBranchFetchedCount(), 1); + + // Fetch the next block of instructions from memory and change the expected + // outcome of the branch predictor + fetchUnit.requestFromPC(); + EXPECT_CALL(predictor, predict(_, _, _)) + .WillRepeatedly(Return(BranchPrediction({false, 0x0}))); + + // Fetch instructions from data block -- one branch instruction 
+ for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Confirm that the correct number of fetched branches has been recorded by + // the Fetch Unit + EXPECT_EQ(fetchUnit.getBranchFetchedCount(), 2); + + const memory::MemoryAccessTarget target = {0x10, blockSize}; + EXPECT_CALL(memory, getCompletedReads()).WillRepeatedly(Return(nextBlockB)); + EXPECT_CALL(memory, requestRead(target, _)).Times(1); + + // Fetch instructions from data block -- four branch instructions + fetchUnit.requestFromPC(); + for (int i = 0; i < 4; i++) { + fetchUnit.tick(); + } + + // Confirm that the correct number of fetched branches has been recorded by + // the Fetch Unit + EXPECT_EQ(fetchUnit.getBranchFetchedCount(), 6); +} + INSTANTIATE_TEST_SUITE_P(PipelineFetchUnitTests, PipelineFetchUnitTest, ::testing::Values(std::pair(2, 4), std::pair(4, 4))); diff --git a/test/unit/pipeline/RenameUnitTest.cc b/test/unit/pipeline/RenameUnitTest.cc index 6b1dc640c5..3f3013adf6 100644 --- a/test/unit/pipeline/RenameUnitTest.cc +++ b/test/unit/pipeline/RenameUnitTest.cc @@ -427,7 +427,7 @@ TEST_F(RenameUnitTest, serializedDest) { EXPECT_CALL(*uop2, getDestinationRegisters()).Times(1); EXPECT_CALL(*uop2, isLoad()).WillOnce(Return(false)); EXPECT_CALL(*uop2, isStoreAddress()).WillOnce(Return(false)); - EXPECT_CALL(*uop2, isBranch()).WillOnce(Return(false)); + EXPECT_CALL(*uop2, isBranch()).Times(2).WillRepeatedly(Return(false)); rob.commit(1); EXPECT_EQ(rob.size(), 0); diff --git a/test/unit/pipeline/ReorderBufferTest.cc b/test/unit/pipeline/ReorderBufferTest.cc index abc33d871a..ff3b63756d 100644 --- a/test/unit/pipeline/ReorderBufferTest.cc +++ b/test/unit/pipeline/ReorderBufferTest.cc @@ -361,30 +361,43 @@ TEST_F(ReorderBufferTest, branch) { uopPtr->setInstructionId(0); uopPtr->setInstructionAddress(insnAddr); uopPtr->setBranchPrediction(pred); + uop->setExecuted(true); uopPtr->setCommitReady(); // First pass through ROB -- seen count reset to 0 as new branch reorderBuffer.reserve(uopPtr); - 
EXPECT_CALL(*uop, isBranch()).Times(1); + EXPECT_CALL(*uop, isBranch()).Times(2); + EXPECT_CALL(predictor, + update(4096, uop->wasBranchTaken(), uop->getBranchAddress(), + uop->getBranchType(), uop->getInstructionId())); reorderBuffer.commit(1); EXPECT_NE(loopBoundaryAddr, insnAddr); // Second pass through ROB -- seen count = 1 reorderBuffer.reserve(uopPtr); - EXPECT_CALL(*uop, isBranch()).Times(1); + EXPECT_CALL(*uop, isBranch()).Times(2); + EXPECT_CALL(predictor, + update(4096, uop->wasBranchTaken(), uop->getBranchAddress(), + uop->getBranchType(), uop->getInstructionId())); reorderBuffer.commit(1); EXPECT_NE(loopBoundaryAddr, insnAddr); // Third pass through ROB -- seen count = 2 reorderBuffer.reserve(uopPtr); - EXPECT_CALL(*uop, isBranch()).Times(1); + EXPECT_CALL(*uop, isBranch()).Times(2); + EXPECT_CALL(predictor, + update(4096, uop->wasBranchTaken(), uop->getBranchAddress(), + uop->getBranchType(), uop->getInstructionId())); reorderBuffer.commit(1); EXPECT_NE(loopBoundaryAddr, insnAddr); // Fourth pass through ROB -- seen count = 3; exceeds detection theshold, // loopBoundaryAddr updated reorderBuffer.reserve(uopPtr); - EXPECT_CALL(*uop, isBranch()).Times(1); + EXPECT_CALL(*uop, isBranch()).Times(2); + EXPECT_CALL(predictor, + update(4096, uop->wasBranchTaken(), uop->getBranchAddress(), + uop->getBranchType(), uop->getInstructionId())); reorderBuffer.commit(1); EXPECT_EQ(loopBoundaryAddr, insnAddr); @@ -397,28 +410,43 @@ TEST_F(ReorderBufferTest, branch) { // Re-do loop detecition // First pass through ROB -- seen count reset to 0 as new branch reorderBuffer.reserve(uopPtr); - EXPECT_CALL(*uop, isBranch()).Times(1); + EXPECT_CALL(*uop, isBranch()).Times(2); + EXPECT_CALL(predictor, + update(4096, uop->wasBranchTaken(), uop->getBranchAddress(), + uop->getBranchType(), uop->getInstructionId())); reorderBuffer.commit(1); EXPECT_NE(loopBoundaryAddr, insnAddr); // Second pass through ROB -- seen count = 1 reorderBuffer.reserve(uopPtr); - EXPECT_CALL(*uop, 
isBranch()).Times(1); + EXPECT_CALL(*uop, isBranch()).Times(2); + EXPECT_CALL(predictor, + update(4096, uop->wasBranchTaken(), uop->getBranchAddress(), + uop->getBranchType(), uop->getInstructionId())); reorderBuffer.commit(1); EXPECT_NE(loopBoundaryAddr, insnAddr); // Third pass through ROB -- seen count = 2 reorderBuffer.reserve(uopPtr); - EXPECT_CALL(*uop, isBranch()).Times(1); + EXPECT_CALL(*uop, isBranch()).Times(2); + EXPECT_CALL(predictor, + update(4096, uop->wasBranchTaken(), uop->getBranchAddress(), + uop->getBranchType(), uop->getInstructionId())); reorderBuffer.commit(1); EXPECT_NE(loopBoundaryAddr, insnAddr); - // Fourth pass through ROB -- seen count = 3; exceeds detection theshold, + // Fourth pass through ROB -- seen count = 3; exceeds detection threshold, // loopBoundaryAddr updated reorderBuffer.reserve(uopPtr); - EXPECT_CALL(*uop, isBranch()).Times(1); + EXPECT_CALL(*uop, isBranch()).Times(2); + EXPECT_CALL(predictor, + update(4096, uop->wasBranchTaken(), uop->getBranchAddress(), + uop->getBranchType(), uop->getInstructionId())); reorderBuffer.commit(1); EXPECT_EQ(loopBoundaryAddr, insnAddr); + + // Check that branch misprediction metrics have been correctly collected + EXPECT_EQ(reorderBuffer.getBranchMispredictedCount(), 8); } // Tests that only those destination registers which have been renamed are