Discussion:
[gem5-dev] Change in gem5/gem5[master]: cpu: split LTAGE implementation into a base TAGE and a derived LTAGE
(too old to reply)
Pau Cabre (Gerrit)
2018-11-20 00:09:38 UTC
Permalink
Pau Cabre has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/14435


Change subject: cpu: split LTAGE implementation into a base TAGE and a
derived LTAGE
......................................................................

cpu: split LTAGE implementation into a base TAGE and a derived LTAGE

The new derived LTAGE is equivalent to the original LTAGE implementation
The default values of the TAGE branch predictor match the settings of the
8C-TAGE configuration described in https://www.jilp.org/vol8/v8paper1.pdf

Change-Id: I8323adbfd5c9a45db23cfff234218280e639f9ed
Signed-off-by: Pau Cabre <***@metempsy.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/SConscript
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
A src/cpu/pred/tage.cc
A src/cpu/pred/tage.hh
6 files changed, 1,110 insertions(+), 828 deletions(-)



diff --git a/src/cpu/pred/BranchPredictor.py
b/src/cpu/pred/BranchPredictor.py
index 9d83abb..2c622cd 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -87,34 +87,55 @@
choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor")
choiceCtrBits = Param.Unsigned(2, "Bits of choice counters")

-class LTAGE(BranchPredictor):
+# TAGE branch predictor as described in
https://www.jilp.org/vol8/v8paper1.pdf
+# The default sizes below are for the 8C-TAGE configuration (63.5 Kbits)
+class TAGE(BranchPredictor):
+ type = 'TAGE'
+ cxx_class = 'TAGE'
+ cxx_header = "cpu/pred/tage.hh"
+
+ nHistoryTables = Param.Unsigned(7, "Number of history tables")
+ minHist = Param.Unsigned(5, "Minimum history size of LTAGE")
+ maxHist = Param.Unsigned(130, "Maximum history size of LTAGE")
+
+ tagTableTagWidths = VectorParam.Unsigned(
+ [0, 9, 9, 10, 10, 11, 11, 12], "Tag size in TAGE tag tables")
+ logTagTableSizes = VectorParam.Int(
+ [13, 9, 9, 9, 9, 9, 9, 9], "Log2 of TAGE table sizes")
+ logRatioBiModalHystEntries = Param.Unsigned(2,
+ "Log num of prediction entries for a shared hysteresis bit " \
+ "for the Bimodal")
+
+ tagTableCounterBits = Param.Unsigned(3, "Number of tag table counter
bits")
+ tagTableUBits = Param.Unsigned(2, "Number of tag table u bits")
+
+ histBufferSize = Param.Unsigned(2097152,
+ "A large number to track all branch histories(2MEntries
default)")
+
+ pathHistBits = Param.Unsigned(16, "Path history size")
+ logUResetPeriod = Param.Unsigned(18,
+ "Log period in number of branches to reset TAGE useful counters")
+ useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter")
+
+
+# LTAGE branch predictor as described in
+# https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf
+# It is basically a TAGE predictor plus a loop predictor
+# The differnt TAGE sizes are updated according to the paper values (256
Kbits)
+class LTAGE(TAGE):
type = 'LTAGE'
cxx_class = 'LTAGE'
cxx_header = "cpu/pred/ltage.hh"

- logRatioBiModalHystEntries = Param.Unsigned(2,
- "Log num of prediction entries for a shared hysteresis bit " \
- "for the Bimodal")
- logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor")
- nHistoryTables = Param.Unsigned(12, "Number of history tables")
- tagTableCounterBits = Param.Unsigned(3, "Number of tag table counter
bits")
- tagTableUBits = Param.Unsigned(2, "Number of tag table u bits")
- histBufferSize = Param.Unsigned(2097152,
- "A large number to track all branch histories(2MEntries
default)")
- minHist = Param.Unsigned(4, "Minimum history size of LTAGE")
- maxHist = Param.Unsigned(640, "Maximum history size of LTAGE")
- pathHistBits = Param.Unsigned(16, "Path history size")
- tagTableTagWidths = VectorParam.Unsigned(
- [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15],
- "Tag size in TAGE tag tables")
- logTagTableSizes = VectorParam.Int(
- [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9],
- "Log2 of TAGE table sizes")
- logUResetPeriod = Param.Unsigned(19,
- "Log period in number of branches to reset TAGE useful counters")
- useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter")
- withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter")
+ nHistoryTables = 12
+ minHist = 4
+ maxHist = 640
+ tagTableTagWidths = [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15]
+ logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9]
+ logUResetPeriod = 19

+ logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor")
+ withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter")
loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop
entry")
loopTableConfidenceBits = Param.Unsigned(2,
"Number of confidence bits per loop entry")
diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript
index 1cdf7bb..b0e6309 100644
--- a/src/cpu/pred/SConscript
+++ b/src/cpu/pred/SConscript
@@ -43,7 +43,9 @@
Source('ras.cc')
Source('tournament.cc')
Source ('bi_mode.cc')
+Source('tage.cc')
Source('ltage.cc')
DebugFlag('FreeList')
DebugFlag('Branch')
+DebugFlag('Tage')
DebugFlag('LTage')
diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index d6cc087..3be0193 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -48,16 +48,8 @@
#include "debug/LTage.hh"

LTAGE::LTAGE(const LTAGEParams *params)
- : BPredUnit(params),
- logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
+ : TAGE(params),
logSizeLoopPred(params->logSizeLoopPred),
- nHistoryTables(params->nHistoryTables),
- tagTableCounterBits(params->tagTableCounterBits),
- tagTableUBits(params->tagTableUBits),
- histBufferSize(params->histBufferSize),
- minHist(params->minHist),
- maxHist(params->maxHist),
- pathHistBits(params->pathHistBits),
loopTableAgeBits(params->loopTableAgeBits),
loopTableConfidenceBits(params->loopTableConfidenceBits),
loopTableTagBits(params->loopTableTagBits),
@@ -66,18 +58,9 @@
confidenceThreshold((1 << loopTableConfidenceBits) - 1),
loopTagMask((1 << loopTableTagBits) - 1),
loopNumIterMask((1 << loopTableIterBits) - 1),
- tagTableTagWidths(params->tagTableTagWidths),
- logTagTableSizes(params->logTagTableSizes),
- threadHistory(params->numThreads),
- logUResetPeriod(params->logUResetPeriod),
- useAltOnNaBits(params->useAltOnNaBits),
+ loopUseCounter(0),
withLoopBits(params->withLoopBits)
{
- // Current method for periodically resetting the u counter bits only
- // works for 1 or 2 bits
- // Also make sure that it is not 0
- assert(tagTableUBits <= 2 && (tagTableUBits > 0));
-
// we use uint16_t type for these vales, so they cannot be more than
// 16 bits
assert(loopTableTagBits <= 16);
@@ -85,81 +68,7 @@

assert(logSizeLoopPred >= logLoopTableAssoc);

- // we use int type for the path history, so it cannot be more than
- // its size
- assert(pathHistBits <= (sizeof(int)*8));
-
- // initialize the counter to half of the period
- assert(logUResetPeriod != 0);
- tCounter = ULL(1) << (logUResetPeriod - 1);
-
- assert(params->histBufferSize > params->maxHist * 2);
- useAltPredForNewlyAllocated = 0;
-
- for (auto& history : threadHistory) {
- history.pathHist = 0;
- history.globalHistory = new uint8_t[histBufferSize];
- history.gHist = history.globalHistory;
- memset(history.gHist, 0, histBufferSize);
- history.ptGhist = 0;
- }
-
- histLengths = new int [nHistoryTables+1];
- histLengths[1] = minHist;
- histLengths[nHistoryTables] = maxHist;
-
- for (int i = 2; i <= nHistoryTables; i++) {
- histLengths[i] = (int) (((double) minHist *
- pow ((double) (maxHist) / (double) minHist,
- (double) (i - 1) / (double) ((nHistoryTables- 1))))
- + 0.5);
- }
-
- assert(tagTableTagWidths.size() == (nHistoryTables+1));
- assert(logTagTableSizes.size() == (nHistoryTables+1));
-
- // First entry is for the Bimodal table and it is untagged in this
- // implementation
- assert(tagTableTagWidths[0] == 0);
-
- for (auto& history : threadHistory) {
- history.computeIndices = new FoldedHistory[nHistoryTables+1];
- history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
- history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
-
- for (int i = 1; i <= nHistoryTables; i++) {
- history.computeIndices[i].init(
- histLengths[i], (logTagTableSizes[i]));
- history.computeTags[0][i].init(
- history.computeIndices[i].origLength,
tagTableTagWidths[i]);
- history.computeTags[1][i].init(
- history.computeIndices[i].origLength,
tagTableTagWidths[i]-1);
- DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
- histLengths[i], logTagTableSizes[i],
tagTableTagWidths[i]);
- }
- }
-
- const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
- btablePrediction.resize(bimodalTableSize, false);
- btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
- true);
-
ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
- gtable = new TageEntry*[nHistoryTables + 1];
- for (int i = 1; i <= nHistoryTables; i++) {
- gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
- }
-
- tableIndices = new int [nHistoryTables+1];
- tableTags = new int [nHistoryTables+1];
-
- loopUseCounter = 0;
-}
-
-int
-LTAGE::bindex(Addr pc_in) const
-{
- return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) -
1));
}

int
@@ -176,113 +85,9 @@
return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
}

-int
-LTAGE::F(int A, int size, int bank) const
-{
- int A1, A2;
-
- A = A & ((ULL(1) << size) - 1);
- A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
- A2 = (A >> logTagTableSizes[bank]);
- A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
- + (A2 >> (logTagTableSizes[bank] - bank));
- A = A1 ^ A2;
- A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
- + (A >> (logTagTableSizes[bank] - bank));
- return (A);
-}
-
-
-// gindex computes a full hash of pc, ghist and pathHist
-int
-LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
-{
- int index;
- int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
- histLengths[bank];
- const Addr shiftedPc = pc >> instShiftAmt;
- index =
- shiftedPc ^
- (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
- threadHistory[tid].computeIndices[bank].comp ^
- F(threadHistory[tid].pathHist, hlen, bank);
-
- return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
-}
-
-
-// Tag computation
-uint16_t
-LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
-{
- int tag = (pc >> instShiftAmt) ^
- threadHistory[tid].computeTags[0][bank].comp ^
- (threadHistory[tid].computeTags[1][bank].comp << 1);
-
- return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
-}
-
-
-// Up-down saturating counter
-void
-LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
-{
- assert(nbits <= sizeof(int8_t) << 3);
- if (taken) {
- if (ctr < ((1 << (nbits - 1)) - 1))
- ctr++;
- } else {
- if (ctr > -(1 << (nbits - 1)))
- ctr--;
- }
-}
-
-// Up-down unsigned saturating counter
-void
-LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
-{
- assert(nbits <= sizeof(uint8_t) << 3);
- if (up) {
- if (ctr < ((1 << nbits) - 1))
- ctr++;
- } else {
- if (ctr)
- ctr--;
- }
-}
-
-// Bimodal prediction
-bool
-LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
-{
- return btablePrediction[bi->bimodalIndex];
-}
-
-
-// Update the bimodal predictor: a hysteresis bit is shared among N
prediction
-// bits (N = 2 ^ logRatioBiModalHystEntries)
-void
-LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
-{
- int inter = (btablePrediction[bi->bimodalIndex] << 1)
- + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
- if (taken) {
- if (inter < 3)
- inter++;
- } else if (inter > 0) {
- inter--;
- }
- const bool pred = inter >> 1;
- const bool hyst = inter & 1;
- btablePrediction[bi->bimodalIndex] = pred;
- btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] =
hyst;
- DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred,
hyst);
-}
-
-
//loop prediction: only used if high confidence
bool
-LTAGE::getLoop(Addr pc, BranchInfo* bi) const
+LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const
{
bi->loopHit = -1;
bi->loopPredValid = false;
@@ -308,7 +113,7 @@
}

void
-LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
+LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
{
if (bi->loopHit>=0) {
int index = lindex(pc);
@@ -322,7 +127,7 @@
}

void
-LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
+LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
{
int idx = bi->loopIndex + bi->loopHit;
if (bi->loopHit >= 0) {
@@ -409,319 +214,57 @@

}

-// shifting the global history: we manage the history in a big table in
order
-// to reduce simulation time
-void
-LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
-{
- if (pt == 0) {
- DPRINTF(LTage, "Rolling over the histories\n");
- // Copy beginning of globalHistoryBuffer to end, such that
- // the last maxHist outcomes are still reachable
- // through pt[0 .. maxHist - 1].
- for (int i = 0; i < maxHist; i++)
- tab[histBufferSize - maxHist + i] = tab[i];
- pt = histBufferSize - maxHist;
- h = &tab[pt];
- }
- pt--;
- h--;
- h[0] = (dir) ? 1 : 0;
-}
-
-// Get GHR for hashing indirect predictor
-// Build history backwards from pointer in
-// bp_history.
-unsigned
-LTAGE::getGHR(ThreadID tid, void *bp_history) const
-{
- BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
- unsigned val = 0;
- for (unsigned i = 0; i < 32; i++) {
- // Make sure we don't go out of bounds
- int gh_offset = bi->ptGhist + i;
- assert(&(threadHistory[tid].globalHistory[gh_offset]) <
- threadHistory[tid].globalHistory + histBufferSize);
- val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
- }
-
- return val;
-}
-
//prediction
bool
LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
{
- BranchInfo *bi = new BranchInfo(nHistoryTables+1);
+ LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1);
b = (void*)(bi);
- Addr pc = branch_pc;
- bool pred_taken = true;
- bi->loopHit = -1;
+
+ bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi);

if (cond_branch) {
- // TAGE prediction
+ bi->loopPred = getLoop(branch_pc, bi); // loop prediction

- // computes the table addresses and the partial tags
- for (int i = 1; i <= nHistoryTables; i++) {
- tableIndices[i] = gindex(tid, pc, i);
- bi->tableIndices[i] = tableIndices[i];
- tableTags[i] = gtag(tid, pc, i);
- bi->tableTags[i] = tableTags[i];
+ if ((loopUseCounter >= 0) && bi->loopPredValid) {
+ pred_taken = bi->loopPred;
}
-
- bi->bimodalIndex = bindex(pc);
-
- bi->hitBank = 0;
- bi->altBank = 0;
- //Look for the bank with longest matching history
- for (int i = nHistoryTables; i > 0; i--) {
- if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
- bi->hitBank = i;
- bi->hitBankIndex = tableIndices[bi->hitBank];
- break;
- }
- }
- //Look for the alternate bank
- for (int i = bi->hitBank - 1; i > 0; i--) {
- if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
- bi->altBank = i;
- bi->altBankIndex = tableIndices[bi->altBank];
- break;
- }
- }
- //computes the prediction and the alternate prediction
- if (bi->hitBank > 0) {
- if (bi->altBank > 0) {
- bi->altTaken =
- gtable[bi->altBank][tableIndices[bi->altBank]].ctr >=
0;
- }else {
- bi->altTaken = getBimodePred(pc, bi);
- }
-
- bi->longestMatchPred =
- gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
- bi->pseudoNewAlloc =
- abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <=
1;
-
- //if the entry is recognized as a newly allocated entry and
- //useAltPredForNewlyAllocated is positive use the alternate
- //prediction
- if ((useAltPredForNewlyAllocated < 0)
- || abs(2 *
- gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1)
1)
- bi->tagePred = bi->longestMatchPred;
- else
- bi->tagePred = bi->altTaken;
- } else {
- bi->altTaken = getBimodePred(pc, bi);
- bi->tagePred = bi->altTaken;
- bi->longestMatchPred = bi->altTaken;
- }
- //end TAGE prediction
-
- bi->loopPred = getLoop(pc, bi); // loop prediction
-
- pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
- (bi->loopPred): (bi->tagePred);
DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
"loopValid?:%d, loopUseCounter:%d, tagePred:%d,
altPred:%d\n",
branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
loopUseCounter, bi->tagePred, bi->altTaken);
}
- bi->branchPC = branch_pc;
- bi->condBranch = cond_branch;
+
specLoopUpdate(branch_pc, pred_taken, bi);
return pred_taken;
}

-// PREDICTOR UPDATE
void
-LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
- bool squashed)
+LTAGE::condBranchUpdate(Addr branch_pc, bool taken,
+ TageBranchInfo* tage_bi, int nrand)
{
- assert(bp_history);
+ LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi);

- BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
+ // first update the loop predictor
+ loopUpdate(branch_pc, taken, bi);

- if (squashed) {
- // This restores the global history, then update it
- // and recomputes the folded histories.
- squash(tid, taken, bp_history);
- return;
+ if (bi->loopPredValid) {
+ if (bi->tagePred != bi->loopPred) {
+ ctrUpdate(loopUseCounter,
+ (bi->loopPred == taken),
+ withLoopBits);
+ }
}

- int nrand = random_mt.random<int>(0,3);
- Addr pc = branch_pc;
- if (bi->condBranch) {
- DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
- branch_pc, taken);
- // first update the loop predictor
- loopUpdate(pc, taken, bi);
-
- if (bi->loopPredValid) {
- if (bi->tagePred != bi->loopPred) {
- ctrUpdate(loopUseCounter,
- (bi->loopPred == taken),
- withLoopBits);
- }
- }
-
- // TAGE UPDATE
- // try to allocate a new entries only if prediction was wrong
- bool longest_match_pred = false;
- bool alloc = (bi->tagePred != taken) && (bi->hitBank <
nHistoryTables);
- if (bi->hitBank > 0) {
- // Manage the selection between longest matching and alternate
- // matching for "pseudo"-newly allocated longest matching entry
- longest_match_pred = bi->longestMatchPred;
- bool PseudoNewAlloc = bi->pseudoNewAlloc;
- // an entry is considered as newly allocated if its prediction
- // counter is weak
- if (PseudoNewAlloc) {
- if (longest_match_pred == taken) {
- alloc = false;
- }
- // if it was delivering the correct prediction, no need to
- // allocate new entry even if the overall prediction was
false
- if (longest_match_pred != bi->altTaken) {
- ctrUpdate(useAltPredForNewlyAllocated,
- bi->altTaken == taken, useAltOnNaBits);
- }
- }
- }
-
- if (alloc) {
- // is there some "unuseful" entry to allocate
- uint8_t min = 1;
- for (int i = nHistoryTables; i > bi->hitBank; i--) {
- if (gtable[i][bi->tableIndices[i]].u < min) {
- min = gtable[i][bi->tableIndices[i]].u;
- }
- }
-
- // we allocate an entry with a longer history
- // to avoid ping-pong, we do not choose systematically the
next
- // entry, but among the 3 next entries
- int Y = nrand &
- ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
- int X = bi->hitBank + 1;
- if (Y & 1) {
- X++;
- if (Y & 2)
- X++;
- }
- // No entry available, forces one to be available
- if (min > 0) {
- gtable[X][bi->tableIndices[X]].u = 0;
- }
-
-
- //Allocate only one entry
- for (int i = X; i <= nHistoryTables; i++) {
- if ((gtable[i][bi->tableIndices[i]].u == 0)) {
- gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
- gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
- break;
- }
- }
- }
- //periodic reset of u: reset is not complete but bit by bit
- tCounter++;
- if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
- // reset least significant bit
- // most significant bit becomes least significant bit
- for (int i = 1; i <= nHistoryTables; i++) {
- for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
- gtable[i][j].u = gtable[i][j].u >> 1;
- }
- }
- }
-
- if (bi->hitBank > 0) {
- DPRINTF(LTage, "Updating tag table entry (%d,%d) for
branch %lx\n",
- bi->hitBank, bi->hitBankIndex, branch_pc);
- ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
- tagTableCounterBits);
- // if the provider entry is not certified to be useful also
update
- // the alternate prediction
- if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
- if (bi->altBank > 0) {
- ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr,
taken,
- tagTableCounterBits);
- DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
- " branch %lx\n", bi->hitBank, bi->hitBankIndex,
- branch_pc);
- }
- if (bi->altBank == 0) {
- baseUpdate(pc, taken, bi);
- }
- }
-
- // update the u counter
- if (bi->tagePred != bi->altTaken) {
- unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
- bi->tagePred == taken, tagTableUBits);
- }
- } else {
- baseUpdate(pc, taken, bi);
- }
-
- //END PREDICTOR UPDATE
- }
- if (!squashed) {
- delete bi;
- }
-}
-
-void
-LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
-{
- BranchInfo* bi = (BranchInfo*)(b);
- ThreadHistory& tHist = threadHistory[tid];
- // UPDATE HISTORIES
- bool pathbit = ((branch_pc >> instShiftAmt) & 1);
- //on a squash, return pointers to this and recompute indices.
- //update user history
- updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
- tHist.pathHist = (tHist.pathHist << 1) + pathbit;
- tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
-
- bi->ptGhist = tHist.ptGhist;
- bi->pathHist = tHist.pathHist;
- //prepare next index and tag computations for user branchs
- for (int i = 1; i <= nHistoryTables; i++)
- {
- bi->ci[i] = tHist.computeIndices[i].comp;
- bi->ct0[i] = tHist.computeTags[0][i].comp;
- bi->ct1[i] = tHist.computeTags[1][i].comp;
- tHist.computeIndices[i].update(tHist.gHist);
- tHist.computeTags[0][i].update(tHist.gHist);
- tHist.computeTags[1][i].update(tHist.gHist);
- }
- DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
- "path Hist: %x; pointer:%d\n", branch_pc, taken,
tHist.pathHist,
- tHist.ptGhist);
+ TAGE::condBranchUpdate(branch_pc, taken, bi, nrand);
}

void
LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
{
- BranchInfo* bi = (BranchInfo*)(bp_history);
- ThreadHistory& tHist = threadHistory[tid];
- DPRINTF(LTage, "Restoring branch info: %lx; taken? %d;
PathHistory:%x, "
- "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
- tHist.pathHist = bi->pathHist;
- tHist.ptGhist = bi->ptGhist;
- tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
- tHist.gHist[0] = (taken ? 1 : 0);
- for (int i = 1; i <= nHistoryTables; i++) {
- tHist.computeIndices[i].comp = bi->ci[i];
- tHist.computeTags[0][i].comp = bi->ct0[i];
- tHist.computeTags[1][i].comp = bi->ct1[i];
- tHist.computeIndices[i].update(tHist.gHist);
- tHist.computeTags[0][i].update(tHist.gHist);
- tHist.computeTags[1][i].update(tHist.gHist);
- }
+ TAGE::squash(tid, taken, bp_history);
+
+ LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);

if (bi->condBranch) {
if (bi->loopHit >= 0) {
@@ -729,14 +272,12 @@
ltable[idx].currentIterSpec = bi->currentIter;
}
}
-
}

void
LTAGE::squash(ThreadID tid, void *bp_history)
{
- BranchInfo* bi = (BranchInfo*)(bp_history);
- DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
+ LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
if (bi->condBranch) {
if (bi->loopHit >= 0) {
int idx = bi->loopIndex + bi->loopHit;
@@ -744,48 +285,7 @@
}
}

- delete bi;
-}
-
-bool
-LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
-{
- bool retval = predict(tid, branch_pc, true, bp_history);
-
- DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
- updateHistories(tid, branch_pc, retval, bp_history);
- assert(threadHistory[tid].gHist ==
- &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
-
- return retval;
-}
-
-void
-LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
-{
- BranchInfo* bi = (BranchInfo*) bp_history;
- ThreadHistory& tHist = threadHistory[tid];
- DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
- assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
- tHist.gHist[0] = 0;
- for (int i = 1; i <= nHistoryTables; i++) {
- tHist.computeIndices[i].comp = bi->ci[i];
- tHist.computeTags[0][i].comp = bi->ct0[i];
- tHist.computeTags[1][i].comp = bi->ct1[i];
- tHist.computeIndices[i].update(tHist.gHist);
- tHist.computeTags[0][i].update(tHist.gHist);
- tHist.computeTags[1][i].update(tHist.gHist);
- }
-}
-
-void
-LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
-{
- DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
- predict(tid, br_pc, false, bp_history);
- updateHistories(tid, br_pc, true, bp_history);
- assert(threadHistory[tid].gHist ==
- &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
+ TAGE::squash(tid, bp_history);
}

LTAGE*
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index 6929ad0..d614026 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -52,25 +52,20 @@
#ifndef __CPU_PRED_LTAGE
#define __CPU_PRED_LTAGE

+
#include <vector>

#include "base/types.hh"
-#include "cpu/pred/bpred_unit.hh"
+#include "cpu/pred/tage.hh"
#include "params/LTAGE.hh"

-class LTAGE: public BPredUnit
+class LTAGE: public TAGE
{
public:
LTAGE(const LTAGEParams *params);

// Base class methods.
- void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
override;
- bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history)
override;
- void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history)
override;
- void update(ThreadID tid, Addr branch_addr, bool taken, void
*bp_history,
- bool squashed) override;
void squash(ThreadID tid, void *bp_history) override;
- unsigned getGHR(ThreadID tid, void *bp_history) const override;

private:
// Prediction Structures
@@ -89,189 +84,40 @@
confidence(0), tag(0), age(0), dir(0) { }
};

- // Tage Entry
- struct TageEntry
- {
- int8_t ctr;
- uint16_t tag;
- uint8_t u;
- TageEntry() : ctr(0), tag(0), u(0) { }
- };
-
- // Folded History Table - compressed history
- // to mix with instruction PC to index partially
- // tagged tables.
- struct FoldedHistory
- {
- unsigned comp;
- int compLength;
- int origLength;
- int outpoint;
-
- void init(int original_length, int compressed_length)
- {
- comp = 0;
- origLength = original_length;
- compLength = compressed_length;
- outpoint = original_length % compressed_length;
- }
-
- void update(uint8_t * h)
- {
- comp = (comp << 1) | h[0];
- comp ^= h[origLength] << outpoint;
- comp ^= (comp >> compLength);
- comp &= (ULL(1) << compLength) - 1;
- }
- };
-
// Primary branch history entry
- struct BranchInfo
+ struct LTageBranchInfo : public TageBranchInfo
{
- int pathHist;
- int ptGhist;
- int hitBank;
- int hitBankIndex;
- int altBank;
- int altBankIndex;
- int bimodalIndex;
uint16_t loopTag;
uint16_t currentIter;

- bool tagePred;
- bool altTaken;
bool loopPred;
bool loopPredValid;
int loopIndex;
int loopHit;
- bool condBranch;
- bool longestMatchPred;
- bool pseudoNewAlloc;
- Addr branchPC;

- // Pointer to dynamically allocated storage
- // to save table indices and folded histories.
- // To do one call to new instead of five.
- int *storage;
-
- // Pointers to actual saved array within the dynamically
- // allocated storage.
- int *tableIndices;
- int *tableTags;
- int *ci;
- int *ct0;
- int *ct1;
-
- BranchInfo(int sz)
- : pathHist(0), ptGhist(0),
- hitBank(0), hitBankIndex(0),
- altBank(0), altBankIndex(0),
- bimodalIndex(0), loopTag(0), currentIter(0),
- tagePred(false), altTaken(false), loopPred(false),
- loopPredValid(false), loopIndex(0), loopHit(0),
- condBranch(false), longestMatchPred(false),
- pseudoNewAlloc(false), branchPC(0)
- {
- storage = new int [sz * 5];
- tableIndices = storage;
- tableTags = storage + sz;
- ci = tableTags + sz;
- ct0 = ci + sz;
- ct1 = ct0 + sz;
- }
-
- ~BranchInfo()
- {
- delete[] storage;
- }
+ LTageBranchInfo(int sz)
+ : TageBranchInfo(sz),
+ loopTag(0), currentIter(0),
+ loopPred(false),
+ loopPredValid(false), loopIndex(0), loopHit(0)
+ {}
};

/**
* Computes the index used to access the
- * bimodal table.
- * @param pc_in The unshifted branch PC.
- */
- int bindex(Addr pc_in) const;
-
- /**
- * Computes the index used to access the
* loop predictor.
* @param pc_in The unshifted branch PC.
*/
int lindex(Addr pc_in) const;

/**
- * Computes the index used to access a
- * partially tagged table.
- * @param tid The thread ID used to select the
- * global histories to use.
- * @param pc The unshifted branch PC.
- * @param bank The partially tagged table to access.
- */
- inline int gindex(ThreadID tid, Addr pc, int bank) const;
-
- /**
- * Utility function to shuffle the path history
- * depending on which tagged table we are accessing.
- * @param phist The path history.
- * @param size Number of path history bits to use.
- * @param bank The partially tagged table to access.
- */
- int F(int phist, int size, int bank) const;
-
- /**
- * Computes the partial tag of a tagged table.
- * @param tid the thread ID used to select the
- * global histories to use.
- * @param pc The unshifted branch PC.
- * @param bank The partially tagged table to access.
- */
- inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const;
-
- /**
- * Updates a direction counter based on the actual
- * branch outcome.
- * @param ctr Reference to counter to update.
- * @param taken Actual branch outcome.
- * @param nbits Counter width.
- */
- void ctrUpdate(int8_t & ctr, bool taken, int nbits);
-
- /**
- * Updates an unsigned counter based on up/down parameter
- * @param ctr Reference to counter to update.
- * @param up Boolean indicating if the counter is
incremented/decremented
- * If true it is incremented, if false it is decremented
- * @param nbits Counter width.
- */
- void unsignedCtrUpdate(uint8_t & ctr, bool taken, unsigned nbits);
-
- /**
- * Get a branch prediction from the bimodal
- * predictor.
- * @param pc The unshifted branch PC.
- * @param bi Pointer to information on the
- * prediction.
- */
- bool getBimodePred(Addr pc, BranchInfo* bi) const;
-
- /**
- * Updates the bimodal predictor.
- * @param pc The unshifted branch PC.
- * @param taken The actual branch outcome.
- * @param bi Pointer to information on the prediction
- * recorded at prediction time.
- */
- void baseUpdate(Addr pc, bool taken, BranchInfo* bi);
-
- /**
* Get a branch prediction from the loop
* predictor.
* @param pc The unshifted branch PC.
* @param bi Pointer to information on the
* prediction.
*/
- bool getLoop(Addr pc, BranchInfo* bi) const;
+ bool getLoop(Addr pc, LTageBranchInfo* bi) const;

/**
* Updates the loop predictor.
@@ -280,69 +126,7 @@
* @param bi Pointer to information on the
* prediction recorded at prediction time.
*/
- void loopUpdate(Addr pc, bool Taken, BranchInfo* bi);
-
- /**
- * (Speculatively) updates the global branch history.
- * @param h Reference to pointer to global branch history.
- * @param dir (Predicted) outcome to update the histories
- * with.
- * @param tab
- * @param PT Reference to path history.
- */
- void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT);
-
- /**
- * Get a branch prediction from L-TAGE. *NOT* an override of
- * BpredUnit::predict().
- * @param tid The thread ID to select the global
- * histories to use.
- * @param branch_pc The unshifted branch PC.
- * @param cond_branch True if the branch is conditional.
- * @param b Reference to wrapping pointer to allow storing
- * derived class prediction information in the base class.
- */
- bool predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b);
-
- /**
- * Update L-TAGE. Called at execute to repair histories on a
misprediction
- * and at commit to update the tables.
- * @param tid The thread ID to select the global
- * histories to use.
- * @param branch_pc The unshifted branch PC.
- * @param taken Actual branch outcome.
- * @param bi Pointer to information on the prediction
- * recorded at prediction time.
- */
- void update(ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi);
-
- /**
- * (Speculatively) updates global histories (path and direction).
- * Also recomputes compressed (folded) histories based on the
- * branch direction.
- * @param tid The thread ID to select the histories
- * to update.
- * @param branch_pc The unshifted branch PC.
- * @param taken (Predicted) branch direction.
- * @param b Wrapping pointer to BranchInfo (to allow
- * storing derived class prediction information in the
- * base class).
- */
- void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void*
b);
-
- /**
- * Restores speculatively updated path and direction histories.
- * Also recomputes compressed (folded) histories based on the
- * correct branch outcome.
- * This version of squash() is called once on a branch misprediction.
- * @param tid The Thread ID to select the histories to rollback.
- * @param taken The correct branch outcome.
- * @param bp_history Wrapping pointer to BranchInfo (to allow
- * storing derived class prediction information in the
- * base class).
- * @post bp_history points to valid memory.
- */
- void squash(ThreadID tid, bool taken, void *bp_history);
+ void loopUpdate(Addr pc, bool Taken, LTageBranchInfo* bi);

/**
* Speculatively updates the loop predictor
@@ -352,17 +136,48 @@
* @param bi Pointer to information on the prediction
* recorded at prediction time.
*/
- void specLoopUpdate(Addr pc, bool taken, BranchInfo* bi);
+ void specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi);

- const unsigned logRatioBiModalHystEntries;
+ /**
+ * Update LTAGE for conditional branches.
+ * @param branch_pc The unshifted branch PC.
+ * @param taken Actual branch outcome.
+ * @param bi Pointer to information on the prediction
+ * recorded at prediction time.
+ * @nrand Random int number from 0 to 3
+ */
+ void condBranchUpdate(
+ Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand)
override;
+
+ /**
+ * Get a branch prediction from LTAGE. *NOT* an override of
+ * BpredUnit::predict().
+ * @param tid The thread ID to select the global
+ * histories to use.
+ * @param branch_pc The unshifted branch PC.
+ * @param cond_branch True if the branch is conditional.
+ * @param b Reference to wrapping pointer to allow storing
+ * derived class prediction information in the base class.
+ */
+ bool predict(
+ ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) override;
+
+ /**
+ * Restores speculatively updated path and direction histories.
+ * Also recomputes compressed (folded) histories based on the
+ * correct branch outcome.
+ * This version of squash() is called once on a branch misprediction.
+ * @param tid The Thread ID to select the histories to rollback.
+ * @param taken The correct branch outcome.
+ * @param bp_history Wrapping pointer to TageBranchInfo (to allow
+ * storing derived class prediction information in the
+ * base class).
+ * @post bp_history points to valid memory.
+ */
+ void squash(
+ ThreadID tid, bool taken, void *bp_history) override;
+
const unsigned logSizeLoopPred;
- const unsigned nHistoryTables;
- const unsigned tagTableCounterBits;
- const unsigned tagTableUBits;
- const unsigned histBufferSize;
- const unsigned minHist;
- const unsigned maxHist;
- const unsigned pathHistBits;
const unsigned loopTableAgeBits;
const unsigned loopTableConfidenceBits;
const unsigned loopTableTagBits;
@@ -372,48 +187,9 @@
const uint16_t loopTagMask;
const uint16_t loopNumIterMask;

- const std::vector<unsigned> tagTableTagWidths;
- const std::vector<int> logTagTableSizes;
-
- std::vector<bool> btablePrediction;
- std::vector<bool> btableHysteresis;
- TageEntry **gtable;
LoopEntry *ltable;

- // Keep per-thread histories to
- // support SMT.
- struct ThreadHistory {
- // Speculative path history
- // (LSB of branch address)
- int pathHist;
-
- // Speculative branch direction
- // history (circular buffer)
- // @TODO Convert to std::vector<bool>
- uint8_t *globalHistory;
-
- // Pointer to most recent branch outcome
- uint8_t* gHist;
-
- // Index to most recent branch outcome
- int ptGhist;
-
- // Speculative folded histories.
- FoldedHistory *computeIndices;
- FoldedHistory *computeTags[2];
- };
-
- std::vector<ThreadHistory> threadHistory;
-
- int *histLengths;
- int *tableIndices;
- int *tableTags;
-
int8_t loopUseCounter;
- int8_t useAltPredForNewlyAllocated;
- uint64_t tCounter;
- uint64_t logUResetPeriod;
- unsigned useAltOnNaBits;
unsigned withLoopBits;
};

diff --git a/src/cpu/pred/tage.cc b/src/cpu/pred/tage.cc
new file mode 100644
index 0000000..c22e6b7
--- /dev/null
+++ b/src/cpu/pred/tage.cc
@@ -0,0 +1,609 @@
+/*
+ * Copyright (c) 2014 The University of Wisconsin
+ *
+ * Copyright (c) 2006 INRIA (Institut National de Recherche en
+ * Informatique et en Automatique / French National Research Institute
+ * for Computer Science and Applied Mathematics)
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
+ * from André Seznec's code.
+ */
+
+/* @file
+ * Implementation of a TAGE branch predictor
+ */
+
+#include "cpu/pred/tage.hh"
+
+#include "base/intmath.hh"
+#include "base/logging.hh"
+#include "base/random.hh"
+#include "base/trace.hh"
+#include "debug/Fetch.hh"
+#include "debug/Tage.hh"
+
+TAGE::TAGE(const TAGEParams *params)
+ : BPredUnit(params),
+ logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
+ nHistoryTables(params->nHistoryTables),
+ tagTableCounterBits(params->tagTableCounterBits),
+ tagTableUBits(params->tagTableUBits),
+ histBufferSize(params->histBufferSize),
+ minHist(params->minHist),
+ maxHist(params->maxHist),
+ pathHistBits(params->pathHistBits),
+ tagTableTagWidths(params->tagTableTagWidths),
+ logTagTableSizes(params->logTagTableSizes),
+ threadHistory(params->numThreads),
+ logUResetPeriod(params->logUResetPeriod),
+ useAltOnNaBits(params->useAltOnNaBits)
+{
+ // Current method for periodically resetting the u counter bits only
+ // works for 1 or 2 bits
+ // Also make sure that it is not 0
+ assert(tagTableUBits <= 2 && (tagTableUBits > 0));
+
+ // we use int type for the path history, so it cannot be more than
+ // its size
+ assert(pathHistBits <= (sizeof(int)*8));
+
+ // initialize the counter to half of the period
+ assert(logUResetPeriod != 0);
+ tCounter = ULL(1) << (logUResetPeriod - 1);
+
+ assert(params->histBufferSize > params->maxHist * 2);
+ useAltPredForNewlyAllocated = 0;
+
+ for (auto& history : threadHistory) {
+ history.pathHist = 0;
+ history.globalHistory = new uint8_t[histBufferSize];
+ history.gHist = history.globalHistory;
+ memset(history.gHist, 0, histBufferSize);
+ history.ptGhist = 0;
+ }
+
+ histLengths = new int [nHistoryTables+1];
+ histLengths[1] = minHist;
+ histLengths[nHistoryTables] = maxHist;
+
+ for (int i = 2; i <= nHistoryTables; i++) {
+ histLengths[i] = (int) (((double) minHist *
+ pow ((double) (maxHist) / (double) minHist,
+ (double) (i - 1) / (double) ((nHistoryTables- 1))))
+ + 0.5);
+ }
+
+ assert(tagTableTagWidths.size() == (nHistoryTables+1));
+ assert(logTagTableSizes.size() == (nHistoryTables+1));
+
+ // First entry is for the Bimodal table and it is untagged in this
+ // implementation
+ assert(tagTableTagWidths[0] == 0);
+
+ for (auto& history : threadHistory) {
+ history.computeIndices = new FoldedHistory[nHistoryTables+1];
+ history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
+ history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
+
+ for (int i = 1; i <= nHistoryTables; i++) {
+ history.computeIndices[i].init(
+ histLengths[i], (logTagTableSizes[i]));
+ history.computeTags[0][i].init(
+ history.computeIndices[i].origLength,
tagTableTagWidths[i]);
+ history.computeTags[1][i].init(
+ history.computeIndices[i].origLength,
tagTableTagWidths[i]-1);
+ DPRINTF(Tage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
+ histLengths[i], logTagTableSizes[i],
tagTableTagWidths[i]);
+ }
+ }
+
+ const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
+ btablePrediction.resize(bimodalTableSize, false);
+ btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
+ true);
+
+ gtable = new TageEntry*[nHistoryTables + 1];
+ for (int i = 1; i <= nHistoryTables; i++) {
+ gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
+ }
+
+ tableIndices = new int [nHistoryTables+1];
+ tableTags = new int [nHistoryTables+1];
+}
+
+int
+TAGE::bindex(Addr pc_in) const
+{
+ return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) -
1));
+}
+
+int
+TAGE::F(int A, int size, int bank) const
+{
+ int A1, A2;
+
+ A = A & ((ULL(1) << size) - 1);
+ A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
+ A2 = (A >> logTagTableSizes[bank]);
+ A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+ + (A2 >> (logTagTableSizes[bank] - bank));
+ A = A1 ^ A2;
+ A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+ + (A >> (logTagTableSizes[bank] - bank));
+ return (A);
+}
+
+
+// gindex computes a full hash of pc, ghist and pathHist
+int
+TAGE::gindex(ThreadID tid, Addr pc, int bank) const
+{
+ int index;
+ int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
+ histLengths[bank];
+ const Addr shiftedPc = pc >> instShiftAmt;
+ index =
+ shiftedPc ^
+ (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
+ threadHistory[tid].computeIndices[bank].comp ^
+ F(threadHistory[tid].pathHist, hlen, bank);
+
+ return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
+}
+
+
+// Tag computation
+uint16_t
+TAGE::gtag(ThreadID tid, Addr pc, int bank) const
+{
+ int tag = (pc >> instShiftAmt) ^
+ threadHistory[tid].computeTags[0][bank].comp ^
+ (threadHistory[tid].computeTags[1][bank].comp << 1);
+
+ return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
+}
+
+
+// Up-down saturating counter
+void
+TAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
+{
+ assert(nbits <= sizeof(int8_t) << 3);
+ if (taken) {
+ if (ctr < ((1 << (nbits - 1)) - 1))
+ ctr++;
+ } else {
+ if (ctr > -(1 << (nbits - 1)))
+ ctr--;
+ }
+}
+
+// Up-down unsigned saturating counter
+void
+TAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
+{
+ assert(nbits <= sizeof(uint8_t) << 3);
+ if (up) {
+ if (ctr < ((1 << nbits) - 1))
+ ctr++;
+ } else {
+ if (ctr)
+ ctr--;
+ }
+}
+
+// Bimodal prediction
+bool
+TAGE::getBimodePred(Addr pc, TageBranchInfo* bi) const
+{
+ return btablePrediction[bi->bimodalIndex];
+}
+
+
+// Update the bimodal predictor: a hysteresis bit is shared among N
prediction
+// bits (N = 2 ^ logRatioBiModalHystEntries)
+void
+TAGE::baseUpdate(Addr pc, bool taken, TageBranchInfo* bi)
+{
+ int inter = (btablePrediction[bi->bimodalIndex] << 1)
+ + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
+ if (taken) {
+ if (inter < 3)
+ inter++;
+ } else if (inter > 0) {
+ inter--;
+ }
+ const bool pred = inter >> 1;
+ const bool hyst = inter & 1;
+ btablePrediction[bi->bimodalIndex] = pred;
+ btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] =
hyst;
+ DPRINTF(Tage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred,
hyst);
+}
+
+// shifting the global history: we manage the history in a big table in
order
+// to reduce simulation time
+void
+TAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
+{
+ if (pt == 0) {
+ DPRINTF(Tage, "Rolling over the histories\n");
+ // Copy beginning of globalHistoryBuffer to end, such that
+ // the last maxHist outcomes are still reachable
+ // through pt[0 .. maxHist - 1].
+ for (int i = 0; i < maxHist; i++)
+ tab[histBufferSize - maxHist + i] = tab[i];
+ pt = histBufferSize - maxHist;
+ h = &tab[pt];
+ }
+ pt--;
+ h--;
+ h[0] = (dir) ? 1 : 0;
+}
+
+// Get GHR for hashing indirect predictor
+// Build history backwards from pointer in
+// bp_history.
+unsigned
+TAGE::getGHR(ThreadID tid, void *bp_history) const
+{
+ TageBranchInfo* bi = static_cast<TageBranchInfo*>(bp_history);
+ unsigned val = 0;
+ for (unsigned i = 0; i < 32; i++) {
+ // Make sure we don't go out of bounds
+ int gh_offset = bi->ptGhist + i;
+ assert(&(threadHistory[tid].globalHistory[gh_offset]) <
+ threadHistory[tid].globalHistory + histBufferSize);
+ val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
+ }
+
+ return val;
+}
+
+//prediction
+bool
+TAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
+{
+ TageBranchInfo *bi = new TageBranchInfo(nHistoryTables+1);
+ b = (void*)(bi);
+ return tagePredict(tid, branch_pc, cond_branch, bi);
+}
+
+bool
+TAGE::tagePredict(ThreadID tid, Addr branch_pc,
+ bool cond_branch, TageBranchInfo* bi)
+{
+ Addr pc = branch_pc;
+ bool pred_taken = true;
+
+ if (cond_branch) {
+ // TAGE prediction
+
+ // computes the table addresses and the partial tags
+ for (int i = 1; i <= nHistoryTables; i++) {
+ tableIndices[i] = gindex(tid, pc, i);
+ bi->tableIndices[i] = tableIndices[i];
+ tableTags[i] = gtag(tid, pc, i);
+ bi->tableTags[i] = tableTags[i];
+ }
+
+ bi->bimodalIndex = bindex(pc);
+
+ bi->hitBank = 0;
+ bi->altBank = 0;
+ //Look for the bank with longest matching history
+ for (int i = nHistoryTables; i > 0; i--) {
+ if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
+ bi->hitBank = i;
+ bi->hitBankIndex = tableIndices[bi->hitBank];
+ break;
+ }
+ }
+ //Look for the alternate bank
+ for (int i = bi->hitBank - 1; i > 0; i--) {
+ if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
+ bi->altBank = i;
+ bi->altBankIndex = tableIndices[bi->altBank];
+ break;
+ }
+ }
+ //computes the prediction and the alternate prediction
+ if (bi->hitBank > 0) {
+ if (bi->altBank > 0) {
+ bi->altTaken =
+ gtable[bi->altBank][tableIndices[bi->altBank]].ctr >=
0;
+ }else {
+ bi->altTaken = getBimodePred(pc, bi);
+ }
+
+ bi->longestMatchPred =
+ gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
+ bi->pseudoNewAlloc =
+ abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <=
1;
+
+ //if the entry is recognized as a newly allocated entry and
+ //useAltPredForNewlyAllocated is positive use the alternate
+ //prediction
+ if ((useAltPredForNewlyAllocated < 0)
+ || abs(2 *
+ gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1)
1)
+ bi->tagePred = bi->longestMatchPred;
+ else
+ bi->tagePred = bi->altTaken;
+ } else {
+ bi->altTaken = getBimodePred(pc, bi);
+ bi->tagePred = bi->altTaken;
+ bi->longestMatchPred = bi->altTaken;
+ }
+ //end TAGE prediction
+
+ pred_taken = (bi->tagePred);
+ DPRINTF(Tage, "Predict for %lx: taken?:%d, tagePred:%d,
altPred:%d\n",
+ branch_pc, pred_taken, bi->tagePred, bi->altTaken);
+ }
+ bi->branchPC = branch_pc;
+ bi->condBranch = cond_branch;
+ return pred_taken;
+}
+
+// PREDICTOR UPDATE
+void
+TAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
+ bool squashed)
+{
+ assert(bp_history);
+
+ TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
+
+ if (squashed) {
+ // This restores the global history, then update it
+ // and recomputes the folded histories.
+ squash(tid, taken, bp_history);
+ return;
+ }
+
+ int nrand = random_mt.random<int>(0,3);
+ if (bi->condBranch) {
+ DPRINTF(Tage, "Updating tables for branch:%lx; taken?:%d\n",
+ branch_pc, taken);
+ condBranchUpdate(branch_pc, taken, bi, nrand);
+ }
+ if (!squashed) {
+ delete bi;
+ }
+}
+
+void
+TAGE::condBranchUpdate(Addr branch_pc, bool taken,
+ TageBranchInfo* bi, int nrand)
+{
+ // TAGE UPDATE
+ // try to allocate a new entries only if prediction was wrong
+ bool longest_match_pred = false;
+ bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
+ if (bi->hitBank > 0) {
+ // Manage the selection between longest matching and alternate
+ // matching for "pseudo"-newly allocated longest matching entry
+ longest_match_pred = bi->longestMatchPred;
+ bool PseudoNewAlloc = bi->pseudoNewAlloc;
+ // an entry is considered as newly allocated if its prediction
+ // counter is weak
+ if (PseudoNewAlloc) {
+ if (longest_match_pred == taken) {
+ alloc = false;
+ }
+ // if it was delivering the correct prediction, no need to
+ // allocate new entry even if the overall prediction was false
+ if (longest_match_pred != bi->altTaken) {
+ ctrUpdate(useAltPredForNewlyAllocated,
+ bi->altTaken == taken, useAltOnNaBits);
+ }
+ }
+ }
+
+ if (alloc) {
+ // is there some "unuseful" entry to allocate
+ uint8_t min = 1;
+ for (int i = nHistoryTables; i > bi->hitBank; i--) {
+ if (gtable[i][bi->tableIndices[i]].u < min) {
+ min = gtable[i][bi->tableIndices[i]].u;
+ }
+ }
+
+ // we allocate an entry with a longer history
+ // to avoid ping-pong, we do not choose systematically the next
+ // entry, but among the 3 next entries
+ int Y = nrand &
+ ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
+ int X = bi->hitBank + 1;
+ if (Y & 1) {
+ X++;
+ if (Y & 2)
+ X++;
+ }
+ // No entry available, forces one to be available
+ if (min > 0) {
+ gtable[X][bi->tableIndices[X]].u = 0;
+ }
+
+
+ //Allocate only one entry
+ for (int i = X; i <= nHistoryTables; i++) {
+ if ((gtable[i][bi->tableIndices[i]].u == 0)) {
+ gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
+ gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
+ break;
+ }
+ }
+ }
+ //periodic reset of u: reset is not complete but bit by bit
+ tCounter++;
+ if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
+ // reset least significant bit
+ // most significant bit becomes least significant bit
+ for (int i = 1; i <= nHistoryTables; i++) {
+ for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
+ gtable[i][j].u = gtable[i][j].u >> 1;
+ }
+ }
+ }
+
+ if (bi->hitBank > 0) {
+ DPRINTF(Tage, "Updating tag table entry (%d,%d) for branch %lx\n",
+ bi->hitBank, bi->hitBankIndex, branch_pc);
+ ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
+ tagTableCounterBits);
+ // if the provider entry is not certified to be useful also update
+ // the alternate prediction
+ if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
+ if (bi->altBank > 0) {
+ ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
+ tagTableCounterBits);
+ DPRINTF(Tage, "Updating tag table entry (%d,%d) for"
+ " branch %lx\n", bi->hitBank, bi->hitBankIndex,
+ branch_pc);
+ }
+ if (bi->altBank == 0) {
+ baseUpdate(branch_pc, taken, bi);
+ }
+ }
+
+ // update the u counter
+ if (bi->tagePred != bi->altTaken) {
+ unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
+ bi->tagePred == taken, tagTableUBits);
+ }
+ } else {
+ baseUpdate(branch_pc, taken, bi);
+ }
+}
+
+void
+TAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
+{
+ TageBranchInfo* bi = (TageBranchInfo*)(b);
+ ThreadHistory& tHist = threadHistory[tid];
+ // UPDATE HISTORIES
+ bool pathbit = ((branch_pc >> instShiftAmt) & 1);
+ //on a squash, return pointers to this and recompute indices.
+ //update user history
+ updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
+ tHist.pathHist = (tHist.pathHist << 1) + pathbit;
+ tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
+
+ bi->ptGhist = tHist.ptGhist;
+ bi->pathHist = tHist.pathHist;
+ //prepare next index and tag computations for user branchs
+ for (int i = 1; i <= nHistoryTables; i++)
+ {
+ bi->ci[i] = tHist.computeIndices[i].comp;
+ bi->ct0[i] = tHist.computeTags[0][i].comp;
+ bi->ct1[i] = tHist.computeTags[1][i].comp;
+ tHist.computeIndices[i].update(tHist.gHist);
+ tHist.computeTags[0][i].update(tHist.gHist);
+ tHist.computeTags[1][i].update(tHist.gHist);
+ }
+ DPRINTF(Tage, "Updating global histories with branch:%lx; taken?:%d, "
+ "path Hist: %x; pointer:%d\n", branch_pc, taken,
tHist.pathHist,
+ tHist.ptGhist);
+}
+
+void
+TAGE::squash(ThreadID tid, bool taken, void *bp_history)
+{
+ TageBranchInfo* bi = (TageBranchInfo*)(bp_history);
+ ThreadHistory& tHist = threadHistory[tid];
+ DPRINTF(Tage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
+ "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
+ tHist.pathHist = bi->pathHist;
+ tHist.ptGhist = bi->ptGhist;
+ tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
+ tHist.gHist[0] = (taken ? 1 : 0);
+ for (int i = 1; i <= nHistoryTables; i++) {
+ tHist.computeIndices[i].comp = bi->ci[i];
+ tHist.computeTags[0][i].comp = bi->ct0[i];
+ tHist.computeTags[1][i].comp = bi->ct1[i];
+ tHist.computeIndices[i].update(tHist.gHist);
+ tHist.computeTags[0][i].update(tHist.gHist);
+ tHist.computeTags[1][i].update(tHist.gHist);
+ }
+}
+
+void
+TAGE::squash(ThreadID tid, void *bp_history)
+{
+ TageBranchInfo* bi = (TageBranchInfo*)(bp_history);
+ DPRINTF(Tage, "Deleting branch info: %lx\n", bi->branchPC);
+ delete bi;
+}
+
+bool
+TAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
+{
+ bool retval = predict(tid, branch_pc, true, bp_history);
+
+ DPRINTF(Tage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
+ updateHistories(tid, branch_pc, retval, bp_history);
+ assert(threadHistory[tid].gHist ==
+ &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
+
+ return retval;
+}
+
+void
+TAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
+{
+ TageBranchInfo* bi = (TageBranchInfo*) bp_history;
+ ThreadHistory& tHist = threadHistory[tid];
+ DPRINTF(Tage, "BTB miss resets prediction: %lx\n", branch_pc);
+ assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
+ tHist.gHist[0] = 0;
+ for (int i = 1; i <= nHistoryTables; i++) {
+ tHist.computeIndices[i].comp = bi->ci[i];
+ tHist.computeTags[0][i].comp = bi->ct0[i];
+ tHist.computeTags[1][i].comp = bi->ct1[i];
+ tHist.computeIndices[i].update(tHist.gHist);
+ tHist.computeTags[0][i].update(tHist.gHist);
+ tHist.computeTags[1][i].update(tHist.gHist);
+ }
+}
+
+void
+TAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
+{
+ DPRINTF(Tage, "UnConditionalBranch: %lx\n", br_pc);
+ predict(tid, br_pc, false, bp_history);
+ updateHistories(tid, br_pc, true, bp_history);
+ assert(threadHistory[tid].gHist ==
+ &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
+}
+
+TAGE*
+TAGEParams::create()
+{
+ return new TAGE(this);
+}
diff --git a/src/cpu/pred/tage.hh b/src/cpu/pred/tage.hh
new file mode 100644
index 0000000..c3d9a63
--- /dev/null
+++ b/src/cpu/pred/tage.hh
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2014 The University of Wisconsin
+ *
+ * Copyright (c) 2006 INRIA (Institut National de Recherche en
+ * Informatique et en Automatique / French National Research Institute
+ * for Computer Science and Applied Mathematics)
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
+ * from André Seznec's code.
+ */
+
+/* @file
+ * Implementation of a TAGE branch predictor. TAGE is a global-history
based
+ * branch predictor. It features a PC-indexed bimodal predictor and N
+ * partially tagged tables, indexed with a hash of the PC and the global
+ * branch history. The different lengths of global branch history used to
+ * index the partially tagged tables grow geometrically. A small path
history
+ * is also used in the hash.
+ *
+ * All TAGE tables are accessed in parallel, and the one using the longest
+ * history that matches provides the prediction (some exceptions apply).
+ * Entries are allocated in components using a longer history than the
+ * one that predicted when the prediction is incorrect.
+ */
+
+#ifndef __CPU_PRED_TAGE
+#define __CPU_PRED_TAGE
+
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/pred/bpred_unit.hh"
+#include "params/TAGE.hh"
+
+class TAGE: public BPredUnit
+{
+ public:
+ TAGE(const TAGEParams *params);
+
+ // Base class methods.
+ void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
override;
+ bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history)
override;
+ void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history)
override;
+ void update(ThreadID tid, Addr branch_addr, bool taken, void
*bp_history,
+ bool squashed) override;
+ virtual void squash(ThreadID tid, void *bp_history) override;
+ unsigned getGHR(ThreadID tid, void *bp_history) const override;
+
+ protected:
+ // Prediction Structures
+
+ // Tage Entry
+ struct TageEntry
+ {
+ int8_t ctr;
+ uint16_t tag;
+ uint8_t u;
+ TageEntry() : ctr(0), tag(0), u(0) { }
+ };
+
+ // Folded History Table - compressed history
+ // to mix with instruction PC to index partially
+ // tagged tables.
+ struct FoldedHistory
+ {
+ unsigned comp;
+ int compLength;
+ int origLength;
+ int outpoint;
+
+ void init(int original_length, int compressed_length)
+ {
+ comp = 0;
+ origLength = original_length;
+ compLength = compressed_length;
+ outpoint = original_length % compressed_length;
+ }
+
+ void update(uint8_t * h)
+ {
+ comp = (comp << 1) | h[0];
+ comp ^= h[origLength] << outpoint;
+ comp ^= (comp >> compLength);
+ comp &= (ULL(1) << compLength) - 1;
+ }
+ };
+
+ // Primary branch history entry
+ struct TageBranchInfo
+ {
+ int pathHist;
+ int ptGhist;
+ int hitBank;
+ int hitBankIndex;
+ int altBank;
+ int altBankIndex;
+ int bimodalIndex;
+
+ bool tagePred;
+ bool altTaken;
+ bool condBranch;
+ bool longestMatchPred;
+ bool pseudoNewAlloc;
+ Addr branchPC;
+
+ // Pointer to dynamically allocated storage
+ // to save table indices and folded histories.
+ // To do one call to new instead of five.
+ int *storage;
+
+ // Pointers to actual saved array within the dynamically
+ // allocated storage.
+ int *tableIndices;
+ int *tableTags;
+ int *ci;
+ int *ct0;
+ int *ct1;
+
+ TageBranchInfo(int sz)
+ : pathHist(0), ptGhist(0),
+ hitBank(0), hitBankIndex(0),
+ altBank(0), altBankIndex(0),
+ bimodalIndex(0),
+ tagePred(false), altTaken(false),
+ condBranch(false), longestMatchPred(false),
+ pseudoNewAlloc(false), branchPC(0)
+ {
+ storage = new int [sz * 5];
+ tableIndices = storage;
+ tableTags = storage + sz;
+ ci = tableTags + sz;
+ ct0 = ci + sz;
+ ct1 = ct0 + sz;
+ }
+
+ virtual ~TageBranchInfo()
+ {
+ delete[] storage;
+ }
+ };
+
+ /**
+ * Computes the index used to access the
+ * bimodal table.
+ * @param pc_in The unshifted branch PC.
+ */
+ int bindex(Addr pc_in) const;
+
+ /**
+ * Computes the index used to access a
+ * partially tagged table.
+ * @param tid The thread ID used to select the
+ * global histories to use.
+ * @param pc The unshifted branch PC.
+ * @param bank The partially tagged table to access.
+ */
+ inline int gindex(ThreadID tid, Addr pc, int bank) const;
+
+ /**
+ * Utility function to shuffle the path history
+ * depending on which tagged table we are accessing.
+ * @param phist The path history.
+ * @param size Number of path history bits to use.
+ * @param bank The partially tagged table to access.
+ */
+ int F(int phist, int size, int bank) const;
+
+ /**
+ * Computes the partial tag of a tagged table.
+ * @param tid the thread ID used to select the
+ * global histories to use.
+ * @param pc The unshifted branch PC.
+ * @param bank The partially tagged table to access.
+ */
+ inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const;
+
+ /**
+ * Updates a direction counter based on the actual
+ * branch outcome.
+ * @param ctr Reference to counter to update.
+ * @param taken Actual branch outcome.
+ * @param nbits Counter width.
+ */
+ void ctrUpdate(int8_t & ctr, bool taken, int nbits);
+
+ /**
+ * Updates an unsigned counter based on up/down parameter
+ * @param ctr Reference to counter to update.
+ * @param up Boolean indicating if the counter is
incremented/decremented
+ * If true it is incremented, if false it is decremented
+ * @param nbits Counter width.
+ */
+ void unsignedCtrUpdate(uint8_t & ctr, bool taken, unsigned nbits);
+
+ /**
+ * Get a branch prediction from the bimodal
+ * predictor.
+ * @param pc The unshifted branch PC.
+ * @param bi Pointer to information on the
+ * prediction.
+ */
+ bool getBimodePred(Addr pc, TageBranchInfo* bi) const;
+
+ /**
+ * Updates the bimodal predictor.
+ * @param pc The unshifted branch PC.
+ * @param taken The actual branch outcome.
+ * @param bi Pointer to information on the prediction
+ * recorded at prediction time.
+ */
+ void baseUpdate(Addr pc, bool taken, TageBranchInfo* bi);
+
+ /**
+ * (Speculatively) updates the global branch history.
+ * @param h Reference to pointer to global branch history.
+ * @param dir (Predicted) outcome to update the histories
+ * with.
+ * @param tab
+ * @param PT Reference to path history.
+ */
+ void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT);
+
+ /**
+ * Get a branch prediction from TAGE. *NOT* an override of
+ * BpredUnit::predict().
+ * @param tid The thread ID to select the global
+ * histories to use.
+ * @param branch_pc The unshifted branch PC.
+ * @param cond_branch True if the branch is conditional.
+ * @param b Reference to wrapping pointer to allow storing
+ * derived class prediction information in the base class.
+ */
+ virtual bool predict(
+ ThreadID tid, Addr branch_pc, bool cond_branch, void* &b);
+
+ /**
+ * Update TAGE. Called at execute to repair histories on a
misprediction
+ * and at commit to update the tables.
+ * @param tid The thread ID to select the global
+ * histories to use.
+ * @param branch_pc The unshifted branch PC.
+ * @param taken Actual branch outcome.
+ * @param bi Pointer to information on the prediction
+ * recorded at prediction time.
+ */
+ void update(ThreadID tid, Addr branch_pc, bool taken, TageBranchInfo*
bi);
+
+ /**
+ * (Speculatively) updates global histories (path and direction).
+ * Also recomputes compressed (folded) histories based on the
+ * branch direction.
+ * @param tid The thread ID to select the histories
+ * to update.
+ * @param branch_pc The unshifted branch PC.
+ * @param taken (Predicted) branch direction.
+ * @param b Wrapping pointer to TageBranchInfo (to allow
+ * storing derived class prediction information in the
+ * base class).
+ */
+ void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void*
b);
+
+ /**
+ * Restores speculatively updated path and direction histories.
+ * Also recomputes compressed (folded) histories based on the
+ * correct branch outcome.
+ * This version of squash() is called once on a branch misprediction.
+ * @param tid The Thread ID to select the histories to rollback.
+ * @param taken The correct branch outcome.
+ * @param bp_history Wrapping pointer to TageBranchInfo (to allow
+ * storing derived class prediction information in the
+ * base class).
+ * @post bp_history points to valid memory.
+ */
+ virtual void squash(ThreadID tid, bool taken, void *bp_history);
+
+ /**
+ * Update TAGE for conditional branches.
+ * @param branch_pc The unshifted branch PC.
+ * @param taken Actual branch outcome.
+ * @param bi Pointer to information on the prediction
+ * recorded at prediction time.
+ * @nrand Random int number from 0 to 3
+ */
+ virtual void condBranchUpdate(
+ Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand);
+
+ /**
+ * TAGE prediction called from TAGE::predict
+ * @param tid The thread ID to select the global
+ * histories to use.
+ * @param branch_pc The unshifted branch PC.
+ * @param cond_branch True if the branch is conditional.
+ * @param bi Pointer to the TageBranchInfo
+ */
+ bool tagePredict(
+ ThreadID tid, Addr branch_pc, bool cond_branch, TageBranchInfo*
bi);
+
+ const unsigned logRatioBiModalHystEntries;
+ const unsigned nHistoryTables;
+ const unsigned tagTableCounterBits;
+ const unsigned tagTableUBits;
+ const unsigned histBufferSize;
+ const unsigned minHist;
+ const unsigned maxHist;
+ const unsigned pathHistBits;
+
+ const std::vector<unsigned> tagTableTagWidths;
+ const std::vector<int> logTagTableSizes;
+
+ std::vector<bool> btablePrediction;
+ std::vector<bool> btableHysteresis;
+ TageEntry **gtable;
+
+ // Keep per-thread histories to
+ // support SMT.
+ struct ThreadHistory {
+ // Speculative path history
+ // (LSB of branch address)
+ int pathHist;
+
+ // Speculative branch direction
+ // history (circular buffer)
+ // @TODO Convert to std::vector<bool>
+ uint8_t *globalHistory;
+
+ // Pointer to most recent branch outcome
+ uint8_t* gHist;
+
+ // Index to most recent branch outcome
+ int ptGhist;
+
+ // Speculative folded histories.
+ FoldedHistory *computeIndices;
+ FoldedHistory *computeTags[2];
+ };
+
+ std::vector<ThreadHistory> threadHistory;
+
+ int *histLengths;
+ int *tableIndices;
+ int *tableTags;
+
+ int8_t useAltPredForNewlyAllocated;
+ uint64_t tCounter;
+ uint64_t logUResetPeriod;
+ unsigned useAltOnNaBits;
+};
+
+#endif // __CPU_PRED_TAGE

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14435
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I8323adbfd5c9a45db23cfff234218280e639f9ed
Gerrit-Change-Number: 14435
Gerrit-PatchSet: 1
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-MessageType: newchange
Pau Cabre (Gerrit)
2018-11-22 13:24:28 UTC
Permalink
Hello Gabe Black, Jason Lowe-Power, Sudhanshu Jha, Andreas Sandberg,

I'd like you to reexamine a change. Please visit

https://gem5-review.googlesource.com/c/public/gem5/+/14435

to look at the new patch set (#2).

Change subject: cpu: split LTAGE implementation into a base TAGE and a
derived LTAGE
......................................................................

cpu: split LTAGE implementation into a base TAGE and a derived LTAGE

The new derived LTAGE is equivalent to the original LTAGE implementation
The default values of the TAGE branch predictor match the settings of the
8C-TAGE configuration described in https://www.jilp.org/vol8/v8paper1.pdf

Change-Id: I8323adbfd5c9a45db23cfff234218280e639f9ed
Signed-off-by: Pau Cabre <***@metempsy.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/SConscript
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
A src/cpu/pred/tage.cc
A src/cpu/pred/tage.hh
6 files changed, 1,110 insertions(+), 828 deletions(-)
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14435
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I8323adbfd5c9a45db23cfff234218280e639f9ed
Gerrit-Change-Number: 14435
Gerrit-PatchSet: 2
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-Reviewer: Andreas Sandberg <***@arm.com>
Gerrit-Reviewer: Gabe Black <***@google.com>
Gerrit-Reviewer: Jason Lowe-Power <***@lowepower.com>
Gerrit-Reviewer: Sudhanshu Jha <***@arm.com>
Gerrit-MessageType: newpatchset
Pau Cabre (Gerrit)
2018-11-28 17:25:52 UTC
Permalink
Pau Cabre has submitted this change and it was merged. (
https://gem5-review.googlesource.com/c/public/gem5/+/14435 )

Change subject: cpu: split LTAGE implementation into a base TAGE and a
derived LTAGE
......................................................................

cpu: split LTAGE implementation into a base TAGE and a derived LTAGE

The new derived LTAGE is equivalent to the original LTAGE implementation
The default values of the TAGE branch predictor match the settings of the
8C-TAGE configuration described in https://www.jilp.org/vol8/v8paper1.pdf

Change-Id: I8323adbfd5c9a45db23cfff234218280e639f9ed
Signed-off-by: Pau Cabre <***@metempsy.com>
Reviewed-on: https://gem5-review.googlesource.com/c/14435
Reviewed-by: Sudhanshu Jha <***@arm.com>
Reviewed-by: Jason Lowe-Power <***@lowepower.com>
Maintainer: Jason Lowe-Power <***@lowepower.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/SConscript
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
A src/cpu/pred/tage.cc
A src/cpu/pred/tage.hh
6 files changed, 1,110 insertions(+), 828 deletions(-)

Approvals:
Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved
Sudhanshu Jha: Looks good to me, but someone else must approve



diff --git a/src/cpu/pred/BranchPredictor.py
b/src/cpu/pred/BranchPredictor.py
index 9d83abb..2c622cd 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -87,34 +87,55 @@
choicePredictorSize = Param.Unsigned(8192, "Size of choice predictor")
choiceCtrBits = Param.Unsigned(2, "Bits of choice counters")

-class LTAGE(BranchPredictor):
+# TAGE branch predictor as described in
https://www.jilp.org/vol8/v8paper1.pdf
+# The default sizes below are for the 8C-TAGE configuration (63.5 Kbits)
+class TAGE(BranchPredictor):
+ type = 'TAGE'
+ cxx_class = 'TAGE'
+ cxx_header = "cpu/pred/tage.hh"
+
+ nHistoryTables = Param.Unsigned(7, "Number of history tables")
+ minHist = Param.Unsigned(5, "Minimum history size of LTAGE")
+ maxHist = Param.Unsigned(130, "Maximum history size of LTAGE")
+
+ tagTableTagWidths = VectorParam.Unsigned(
+ [0, 9, 9, 10, 10, 11, 11, 12], "Tag size in TAGE tag tables")
+ logTagTableSizes = VectorParam.Int(
+ [13, 9, 9, 9, 9, 9, 9, 9], "Log2 of TAGE table sizes")
+ logRatioBiModalHystEntries = Param.Unsigned(2,
+ "Log num of prediction entries for a shared hysteresis bit " \
+ "for the Bimodal")
+
+ tagTableCounterBits = Param.Unsigned(3, "Number of tag table counter
bits")
+ tagTableUBits = Param.Unsigned(2, "Number of tag table u bits")
+
+ histBufferSize = Param.Unsigned(2097152,
+ "A large number to track all branch histories(2MEntries
default)")
+
+ pathHistBits = Param.Unsigned(16, "Path history size")
+ logUResetPeriod = Param.Unsigned(18,
+ "Log period in number of branches to reset TAGE useful counters")
+ useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter")
+
+
+# LTAGE branch predictor as described in
+# https://www.irisa.fr/caps/people/seznec/L-TAGE.pdf
+# It is basically a TAGE predictor plus a loop predictor
+# The differnt TAGE sizes are updated according to the paper values (256
Kbits)
+class LTAGE(TAGE):
type = 'LTAGE'
cxx_class = 'LTAGE'
cxx_header = "cpu/pred/ltage.hh"

- logRatioBiModalHystEntries = Param.Unsigned(2,
- "Log num of prediction entries for a shared hysteresis bit " \
- "for the Bimodal")
- logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor")
- nHistoryTables = Param.Unsigned(12, "Number of history tables")
- tagTableCounterBits = Param.Unsigned(3, "Number of tag table counter
bits")
- tagTableUBits = Param.Unsigned(2, "Number of tag table u bits")
- histBufferSize = Param.Unsigned(2097152,
- "A large number to track all branch histories(2MEntries
default)")
- minHist = Param.Unsigned(4, "Minimum history size of LTAGE")
- maxHist = Param.Unsigned(640, "Maximum history size of LTAGE")
- pathHistBits = Param.Unsigned(16, "Path history size")
- tagTableTagWidths = VectorParam.Unsigned(
- [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15],
- "Tag size in TAGE tag tables")
- logTagTableSizes = VectorParam.Int(
- [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9],
- "Log2 of TAGE table sizes")
- logUResetPeriod = Param.Unsigned(19,
- "Log period in number of branches to reset TAGE useful counters")
- useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter")
- withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter")
+ nHistoryTables = 12
+ minHist = 4
+ maxHist = 640
+ tagTableTagWidths = [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15]
+ logTagTableSizes = [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9]
+ logUResetPeriod = 19

+ logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor")
+ withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter")
loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop
entry")
loopTableConfidenceBits = Param.Unsigned(2,
"Number of confidence bits per loop entry")
diff --git a/src/cpu/pred/SConscript b/src/cpu/pred/SConscript
index 1cdf7bb..b0e6309 100644
--- a/src/cpu/pred/SConscript
+++ b/src/cpu/pred/SConscript
@@ -43,7 +43,9 @@
Source('ras.cc')
Source('tournament.cc')
Source ('bi_mode.cc')
+Source('tage.cc')
Source('ltage.cc')
DebugFlag('FreeList')
DebugFlag('Branch')
+DebugFlag('Tage')
DebugFlag('LTage')
diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index d6cc087..3be0193 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -48,16 +48,8 @@
#include "debug/LTage.hh"

LTAGE::LTAGE(const LTAGEParams *params)
- : BPredUnit(params),
- logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
+ : TAGE(params),
logSizeLoopPred(params->logSizeLoopPred),
- nHistoryTables(params->nHistoryTables),
- tagTableCounterBits(params->tagTableCounterBits),
- tagTableUBits(params->tagTableUBits),
- histBufferSize(params->histBufferSize),
- minHist(params->minHist),
- maxHist(params->maxHist),
- pathHistBits(params->pathHistBits),
loopTableAgeBits(params->loopTableAgeBits),
loopTableConfidenceBits(params->loopTableConfidenceBits),
loopTableTagBits(params->loopTableTagBits),
@@ -66,18 +58,9 @@
confidenceThreshold((1 << loopTableConfidenceBits) - 1),
loopTagMask((1 << loopTableTagBits) - 1),
loopNumIterMask((1 << loopTableIterBits) - 1),
- tagTableTagWidths(params->tagTableTagWidths),
- logTagTableSizes(params->logTagTableSizes),
- threadHistory(params->numThreads),
- logUResetPeriod(params->logUResetPeriod),
- useAltOnNaBits(params->useAltOnNaBits),
+ loopUseCounter(0),
withLoopBits(params->withLoopBits)
{
- // Current method for periodically resetting the u counter bits only
- // works for 1 or 2 bits
- // Also make sure that it is not 0
- assert(tagTableUBits <= 2 && (tagTableUBits > 0));
-
// we use uint16_t type for these vales, so they cannot be more than
// 16 bits
assert(loopTableTagBits <= 16);
@@ -85,81 +68,7 @@

assert(logSizeLoopPred >= logLoopTableAssoc);

- // we use int type for the path history, so it cannot be more than
- // its size
- assert(pathHistBits <= (sizeof(int)*8));
-
- // initialize the counter to half of the period
- assert(logUResetPeriod != 0);
- tCounter = ULL(1) << (logUResetPeriod - 1);
-
- assert(params->histBufferSize > params->maxHist * 2);
- useAltPredForNewlyAllocated = 0;
-
- for (auto& history : threadHistory) {
- history.pathHist = 0;
- history.globalHistory = new uint8_t[histBufferSize];
- history.gHist = history.globalHistory;
- memset(history.gHist, 0, histBufferSize);
- history.ptGhist = 0;
- }
-
- histLengths = new int [nHistoryTables+1];
- histLengths[1] = minHist;
- histLengths[nHistoryTables] = maxHist;
-
- for (int i = 2; i <= nHistoryTables; i++) {
- histLengths[i] = (int) (((double) minHist *
- pow ((double) (maxHist) / (double) minHist,
- (double) (i - 1) / (double) ((nHistoryTables- 1))))
- + 0.5);
- }
-
- assert(tagTableTagWidths.size() == (nHistoryTables+1));
- assert(logTagTableSizes.size() == (nHistoryTables+1));
-
- // First entry is for the Bimodal table and it is untagged in this
- // implementation
- assert(tagTableTagWidths[0] == 0);
-
- for (auto& history : threadHistory) {
- history.computeIndices = new FoldedHistory[nHistoryTables+1];
- history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
- history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
-
- for (int i = 1; i <= nHistoryTables; i++) {
- history.computeIndices[i].init(
- histLengths[i], (logTagTableSizes[i]));
- history.computeTags[0][i].init(
- history.computeIndices[i].origLength,
tagTableTagWidths[i]);
- history.computeTags[1][i].init(
- history.computeIndices[i].origLength,
tagTableTagWidths[i]-1);
- DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
- histLengths[i], logTagTableSizes[i],
tagTableTagWidths[i]);
- }
- }
-
- const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
- btablePrediction.resize(bimodalTableSize, false);
- btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
- true);
-
ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
- gtable = new TageEntry*[nHistoryTables + 1];
- for (int i = 1; i <= nHistoryTables; i++) {
- gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
- }
-
- tableIndices = new int [nHistoryTables+1];
- tableTags = new int [nHistoryTables+1];
-
- loopUseCounter = 0;
-}
-
-int
-LTAGE::bindex(Addr pc_in) const
-{
- return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) -
1));
}

int
@@ -176,113 +85,9 @@
return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
}

-int
-LTAGE::F(int A, int size, int bank) const
-{
- int A1, A2;
-
- A = A & ((ULL(1) << size) - 1);
- A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
- A2 = (A >> logTagTableSizes[bank]);
- A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
- + (A2 >> (logTagTableSizes[bank] - bank));
- A = A1 ^ A2;
- A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
- + (A >> (logTagTableSizes[bank] - bank));
- return (A);
-}
-
-
-// gindex computes a full hash of pc, ghist and pathHist
-int
-LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
-{
- int index;
- int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
- histLengths[bank];
- const Addr shiftedPc = pc >> instShiftAmt;
- index =
- shiftedPc ^
- (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
- threadHistory[tid].computeIndices[bank].comp ^
- F(threadHistory[tid].pathHist, hlen, bank);
-
- return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
-}
-
-
-// Tag computation
-uint16_t
-LTAGE::gtag(ThreadID tid, Addr pc, int bank) const
-{
- int tag = (pc >> instShiftAmt) ^
- threadHistory[tid].computeTags[0][bank].comp ^
- (threadHistory[tid].computeTags[1][bank].comp << 1);
-
- return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
-}
-
-
-// Up-down saturating counter
-void
-LTAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
-{
- assert(nbits <= sizeof(int8_t) << 3);
- if (taken) {
- if (ctr < ((1 << (nbits - 1)) - 1))
- ctr++;
- } else {
- if (ctr > -(1 << (nbits - 1)))
- ctr--;
- }
-}
-
-// Up-down unsigned saturating counter
-void
-LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
-{
- assert(nbits <= sizeof(uint8_t) << 3);
- if (up) {
- if (ctr < ((1 << nbits) - 1))
- ctr++;
- } else {
- if (ctr)
- ctr--;
- }
-}
-
-// Bimodal prediction
-bool
-LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
-{
- return btablePrediction[bi->bimodalIndex];
-}
-
-
-// Update the bimodal predictor: a hysteresis bit is shared among N
prediction
-// bits (N = 2 ^ logRatioBiModalHystEntries)
-void
-LTAGE::baseUpdate(Addr pc, bool taken, BranchInfo* bi)
-{
- int inter = (btablePrediction[bi->bimodalIndex] << 1)
- + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
- if (taken) {
- if (inter < 3)
- inter++;
- } else if (inter > 0) {
- inter--;
- }
- const bool pred = inter >> 1;
- const bool hyst = inter & 1;
- btablePrediction[bi->bimodalIndex] = pred;
- btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] =
hyst;
- DPRINTF(LTage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred,
hyst);
-}
-
-
//loop prediction: only used if high confidence
bool
-LTAGE::getLoop(Addr pc, BranchInfo* bi) const
+LTAGE::getLoop(Addr pc, LTageBranchInfo* bi) const
{
bi->loopHit = -1;
bi->loopPredValid = false;
@@ -308,7 +113,7 @@
}

void
-LTAGE::specLoopUpdate(Addr pc, bool taken, BranchInfo* bi)
+LTAGE::specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
{
if (bi->loopHit>=0) {
int index = lindex(pc);
@@ -322,7 +127,7 @@
}

void
-LTAGE::loopUpdate(Addr pc, bool taken, BranchInfo* bi)
+LTAGE::loopUpdate(Addr pc, bool taken, LTageBranchInfo* bi)
{
int idx = bi->loopIndex + bi->loopHit;
if (bi->loopHit >= 0) {
@@ -409,319 +214,57 @@

}

-// shifting the global history: we manage the history in a big table in
order
-// to reduce simulation time
-void
-LTAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
-{
- if (pt == 0) {
- DPRINTF(LTage, "Rolling over the histories\n");
- // Copy beginning of globalHistoryBuffer to end, such that
- // the last maxHist outcomes are still reachable
- // through pt[0 .. maxHist - 1].
- for (int i = 0; i < maxHist; i++)
- tab[histBufferSize - maxHist + i] = tab[i];
- pt = histBufferSize - maxHist;
- h = &tab[pt];
- }
- pt--;
- h--;
- h[0] = (dir) ? 1 : 0;
-}
-
-// Get GHR for hashing indirect predictor
-// Build history backwards from pointer in
-// bp_history.
-unsigned
-LTAGE::getGHR(ThreadID tid, void *bp_history) const
-{
- BranchInfo* bi = static_cast<BranchInfo*>(bp_history);
- unsigned val = 0;
- for (unsigned i = 0; i < 32; i++) {
- // Make sure we don't go out of bounds
- int gh_offset = bi->ptGhist + i;
- assert(&(threadHistory[tid].globalHistory[gh_offset]) <
- threadHistory[tid].globalHistory + histBufferSize);
- val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
- }
-
- return val;
-}
-
//prediction
bool
LTAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
{
- BranchInfo *bi = new BranchInfo(nHistoryTables+1);
+ LTageBranchInfo *bi = new LTageBranchInfo(nHistoryTables+1);
b = (void*)(bi);
- Addr pc = branch_pc;
- bool pred_taken = true;
- bi->loopHit = -1;
+
+ bool pred_taken = tagePredict(tid, branch_pc, cond_branch, bi);

if (cond_branch) {
- // TAGE prediction
+ bi->loopPred = getLoop(branch_pc, bi); // loop prediction

- // computes the table addresses and the partial tags
- for (int i = 1; i <= nHistoryTables; i++) {
- tableIndices[i] = gindex(tid, pc, i);
- bi->tableIndices[i] = tableIndices[i];
- tableTags[i] = gtag(tid, pc, i);
- bi->tableTags[i] = tableTags[i];
+ if ((loopUseCounter >= 0) && bi->loopPredValid) {
+ pred_taken = bi->loopPred;
}
-
- bi->bimodalIndex = bindex(pc);
-
- bi->hitBank = 0;
- bi->altBank = 0;
- //Look for the bank with longest matching history
- for (int i = nHistoryTables; i > 0; i--) {
- if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
- bi->hitBank = i;
- bi->hitBankIndex = tableIndices[bi->hitBank];
- break;
- }
- }
- //Look for the alternate bank
- for (int i = bi->hitBank - 1; i > 0; i--) {
- if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
- bi->altBank = i;
- bi->altBankIndex = tableIndices[bi->altBank];
- break;
- }
- }
- //computes the prediction and the alternate prediction
- if (bi->hitBank > 0) {
- if (bi->altBank > 0) {
- bi->altTaken =
- gtable[bi->altBank][tableIndices[bi->altBank]].ctr >=
0;
- }else {
- bi->altTaken = getBimodePred(pc, bi);
- }
-
- bi->longestMatchPred =
- gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
- bi->pseudoNewAlloc =
- abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <=
1;
-
- //if the entry is recognized as a newly allocated entry and
- //useAltPredForNewlyAllocated is positive use the alternate
- //prediction
- if ((useAltPredForNewlyAllocated < 0)
- || abs(2 *
- gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1)
1)
- bi->tagePred = bi->longestMatchPred;
- else
- bi->tagePred = bi->altTaken;
- } else {
- bi->altTaken = getBimodePred(pc, bi);
- bi->tagePred = bi->altTaken;
- bi->longestMatchPred = bi->altTaken;
- }
- //end TAGE prediction
-
- bi->loopPred = getLoop(pc, bi); // loop prediction
-
- pred_taken = (((loopUseCounter >= 0) && bi->loopPredValid)) ?
- (bi->loopPred): (bi->tagePred);
DPRINTF(LTage, "Predict for %lx: taken?:%d, loopTaken?:%d, "
"loopValid?:%d, loopUseCounter:%d, tagePred:%d,
altPred:%d\n",
branch_pc, pred_taken, bi->loopPred, bi->loopPredValid,
loopUseCounter, bi->tagePred, bi->altTaken);
}
- bi->branchPC = branch_pc;
- bi->condBranch = cond_branch;
+
specLoopUpdate(branch_pc, pred_taken, bi);
return pred_taken;
}

-// PREDICTOR UPDATE
void
-LTAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
- bool squashed)
+LTAGE::condBranchUpdate(Addr branch_pc, bool taken,
+ TageBranchInfo* tage_bi, int nrand)
{
- assert(bp_history);
+ LTageBranchInfo* bi = static_cast<LTageBranchInfo*>(tage_bi);

- BranchInfo *bi = static_cast<BranchInfo*>(bp_history);
+ // first update the loop predictor
+ loopUpdate(branch_pc, taken, bi);

- if (squashed) {
- // This restores the global history, then update it
- // and recomputes the folded histories.
- squash(tid, taken, bp_history);
- return;
+ if (bi->loopPredValid) {
+ if (bi->tagePred != bi->loopPred) {
+ ctrUpdate(loopUseCounter,
+ (bi->loopPred == taken),
+ withLoopBits);
+ }
}

- int nrand = random_mt.random<int>(0,3);
- Addr pc = branch_pc;
- if (bi->condBranch) {
- DPRINTF(LTage, "Updating tables for branch:%lx; taken?:%d\n",
- branch_pc, taken);
- // first update the loop predictor
- loopUpdate(pc, taken, bi);
-
- if (bi->loopPredValid) {
- if (bi->tagePred != bi->loopPred) {
- ctrUpdate(loopUseCounter,
- (bi->loopPred == taken),
- withLoopBits);
- }
- }
-
- // TAGE UPDATE
- // try to allocate a new entries only if prediction was wrong
- bool longest_match_pred = false;
- bool alloc = (bi->tagePred != taken) && (bi->hitBank <
nHistoryTables);
- if (bi->hitBank > 0) {
- // Manage the selection between longest matching and alternate
- // matching for "pseudo"-newly allocated longest matching entry
- longest_match_pred = bi->longestMatchPred;
- bool PseudoNewAlloc = bi->pseudoNewAlloc;
- // an entry is considered as newly allocated if its prediction
- // counter is weak
- if (PseudoNewAlloc) {
- if (longest_match_pred == taken) {
- alloc = false;
- }
- // if it was delivering the correct prediction, no need to
- // allocate new entry even if the overall prediction was
false
- if (longest_match_pred != bi->altTaken) {
- ctrUpdate(useAltPredForNewlyAllocated,
- bi->altTaken == taken, useAltOnNaBits);
- }
- }
- }
-
- if (alloc) {
- // is there some "unuseful" entry to allocate
- uint8_t min = 1;
- for (int i = nHistoryTables; i > bi->hitBank; i--) {
- if (gtable[i][bi->tableIndices[i]].u < min) {
- min = gtable[i][bi->tableIndices[i]].u;
- }
- }
-
- // we allocate an entry with a longer history
- // to avoid ping-pong, we do not choose systematically the
next
- // entry, but among the 3 next entries
- int Y = nrand &
- ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
- int X = bi->hitBank + 1;
- if (Y & 1) {
- X++;
- if (Y & 2)
- X++;
- }
- // No entry available, forces one to be available
- if (min > 0) {
- gtable[X][bi->tableIndices[X]].u = 0;
- }
-
-
- //Allocate only one entry
- for (int i = X; i <= nHistoryTables; i++) {
- if ((gtable[i][bi->tableIndices[i]].u == 0)) {
- gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
- gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
- break;
- }
- }
- }
- //periodic reset of u: reset is not complete but bit by bit
- tCounter++;
- if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
- // reset least significant bit
- // most significant bit becomes least significant bit
- for (int i = 1; i <= nHistoryTables; i++) {
- for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
- gtable[i][j].u = gtable[i][j].u >> 1;
- }
- }
- }
-
- if (bi->hitBank > 0) {
- DPRINTF(LTage, "Updating tag table entry (%d,%d) for
branch %lx\n",
- bi->hitBank, bi->hitBankIndex, branch_pc);
- ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
- tagTableCounterBits);
- // if the provider entry is not certified to be useful also
update
- // the alternate prediction
- if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
- if (bi->altBank > 0) {
- ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr,
taken,
- tagTableCounterBits);
- DPRINTF(LTage, "Updating tag table entry (%d,%d) for"
- " branch %lx\n", bi->hitBank, bi->hitBankIndex,
- branch_pc);
- }
- if (bi->altBank == 0) {
- baseUpdate(pc, taken, bi);
- }
- }
-
- // update the u counter
- if (bi->tagePred != bi->altTaken) {
- unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
- bi->tagePred == taken, tagTableUBits);
- }
- } else {
- baseUpdate(pc, taken, bi);
- }
-
- //END PREDICTOR UPDATE
- }
- if (!squashed) {
- delete bi;
- }
-}
-
-void
-LTAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
-{
- BranchInfo* bi = (BranchInfo*)(b);
- ThreadHistory& tHist = threadHistory[tid];
- // UPDATE HISTORIES
- bool pathbit = ((branch_pc >> instShiftAmt) & 1);
- //on a squash, return pointers to this and recompute indices.
- //update user history
- updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
- tHist.pathHist = (tHist.pathHist << 1) + pathbit;
- tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
-
- bi->ptGhist = tHist.ptGhist;
- bi->pathHist = tHist.pathHist;
- //prepare next index and tag computations for user branchs
- for (int i = 1; i <= nHistoryTables; i++)
- {
- bi->ci[i] = tHist.computeIndices[i].comp;
- bi->ct0[i] = tHist.computeTags[0][i].comp;
- bi->ct1[i] = tHist.computeTags[1][i].comp;
- tHist.computeIndices[i].update(tHist.gHist);
- tHist.computeTags[0][i].update(tHist.gHist);
- tHist.computeTags[1][i].update(tHist.gHist);
- }
- DPRINTF(LTage, "Updating global histories with branch:%lx; taken?:%d, "
- "path Hist: %x; pointer:%d\n", branch_pc, taken,
tHist.pathHist,
- tHist.ptGhist);
+ TAGE::condBranchUpdate(branch_pc, taken, bi, nrand);
}

void
LTAGE::squash(ThreadID tid, bool taken, void *bp_history)
{
- BranchInfo* bi = (BranchInfo*)(bp_history);
- ThreadHistory& tHist = threadHistory[tid];
- DPRINTF(LTage, "Restoring branch info: %lx; taken? %d;
PathHistory:%x, "
- "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
- tHist.pathHist = bi->pathHist;
- tHist.ptGhist = bi->ptGhist;
- tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
- tHist.gHist[0] = (taken ? 1 : 0);
- for (int i = 1; i <= nHistoryTables; i++) {
- tHist.computeIndices[i].comp = bi->ci[i];
- tHist.computeTags[0][i].comp = bi->ct0[i];
- tHist.computeTags[1][i].comp = bi->ct1[i];
- tHist.computeIndices[i].update(tHist.gHist);
- tHist.computeTags[0][i].update(tHist.gHist);
- tHist.computeTags[1][i].update(tHist.gHist);
- }
+ TAGE::squash(tid, taken, bp_history);
+
+ LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);

if (bi->condBranch) {
if (bi->loopHit >= 0) {
@@ -729,14 +272,12 @@
ltable[idx].currentIterSpec = bi->currentIter;
}
}
-
}

void
LTAGE::squash(ThreadID tid, void *bp_history)
{
- BranchInfo* bi = (BranchInfo*)(bp_history);
- DPRINTF(LTage, "Deleting branch info: %lx\n", bi->branchPC);
+ LTageBranchInfo* bi = (LTageBranchInfo*)(bp_history);
if (bi->condBranch) {
if (bi->loopHit >= 0) {
int idx = bi->loopIndex + bi->loopHit;
@@ -744,48 +285,7 @@
}
}

- delete bi;
-}
-
-bool
-LTAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
-{
- bool retval = predict(tid, branch_pc, true, bp_history);
-
- DPRINTF(LTage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
- updateHistories(tid, branch_pc, retval, bp_history);
- assert(threadHistory[tid].gHist ==
- &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
-
- return retval;
-}
-
-void
-LTAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
-{
- BranchInfo* bi = (BranchInfo*) bp_history;
- ThreadHistory& tHist = threadHistory[tid];
- DPRINTF(LTage, "BTB miss resets prediction: %lx\n", branch_pc);
- assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
- tHist.gHist[0] = 0;
- for (int i = 1; i <= nHistoryTables; i++) {
- tHist.computeIndices[i].comp = bi->ci[i];
- tHist.computeTags[0][i].comp = bi->ct0[i];
- tHist.computeTags[1][i].comp = bi->ct1[i];
- tHist.computeIndices[i].update(tHist.gHist);
- tHist.computeTags[0][i].update(tHist.gHist);
- tHist.computeTags[1][i].update(tHist.gHist);
- }
-}
-
-void
-LTAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
-{
- DPRINTF(LTage, "UnConditionalBranch: %lx\n", br_pc);
- predict(tid, br_pc, false, bp_history);
- updateHistories(tid, br_pc, true, bp_history);
- assert(threadHistory[tid].gHist ==
- &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
+ TAGE::squash(tid, bp_history);
}

LTAGE*
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index 8b417d4..d614026 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -52,25 +52,20 @@
#ifndef __CPU_PRED_LTAGE
#define __CPU_PRED_LTAGE

+
#include <vector>

#include "base/types.hh"
-#include "cpu/pred/bpred_unit.hh"
+#include "cpu/pred/tage.hh"
#include "params/LTAGE.hh"

-class LTAGE: public BPredUnit
+class LTAGE: public TAGE
{
public:
LTAGE(const LTAGEParams *params);

// Base class methods.
- void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
override;
- bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history)
override;
- void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history)
override;
- void update(ThreadID tid, Addr branch_addr, bool taken, void
*bp_history,
- bool squashed) override;
void squash(ThreadID tid, void *bp_history) override;
- unsigned getGHR(ThreadID tid, void *bp_history) const override;

private:
// Prediction Structures
@@ -89,189 +84,40 @@
confidence(0), tag(0), age(0), dir(0) { }
};

- // Tage Entry
- struct TageEntry
- {
- int8_t ctr;
- uint16_t tag;
- uint8_t u;
- TageEntry() : ctr(0), tag(0), u(0) { }
- };
-
- // Folded History Table - compressed history
- // to mix with instruction PC to index partially
- // tagged tables.
- struct FoldedHistory
- {
- unsigned comp;
- int compLength;
- int origLength;
- int outpoint;
-
- void init(int original_length, int compressed_length)
- {
- comp = 0;
- origLength = original_length;
- compLength = compressed_length;
- outpoint = original_length % compressed_length;
- }
-
- void update(uint8_t * h)
- {
- comp = (comp << 1) | h[0];
- comp ^= h[origLength] << outpoint;
- comp ^= (comp >> compLength);
- comp &= (ULL(1) << compLength) - 1;
- }
- };
-
// Primary branch history entry
- struct BranchInfo
+ struct LTageBranchInfo : public TageBranchInfo
{
- int pathHist;
- int ptGhist;
- int hitBank;
- int hitBankIndex;
- int altBank;
- int altBankIndex;
- int bimodalIndex;
uint16_t loopTag;
uint16_t currentIter;

- bool tagePred;
- bool altTaken;
bool loopPred;
bool loopPredValid;
int loopIndex;
int loopHit;
- bool condBranch;
- bool longestMatchPred;
- bool pseudoNewAlloc;
- Addr branchPC;

- // Pointer to dynamically allocated storage
- // to save table indices and folded histories.
- // To do one call to new instead of five.
- int *storage;
-
- // Pointers to actual saved array within the dynamically
- // allocated storage.
- int *tableIndices;
- int *tableTags;
- int *ci;
- int *ct0;
- int *ct1;
-
- BranchInfo(int sz)
- : pathHist(0), ptGhist(0),
- hitBank(0), hitBankIndex(0),
- altBank(0), altBankIndex(0),
- bimodalIndex(0), loopTag(0), currentIter(0),
- tagePred(false), altTaken(false), loopPred(false),
- loopPredValid(false), loopIndex(0), loopHit(0),
- condBranch(false), longestMatchPred(false),
- pseudoNewAlloc(false), branchPC(0)
- {
- storage = new int [sz * 5];
- tableIndices = storage;
- tableTags = storage + sz;
- ci = tableTags + sz;
- ct0 = ci + sz;
- ct1 = ct0 + sz;
- }
-
- ~BranchInfo()
- {
- delete[] storage;
- }
+ LTageBranchInfo(int sz)
+ : TageBranchInfo(sz),
+ loopTag(0), currentIter(0),
+ loopPred(false),
+ loopPredValid(false), loopIndex(0), loopHit(0)
+ {}
};

/**
* Computes the index used to access the
- * bimodal table.
- * @param pc_in The unshifted branch PC.
- */
- int bindex(Addr pc_in) const;
-
- /**
- * Computes the index used to access the
* loop predictor.
* @param pc_in The unshifted branch PC.
*/
int lindex(Addr pc_in) const;

/**
- * Computes the index used to access a
- * partially tagged table.
- * @param tid The thread ID used to select the
- * global histories to use.
- * @param pc The unshifted branch PC.
- * @param bank The partially tagged table to access.
- */
- inline int gindex(ThreadID tid, Addr pc, int bank) const;
-
- /**
- * Utility function to shuffle the path history
- * depending on which tagged table we are accessing.
- * @param phist The path history.
- * @param size Number of path history bits to use.
- * @param bank The partially tagged table to access.
- */
- int F(int phist, int size, int bank) const;
-
- /**
- * Computes the partial tag of a tagged table.
- * @param tid the thread ID used to select the
- * global histories to use.
- * @param pc The unshifted branch PC.
- * @param bank The partially tagged table to access.
- */
- inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const;
-
- /**
- * Updates a direction counter based on the actual
- * branch outcome.
- * @param ctr Reference to counter to update.
- * @param taken Actual branch outcome.
- * @param nbits Counter width.
- */
- void ctrUpdate(int8_t & ctr, bool taken, int nbits);
-
- /**
- * Updates an unsigned counter based on up/down parameter
- * @param ctr Reference to counter to update.
- * @param up Boolean indicating if the counter is
incremented/decremented
- * If true it is incremented, if false it is decremented
- * @param nbits Counter width.
- */
- void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits);
-
- /**
- * Get a branch prediction from the bimodal
- * predictor.
- * @param pc The unshifted branch PC.
- * @param bi Pointer to information on the
- * prediction.
- */
- bool getBimodePred(Addr pc, BranchInfo* bi) const;
-
- /**
- * Updates the bimodal predictor.
- * @param pc The unshifted branch PC.
- * @param taken The actual branch outcome.
- * @param bi Pointer to information on the prediction
- * recorded at prediction time.
- */
- void baseUpdate(Addr pc, bool taken, BranchInfo* bi);
-
- /**
* Get a branch prediction from the loop
* predictor.
* @param pc The unshifted branch PC.
* @param bi Pointer to information on the
* prediction.
*/
- bool getLoop(Addr pc, BranchInfo* bi) const;
+ bool getLoop(Addr pc, LTageBranchInfo* bi) const;

/**
* Updates the loop predictor.
@@ -280,69 +126,7 @@
* @param bi Pointer to information on the
* prediction recorded at prediction time.
*/
- void loopUpdate(Addr pc, bool Taken, BranchInfo* bi);
-
- /**
- * (Speculatively) updates the global branch history.
- * @param h Reference to pointer to global branch history.
- * @param dir (Predicted) outcome to update the histories
- * with.
- * @param tab
- * @param PT Reference to path history.
- */
- void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT);
-
- /**
- * Get a branch prediction from L-TAGE. *NOT* an override of
- * BpredUnit::predict().
- * @param tid The thread ID to select the global
- * histories to use.
- * @param branch_pc The unshifted branch PC.
- * @param cond_branch True if the branch is conditional.
- * @param b Reference to wrapping pointer to allow storing
- * derived class prediction information in the base class.
- */
- bool predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b);
-
- /**
- * Update L-TAGE. Called at execute to repair histories on a
misprediction
- * and at commit to update the tables.
- * @param tid The thread ID to select the global
- * histories to use.
- * @param branch_pc The unshifted branch PC.
- * @param taken Actual branch outcome.
- * @param bi Pointer to information on the prediction
- * recorded at prediction time.
- */
- void update(ThreadID tid, Addr branch_pc, bool taken, BranchInfo* bi);
-
- /**
- * (Speculatively) updates global histories (path and direction).
- * Also recomputes compressed (folded) histories based on the
- * branch direction.
- * @param tid The thread ID to select the histories
- * to update.
- * @param branch_pc The unshifted branch PC.
- * @param taken (Predicted) branch direction.
- * @param b Wrapping pointer to BranchInfo (to allow
- * storing derived class prediction information in the
- * base class).
- */
- void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void*
b);
-
- /**
- * Restores speculatively updated path and direction histories.
- * Also recomputes compressed (folded) histories based on the
- * correct branch outcome.
- * This version of squash() is called once on a branch misprediction.
- * @param tid The Thread ID to select the histories to rollback.
- * @param taken The correct branch outcome.
- * @param bp_history Wrapping pointer to BranchInfo (to allow
- * storing derived class prediction information in the
- * base class).
- * @post bp_history points to valid memory.
- */
- void squash(ThreadID tid, bool taken, void *bp_history);
+ void loopUpdate(Addr pc, bool Taken, LTageBranchInfo* bi);

/**
* Speculatively updates the loop predictor
@@ -352,17 +136,48 @@
* @param bi Pointer to information on the prediction
* recorded at prediction time.
*/
- void specLoopUpdate(Addr pc, bool taken, BranchInfo* bi);
+ void specLoopUpdate(Addr pc, bool taken, LTageBranchInfo* bi);

- const unsigned logRatioBiModalHystEntries;
+ /**
+ * Update LTAGE for conditional branches.
+ * @param branch_pc The unshifted branch PC.
+ * @param taken Actual branch outcome.
+ * @param bi Pointer to information on the prediction
+ * recorded at prediction time.
+ * @nrand Random int number from 0 to 3
+ */
+ void condBranchUpdate(
+ Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand)
override;
+
+ /**
+ * Get a branch prediction from LTAGE. *NOT* an override of
+ * BpredUnit::predict().
+ * @param tid The thread ID to select the global
+ * histories to use.
+ * @param branch_pc The unshifted branch PC.
+ * @param cond_branch True if the branch is conditional.
+ * @param b Reference to wrapping pointer to allow storing
+ * derived class prediction information in the base class.
+ */
+ bool predict(
+ ThreadID tid, Addr branch_pc, bool cond_branch, void* &b) override;
+
+ /**
+ * Restores speculatively updated path and direction histories.
+ * Also recomputes compressed (folded) histories based on the
+ * correct branch outcome.
+ * This version of squash() is called once on a branch misprediction.
+ * @param tid The Thread ID to select the histories to rollback.
+ * @param taken The correct branch outcome.
+ * @param bp_history Wrapping pointer to TageBranchInfo (to allow
+ * storing derived class prediction information in the
+ * base class).
+ * @post bp_history points to valid memory.
+ */
+ void squash(
+ ThreadID tid, bool taken, void *bp_history) override;
+
const unsigned logSizeLoopPred;
- const unsigned nHistoryTables;
- const unsigned tagTableCounterBits;
- const unsigned tagTableUBits;
- const unsigned histBufferSize;
- const unsigned minHist;
- const unsigned maxHist;
- const unsigned pathHistBits;
const unsigned loopTableAgeBits;
const unsigned loopTableConfidenceBits;
const unsigned loopTableTagBits;
@@ -372,48 +187,9 @@
const uint16_t loopTagMask;
const uint16_t loopNumIterMask;

- const std::vector<unsigned> tagTableTagWidths;
- const std::vector<int> logTagTableSizes;
-
- std::vector<bool> btablePrediction;
- std::vector<bool> btableHysteresis;
- TageEntry **gtable;
LoopEntry *ltable;

- // Keep per-thread histories to
- // support SMT.
- struct ThreadHistory {
- // Speculative path history
- // (LSB of branch address)
- int pathHist;
-
- // Speculative branch direction
- // history (circular buffer)
- // @TODO Convert to std::vector<bool>
- uint8_t *globalHistory;
-
- // Pointer to most recent branch outcome
- uint8_t* gHist;
-
- // Index to most recent branch outcome
- int ptGhist;
-
- // Speculative folded histories.
- FoldedHistory *computeIndices;
- FoldedHistory *computeTags[2];
- };
-
- std::vector<ThreadHistory> threadHistory;
-
- int *histLengths;
- int *tableIndices;
- int *tableTags;
-
int8_t loopUseCounter;
- int8_t useAltPredForNewlyAllocated;
- uint64_t tCounter;
- uint64_t logUResetPeriod;
- unsigned useAltOnNaBits;
unsigned withLoopBits;
};

diff --git a/src/cpu/pred/tage.cc b/src/cpu/pred/tage.cc
new file mode 100644
index 0000000..c22e6b7
--- /dev/null
+++ b/src/cpu/pred/tage.cc
@@ -0,0 +1,609 @@
+/*
+ * Copyright (c) 2014 The University of Wisconsin
+ *
+ * Copyright (c) 2006 INRIA (Institut National de Recherche en
+ * Informatique et en Automatique / French National Research Institute
+ * for Computer Science and Applied Mathematics)
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
+ * from André Seznec's code.
+ */
+
+/* @file
+ * Implementation of a TAGE branch predictor
+ */
+
+#include "cpu/pred/tage.hh"
+
+#include "base/intmath.hh"
+#include "base/logging.hh"
+#include "base/random.hh"
+#include "base/trace.hh"
+#include "debug/Fetch.hh"
+#include "debug/Tage.hh"
+
+TAGE::TAGE(const TAGEParams *params)
+ : BPredUnit(params),
+ logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
+ nHistoryTables(params->nHistoryTables),
+ tagTableCounterBits(params->tagTableCounterBits),
+ tagTableUBits(params->tagTableUBits),
+ histBufferSize(params->histBufferSize),
+ minHist(params->minHist),
+ maxHist(params->maxHist),
+ pathHistBits(params->pathHistBits),
+ tagTableTagWidths(params->tagTableTagWidths),
+ logTagTableSizes(params->logTagTableSizes),
+ threadHistory(params->numThreads),
+ logUResetPeriod(params->logUResetPeriod),
+ useAltOnNaBits(params->useAltOnNaBits)
+{
+ // Current method for periodically resetting the u counter bits only
+ // works for 1 or 2 bits
+ // Also make sure that it is not 0
+ assert(tagTableUBits <= 2 && (tagTableUBits > 0));
+
+ // we use int type for the path history, so it cannot be more than
+ // its size
+ assert(pathHistBits <= (sizeof(int)*8));
+
+ // initialize the counter to half of the period
+ assert(logUResetPeriod != 0);
+ tCounter = ULL(1) << (logUResetPeriod - 1);
+
+ assert(params->histBufferSize > params->maxHist * 2);
+ useAltPredForNewlyAllocated = 0;
+
+ for (auto& history : threadHistory) {
+ history.pathHist = 0;
+ history.globalHistory = new uint8_t[histBufferSize];
+ history.gHist = history.globalHistory;
+ memset(history.gHist, 0, histBufferSize);
+ history.ptGhist = 0;
+ }
+
+ histLengths = new int [nHistoryTables+1];
+ histLengths[1] = minHist;
+ histLengths[nHistoryTables] = maxHist;
+
+ for (int i = 2; i <= nHistoryTables; i++) {
+ histLengths[i] = (int) (((double) minHist *
+ pow ((double) (maxHist) / (double) minHist,
+ (double) (i - 1) / (double) ((nHistoryTables- 1))))
+ + 0.5);
+ }
+
+ assert(tagTableTagWidths.size() == (nHistoryTables+1));
+ assert(logTagTableSizes.size() == (nHistoryTables+1));
+
+ // First entry is for the Bimodal table and it is untagged in this
+ // implementation
+ assert(tagTableTagWidths[0] == 0);
+
+ for (auto& history : threadHistory) {
+ history.computeIndices = new FoldedHistory[nHistoryTables+1];
+ history.computeTags[0] = new FoldedHistory[nHistoryTables+1];
+ history.computeTags[1] = new FoldedHistory[nHistoryTables+1];
+
+ for (int i = 1; i <= nHistoryTables; i++) {
+ history.computeIndices[i].init(
+ histLengths[i], (logTagTableSizes[i]));
+ history.computeTags[0][i].init(
+ history.computeIndices[i].origLength,
tagTableTagWidths[i]);
+ history.computeTags[1][i].init(
+ history.computeIndices[i].origLength,
tagTableTagWidths[i]-1);
+ DPRINTF(Tage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
+ histLengths[i], logTagTableSizes[i],
tagTableTagWidths[i]);
+ }
+ }
+
+ const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
+ btablePrediction.resize(bimodalTableSize, false);
+ btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
+ true);
+
+ gtable = new TageEntry*[nHistoryTables + 1];
+ for (int i = 1; i <= nHistoryTables; i++) {
+ gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
+ }
+
+ tableIndices = new int [nHistoryTables+1];
+ tableTags = new int [nHistoryTables+1];
+}
+
+int
+TAGE::bindex(Addr pc_in) const
+{
+ return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) -
1));
+}
+
+int
+TAGE::F(int A, int size, int bank) const
+{
+ int A1, A2;
+
+ A = A & ((ULL(1) << size) - 1);
+ A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
+ A2 = (A >> logTagTableSizes[bank]);
+ A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+ + (A2 >> (logTagTableSizes[bank] - bank));
+ A = A1 ^ A2;
+ A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+ + (A >> (logTagTableSizes[bank] - bank));
+ return (A);
+}
+
+
+// gindex computes a full hash of pc, ghist and pathHist
+int
+TAGE::gindex(ThreadID tid, Addr pc, int bank) const
+{
+ int index;
+ int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
+ histLengths[bank];
+ const Addr shiftedPc = pc >> instShiftAmt;
+ index =
+ shiftedPc ^
+ (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
+ threadHistory[tid].computeIndices[bank].comp ^
+ F(threadHistory[tid].pathHist, hlen, bank);
+
+ return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
+}
+
+
+// Tag computation
+uint16_t
+TAGE::gtag(ThreadID tid, Addr pc, int bank) const
+{
+ int tag = (pc >> instShiftAmt) ^
+ threadHistory[tid].computeTags[0][bank].comp ^
+ (threadHistory[tid].computeTags[1][bank].comp << 1);
+
+ return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
+}
+
+
+// Up-down saturating counter
+void
+TAGE::ctrUpdate(int8_t & ctr, bool taken, int nbits)
+{
+ assert(nbits <= sizeof(int8_t) << 3);
+ if (taken) {
+ if (ctr < ((1 << (nbits - 1)) - 1))
+ ctr++;
+ } else {
+ if (ctr > -(1 << (nbits - 1)))
+ ctr--;
+ }
+}
+
+// Up-down unsigned saturating counter
+void
+TAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
+{
+ assert(nbits <= sizeof(uint8_t) << 3);
+ if (up) {
+ if (ctr < ((1 << nbits) - 1))
+ ctr++;
+ } else {
+ if (ctr)
+ ctr--;
+ }
+}
+
+// Bimodal prediction
+bool
+TAGE::getBimodePred(Addr pc, TageBranchInfo* bi) const
+{
+ return btablePrediction[bi->bimodalIndex];
+}
+
+
+// Update the bimodal predictor: a hysteresis bit is shared among N
prediction
+// bits (N = 2 ^ logRatioBiModalHystEntries)
+void
+TAGE::baseUpdate(Addr pc, bool taken, TageBranchInfo* bi)
+{
+ int inter = (btablePrediction[bi->bimodalIndex] << 1)
+ + btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries];
+ if (taken) {
+ if (inter < 3)
+ inter++;
+ } else if (inter > 0) {
+ inter--;
+ }
+ const bool pred = inter >> 1;
+ const bool hyst = inter & 1;
+ btablePrediction[bi->bimodalIndex] = pred;
+ btableHysteresis[bi->bimodalIndex >> logRatioBiModalHystEntries] =
hyst;
+ DPRINTF(Tage, "Updating branch %lx, pred:%d, hyst:%d\n", pc, pred,
hyst);
+}
+
+// shifting the global history: we manage the history in a big table in
order
+// to reduce simulation time
+void
+TAGE::updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &pt)
+{
+ if (pt == 0) {
+ DPRINTF(Tage, "Rolling over the histories\n");
+ // Copy beginning of globalHistoryBuffer to end, such that
+ // the last maxHist outcomes are still reachable
+ // through pt[0 .. maxHist - 1].
+ for (int i = 0; i < maxHist; i++)
+ tab[histBufferSize - maxHist + i] = tab[i];
+ pt = histBufferSize - maxHist;
+ h = &tab[pt];
+ }
+ pt--;
+ h--;
+ h[0] = (dir) ? 1 : 0;
+}
+
+// Get GHR for hashing indirect predictor
+// Build history backwards from pointer in
+// bp_history.
+unsigned
+TAGE::getGHR(ThreadID tid, void *bp_history) const
+{
+ TageBranchInfo* bi = static_cast<TageBranchInfo*>(bp_history);
+ unsigned val = 0;
+ for (unsigned i = 0; i < 32; i++) {
+ // Make sure we don't go out of bounds
+ int gh_offset = bi->ptGhist + i;
+ assert(&(threadHistory[tid].globalHistory[gh_offset]) <
+ threadHistory[tid].globalHistory + histBufferSize);
+ val |= ((threadHistory[tid].globalHistory[gh_offset] & 0x1) << i);
+ }
+
+ return val;
+}
+
+//prediction
+bool
+TAGE::predict(ThreadID tid, Addr branch_pc, bool cond_branch, void* &b)
+{
+ TageBranchInfo *bi = new TageBranchInfo(nHistoryTables+1);
+ b = (void*)(bi);
+ return tagePredict(tid, branch_pc, cond_branch, bi);
+}
+
+bool
+TAGE::tagePredict(ThreadID tid, Addr branch_pc,
+ bool cond_branch, TageBranchInfo* bi)
+{
+ Addr pc = branch_pc;
+ bool pred_taken = true;
+
+ if (cond_branch) {
+ // TAGE prediction
+
+ // computes the table addresses and the partial tags
+ for (int i = 1; i <= nHistoryTables; i++) {
+ tableIndices[i] = gindex(tid, pc, i);
+ bi->tableIndices[i] = tableIndices[i];
+ tableTags[i] = gtag(tid, pc, i);
+ bi->tableTags[i] = tableTags[i];
+ }
+
+ bi->bimodalIndex = bindex(pc);
+
+ bi->hitBank = 0;
+ bi->altBank = 0;
+ //Look for the bank with longest matching history
+ for (int i = nHistoryTables; i > 0; i--) {
+ if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
+ bi->hitBank = i;
+ bi->hitBankIndex = tableIndices[bi->hitBank];
+ break;
+ }
+ }
+ //Look for the alternate bank
+ for (int i = bi->hitBank - 1; i > 0; i--) {
+ if (gtable[i][tableIndices[i]].tag == tableTags[i]) {
+ bi->altBank = i;
+ bi->altBankIndex = tableIndices[bi->altBank];
+ break;
+ }
+ }
+ //computes the prediction and the alternate prediction
+ if (bi->hitBank > 0) {
+ if (bi->altBank > 0) {
+ bi->altTaken =
+ gtable[bi->altBank][tableIndices[bi->altBank]].ctr >=
0;
+ }else {
+ bi->altTaken = getBimodePred(pc, bi);
+ }
+
+ bi->longestMatchPred =
+ gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr >= 0;
+ bi->pseudoNewAlloc =
+ abs(2 * gtable[bi->hitBank][bi->hitBankIndex].ctr + 1) <=
1;
+
+ //if the entry is recognized as a newly allocated entry and
+ //useAltPredForNewlyAllocated is positive use the alternate
+ //prediction
+ if ((useAltPredForNewlyAllocated < 0)
+ || abs(2 *
+ gtable[bi->hitBank][tableIndices[bi->hitBank]].ctr + 1)
1)
+ bi->tagePred = bi->longestMatchPred;
+ else
+ bi->tagePred = bi->altTaken;
+ } else {
+ bi->altTaken = getBimodePred(pc, bi);
+ bi->tagePred = bi->altTaken;
+ bi->longestMatchPred = bi->altTaken;
+ }
+ //end TAGE prediction
+
+ pred_taken = (bi->tagePred);
+ DPRINTF(Tage, "Predict for %lx: taken?:%d, tagePred:%d,
altPred:%d\n",
+ branch_pc, pred_taken, bi->tagePred, bi->altTaken);
+ }
+ bi->branchPC = branch_pc;
+ bi->condBranch = cond_branch;
+ return pred_taken;
+}
+
+// PREDICTOR UPDATE
+void
+TAGE::update(ThreadID tid, Addr branch_pc, bool taken, void* bp_history,
+ bool squashed)
+{
+ assert(bp_history);
+
+ TageBranchInfo *bi = static_cast<TageBranchInfo*>(bp_history);
+
+ if (squashed) {
+ // This restores the global history, then update it
+ // and recomputes the folded histories.
+ squash(tid, taken, bp_history);
+ return;
+ }
+
+ int nrand = random_mt.random<int>(0,3);
+ if (bi->condBranch) {
+ DPRINTF(Tage, "Updating tables for branch:%lx; taken?:%d\n",
+ branch_pc, taken);
+ condBranchUpdate(branch_pc, taken, bi, nrand);
+ }
+ if (!squashed) {
+ delete bi;
+ }
+}
+
+void
+TAGE::condBranchUpdate(Addr branch_pc, bool taken,
+ TageBranchInfo* bi, int nrand)
+{
+ // TAGE UPDATE
+ // try to allocate a new entries only if prediction was wrong
+ bool longest_match_pred = false;
+ bool alloc = (bi->tagePred != taken) && (bi->hitBank < nHistoryTables);
+ if (bi->hitBank > 0) {
+ // Manage the selection between longest matching and alternate
+ // matching for "pseudo"-newly allocated longest matching entry
+ longest_match_pred = bi->longestMatchPred;
+ bool PseudoNewAlloc = bi->pseudoNewAlloc;
+ // an entry is considered as newly allocated if its prediction
+ // counter is weak
+ if (PseudoNewAlloc) {
+ if (longest_match_pred == taken) {
+ alloc = false;
+ }
+ // if it was delivering the correct prediction, no need to
+ // allocate new entry even if the overall prediction was false
+ if (longest_match_pred != bi->altTaken) {
+ ctrUpdate(useAltPredForNewlyAllocated,
+ bi->altTaken == taken, useAltOnNaBits);
+ }
+ }
+ }
+
+ if (alloc) {
+ // is there some "unuseful" entry to allocate
+ uint8_t min = 1;
+ for (int i = nHistoryTables; i > bi->hitBank; i--) {
+ if (gtable[i][bi->tableIndices[i]].u < min) {
+ min = gtable[i][bi->tableIndices[i]].u;
+ }
+ }
+
+ // we allocate an entry with a longer history
+ // to avoid ping-pong, we do not choose systematically the next
+ // entry, but among the 3 next entries
+ int Y = nrand &
+ ((ULL(1) << (nHistoryTables - bi->hitBank - 1)) - 1);
+ int X = bi->hitBank + 1;
+ if (Y & 1) {
+ X++;
+ if (Y & 2)
+ X++;
+ }
+ // No entry available, forces one to be available
+ if (min > 0) {
+ gtable[X][bi->tableIndices[X]].u = 0;
+ }
+
+
+ //Allocate only one entry
+ for (int i = X; i <= nHistoryTables; i++) {
+ if ((gtable[i][bi->tableIndices[i]].u == 0)) {
+ gtable[i][bi->tableIndices[i]].tag = bi->tableTags[i];
+ gtable[i][bi->tableIndices[i]].ctr = (taken) ? 0 : -1;
+ break;
+ }
+ }
+ }
+ //periodic reset of u: reset is not complete but bit by bit
+ tCounter++;
+ if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
+ // reset least significant bit
+ // most significant bit becomes least significant bit
+ for (int i = 1; i <= nHistoryTables; i++) {
+ for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
+ gtable[i][j].u = gtable[i][j].u >> 1;
+ }
+ }
+ }
+
+ if (bi->hitBank > 0) {
+ DPRINTF(Tage, "Updating tag table entry (%d,%d) for branch %lx\n",
+ bi->hitBank, bi->hitBankIndex, branch_pc);
+ ctrUpdate(gtable[bi->hitBank][bi->hitBankIndex].ctr, taken,
+ tagTableCounterBits);
+ // if the provider entry is not certified to be useful also update
+ // the alternate prediction
+ if (gtable[bi->hitBank][bi->hitBankIndex].u == 0) {
+ if (bi->altBank > 0) {
+ ctrUpdate(gtable[bi->altBank][bi->altBankIndex].ctr, taken,
+ tagTableCounterBits);
+ DPRINTF(Tage, "Updating tag table entry (%d,%d) for"
+ " branch %lx\n", bi->hitBank, bi->hitBankIndex,
+ branch_pc);
+ }
+ if (bi->altBank == 0) {
+ baseUpdate(branch_pc, taken, bi);
+ }
+ }
+
+ // update the u counter
+ if (bi->tagePred != bi->altTaken) {
+ unsignedCtrUpdate(gtable[bi->hitBank][bi->hitBankIndex].u,
+ bi->tagePred == taken, tagTableUBits);
+ }
+ } else {
+ baseUpdate(branch_pc, taken, bi);
+ }
+}
+
+void
+TAGE::updateHistories(ThreadID tid, Addr branch_pc, bool taken, void* b)
+{
+ TageBranchInfo* bi = (TageBranchInfo*)(b);
+ ThreadHistory& tHist = threadHistory[tid];
+ // UPDATE HISTORIES
+ bool pathbit = ((branch_pc >> instShiftAmt) & 1);
+ //on a squash, return pointers to this and recompute indices.
+ //update user history
+ updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
+ tHist.pathHist = (tHist.pathHist << 1) + pathbit;
+ tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));
+
+ bi->ptGhist = tHist.ptGhist;
+ bi->pathHist = tHist.pathHist;
+ //prepare next index and tag computations for user branchs
+ for (int i = 1; i <= nHistoryTables; i++)
+ {
+ bi->ci[i] = tHist.computeIndices[i].comp;
+ bi->ct0[i] = tHist.computeTags[0][i].comp;
+ bi->ct1[i] = tHist.computeTags[1][i].comp;
+ tHist.computeIndices[i].update(tHist.gHist);
+ tHist.computeTags[0][i].update(tHist.gHist);
+ tHist.computeTags[1][i].update(tHist.gHist);
+ }
+ DPRINTF(Tage, "Updating global histories with branch:%lx; taken?:%d, "
+ "path Hist: %x; pointer:%d\n", branch_pc, taken,
tHist.pathHist,
+ tHist.ptGhist);
+}
+
+void
+TAGE::squash(ThreadID tid, bool taken, void *bp_history)
+{
+ TageBranchInfo* bi = (TageBranchInfo*)(bp_history);
+ ThreadHistory& tHist = threadHistory[tid];
+ DPRINTF(Tage, "Restoring branch info: %lx; taken? %d; PathHistory:%x, "
+ "pointer:%d\n", bi->branchPC,taken, bi->pathHist, bi->ptGhist);
+ tHist.pathHist = bi->pathHist;
+ tHist.ptGhist = bi->ptGhist;
+ tHist.gHist = &(tHist.globalHistory[tHist.ptGhist]);
+ tHist.gHist[0] = (taken ? 1 : 0);
+ for (int i = 1; i <= nHistoryTables; i++) {
+ tHist.computeIndices[i].comp = bi->ci[i];
+ tHist.computeTags[0][i].comp = bi->ct0[i];
+ tHist.computeTags[1][i].comp = bi->ct1[i];
+ tHist.computeIndices[i].update(tHist.gHist);
+ tHist.computeTags[0][i].update(tHist.gHist);
+ tHist.computeTags[1][i].update(tHist.gHist);
+ }
+}
+
+void
+TAGE::squash(ThreadID tid, void *bp_history)
+{
+ TageBranchInfo* bi = (TageBranchInfo*)(bp_history);
+ DPRINTF(Tage, "Deleting branch info: %lx\n", bi->branchPC);
+ delete bi;
+}
+
+bool
+TAGE::lookup(ThreadID tid, Addr branch_pc, void* &bp_history)
+{
+ bool retval = predict(tid, branch_pc, true, bp_history);
+
+ DPRINTF(Tage, "Lookup branch: %lx; predict:%d\n", branch_pc, retval);
+ updateHistories(tid, branch_pc, retval, bp_history);
+ assert(threadHistory[tid].gHist ==
+ &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
+
+ return retval;
+}
+
+void
+TAGE::btbUpdate(ThreadID tid, Addr branch_pc, void* &bp_history)
+{
+ TageBranchInfo* bi = (TageBranchInfo*) bp_history;
+ ThreadHistory& tHist = threadHistory[tid];
+ DPRINTF(Tage, "BTB miss resets prediction: %lx\n", branch_pc);
+ assert(tHist.gHist == &tHist.globalHistory[tHist.ptGhist]);
+ tHist.gHist[0] = 0;
+ for (int i = 1; i <= nHistoryTables; i++) {
+ tHist.computeIndices[i].comp = bi->ci[i];
+ tHist.computeTags[0][i].comp = bi->ct0[i];
+ tHist.computeTags[1][i].comp = bi->ct1[i];
+ tHist.computeIndices[i].update(tHist.gHist);
+ tHist.computeTags[0][i].update(tHist.gHist);
+ tHist.computeTags[1][i].update(tHist.gHist);
+ }
+}
+
+void
+TAGE::uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
+{
+ DPRINTF(Tage, "UnConditionalBranch: %lx\n", br_pc);
+ predict(tid, br_pc, false, bp_history);
+ updateHistories(tid, br_pc, true, bp_history);
+ assert(threadHistory[tid].gHist ==
+ &threadHistory[tid].globalHistory[threadHistory[tid].ptGhist]);
+}
+
+TAGE*
+TAGEParams::create()
+{
+ return new TAGE(this);
+}
diff --git a/src/cpu/pred/tage.hh b/src/cpu/pred/tage.hh
new file mode 100644
index 0000000..9ba0241
--- /dev/null
+++ b/src/cpu/pred/tage.hh
@@ -0,0 +1,374 @@
+/*
+ * Copyright (c) 2014 The University of Wisconsin
+ *
+ * Copyright (c) 2006 INRIA (Institut National de Recherche en
+ * Informatique et en Automatique / French National Research Institute
+ * for Computer Science and Applied Mathematics)
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Vignyan Reddy, Dibakar Gope and Arthur Perais,
+ * from André Seznec's code.
+ */
+
+/* @file
+ * Implementation of a TAGE branch predictor. TAGE is a global-history
based
+ * branch predictor. It features a PC-indexed bimodal predictor and N
+ * partially tagged tables, indexed with a hash of the PC and the global
+ * branch history. The different lengths of global branch history used to
+ * index the partially tagged tables grow geometrically. A small path
history
+ * is also used in the hash.
+ *
+ * All TAGE tables are accessed in parallel, and the one using the longest
+ * history that matches provides the prediction (some exceptions apply).
+ * Entries are allocated in components using a longer history than the
+ * one that predicted when the prediction is incorrect.
+ */
+
+#ifndef __CPU_PRED_TAGE
+#define __CPU_PRED_TAGE
+
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/pred/bpred_unit.hh"
+#include "params/TAGE.hh"
+
+class TAGE: public BPredUnit
+{
+ public:
+ TAGE(const TAGEParams *params);
+
+ // Base class methods.
+ void uncondBranch(ThreadID tid, Addr br_pc, void* &bp_history)
override;
+ bool lookup(ThreadID tid, Addr branch_addr, void* &bp_history)
override;
+ void btbUpdate(ThreadID tid, Addr branch_addr, void* &bp_history)
override;
+ void update(ThreadID tid, Addr branch_addr, bool taken, void
*bp_history,
+ bool squashed) override;
+ virtual void squash(ThreadID tid, void *bp_history) override;
+ unsigned getGHR(ThreadID tid, void *bp_history) const override;
+
+ protected:
+ // Prediction Structures
+
+ // Tage Entry
+ struct TageEntry
+ {
+ int8_t ctr;
+ uint16_t tag;
+ uint8_t u;
+ TageEntry() : ctr(0), tag(0), u(0) { }
+ };
+
+ // Folded History Table - compressed history
+ // to mix with instruction PC to index partially
+ // tagged tables.
+ struct FoldedHistory
+ {
+ unsigned comp;
+ int compLength;
+ int origLength;
+ int outpoint;
+
+ void init(int original_length, int compressed_length)
+ {
+ comp = 0;
+ origLength = original_length;
+ compLength = compressed_length;
+ outpoint = original_length % compressed_length;
+ }
+
+ void update(uint8_t * h)
+ {
+ comp = (comp << 1) | h[0];
+ comp ^= h[origLength] << outpoint;
+ comp ^= (comp >> compLength);
+ comp &= (ULL(1) << compLength) - 1;
+ }
+ };
+
+ // Primary branch history entry
+ struct TageBranchInfo
+ {
+ int pathHist;
+ int ptGhist;
+ int hitBank;
+ int hitBankIndex;
+ int altBank;
+ int altBankIndex;
+ int bimodalIndex;
+
+ bool tagePred;
+ bool altTaken;
+ bool condBranch;
+ bool longestMatchPred;
+ bool pseudoNewAlloc;
+ Addr branchPC;
+
+ // Pointer to dynamically allocated storage
+ // to save table indices and folded histories.
+ // To do one call to new instead of five.
+ int *storage;
+
+ // Pointers to actual saved array within the dynamically
+ // allocated storage.
+ int *tableIndices;
+ int *tableTags;
+ int *ci;
+ int *ct0;
+ int *ct1;
+
+ TageBranchInfo(int sz)
+ : pathHist(0), ptGhist(0),
+ hitBank(0), hitBankIndex(0),
+ altBank(0), altBankIndex(0),
+ bimodalIndex(0),
+ tagePred(false), altTaken(false),
+ condBranch(false), longestMatchPred(false),
+ pseudoNewAlloc(false), branchPC(0)
+ {
+ storage = new int [sz * 5];
+ tableIndices = storage;
+ tableTags = storage + sz;
+ ci = tableTags + sz;
+ ct0 = ci + sz;
+ ct1 = ct0 + sz;
+ }
+
+ virtual ~TageBranchInfo()
+ {
+ delete[] storage;
+ }
+ };
+
+ /**
+ * Computes the index used to access the
+ * bimodal table.
+ * @param pc_in The unshifted branch PC.
+ */
+ int bindex(Addr pc_in) const;
+
+ /**
+ * Computes the index used to access a
+ * partially tagged table.
+ * @param tid The thread ID used to select the
+ * global histories to use.
+ * @param pc The unshifted branch PC.
+ * @param bank The partially tagged table to access.
+ */
+ inline int gindex(ThreadID tid, Addr pc, int bank) const;
+
+ /**
+ * Utility function to shuffle the path history
+ * depending on which tagged table we are accessing.
+ * @param phist The path history.
+ * @param size Number of path history bits to use.
+ * @param bank The partially tagged table to access.
+ */
+ int F(int phist, int size, int bank) const;
+
+ /**
+ * Computes the partial tag of a tagged table.
+ * @param tid the thread ID used to select the
+ * global histories to use.
+ * @param pc The unshifted branch PC.
+ * @param bank The partially tagged table to access.
+ */
+ inline uint16_t gtag(ThreadID tid, Addr pc, int bank) const;
+
+ /**
+ * Updates a direction counter based on the actual
+ * branch outcome.
+ * @param ctr Reference to counter to update.
+ * @param taken Actual branch outcome.
+ * @param nbits Counter width.
+ */
+ void ctrUpdate(int8_t & ctr, bool taken, int nbits);
+
+ /**
+ * Updates an unsigned counter based on up/down parameter
+ * @param ctr Reference to counter to update.
+ * @param up Boolean indicating if the counter is
incremented/decremented
+ * If true it is incremented, if false it is decremented
+ * @param nbits Counter width.
+ */
+ void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits);
+
+ /**
+ * Get a branch prediction from the bimodal
+ * predictor.
+ * @param pc The unshifted branch PC.
+ * @param bi Pointer to information on the
+ * prediction.
+ */
+ bool getBimodePred(Addr pc, TageBranchInfo* bi) const;
+
+ /**
+ * Updates the bimodal predictor.
+ * @param pc The unshifted branch PC.
+ * @param taken The actual branch outcome.
+ * @param bi Pointer to information on the prediction
+ * recorded at prediction time.
+ */
+ void baseUpdate(Addr pc, bool taken, TageBranchInfo* bi);
+
+ /**
+ * (Speculatively) updates the global branch history.
+ * @param h Reference to pointer to global branch history.
+ * @param dir (Predicted) outcome to update the histories
+ * with.
+ * @param tab
+ * @param PT Reference to path history.
+ */
+ void updateGHist(uint8_t * &h, bool dir, uint8_t * tab, int &PT);
+
+ /**
+ * Get a branch prediction from TAGE. *NOT* an override of
+ * BpredUnit::predict().
+ * @param tid The thread ID to select the global
+ * histories to use.
+ * @param branch_pc The unshifted branch PC.
+ * @param cond_branch True if the branch is conditional.
+ * @param b Reference to wrapping pointer to allow storing
+ * derived class prediction information in the base class.
+ */
+ virtual bool predict(
+ ThreadID tid, Addr branch_pc, bool cond_branch, void* &b);
+
+ /**
+ * Update TAGE. Called at execute to repair histories on a
misprediction
+ * and at commit to update the tables.
+ * @param tid The thread ID to select the global
+ * histories to use.
+ * @param branch_pc The unshifted branch PC.
+ * @param taken Actual branch outcome.
+ * @param bi Pointer to information on the prediction
+ * recorded at prediction time.
+ */
+ void update(ThreadID tid, Addr branch_pc, bool taken, TageBranchInfo*
bi);
+
+ /**
+ * (Speculatively) updates global histories (path and direction).
+ * Also recomputes compressed (folded) histories based on the
+ * branch direction.
+ * @param tid The thread ID to select the histories
+ * to update.
+ * @param branch_pc The unshifted branch PC.
+ * @param taken (Predicted) branch direction.
+ * @param b Wrapping pointer to TageBranchInfo (to allow
+ * storing derived class prediction information in the
+ * base class).
+ */
+ void updateHistories(ThreadID tid, Addr branch_pc, bool taken, void*
b);
+
+ /**
+ * Restores speculatively updated path and direction histories.
+ * Also recomputes compressed (folded) histories based on the
+ * correct branch outcome.
+ * This version of squash() is called once on a branch misprediction.
+ * @param tid The Thread ID to select the histories to rollback.
+ * @param taken The correct branch outcome.
+ * @param bp_history Wrapping pointer to TageBranchInfo (to allow
+ * storing derived class prediction information in the
+ * base class).
+ * @post bp_history points to valid memory.
+ */
+ virtual void squash(ThreadID tid, bool taken, void *bp_history);
+
+ /**
+ * Update TAGE for conditional branches.
+ * @param branch_pc The unshifted branch PC.
+ * @param taken Actual branch outcome.
+ * @param bi Pointer to information on the prediction
+ * recorded at prediction time.
+ * @nrand Random int number from 0 to 3
+ */
+ virtual void condBranchUpdate(
+ Addr branch_pc, bool taken, TageBranchInfo* bi, int nrand);
+
+ /**
+ * TAGE prediction called from TAGE::predict
+ * @param tid The thread ID to select the global
+ * histories to use.
+ * @param branch_pc The unshifted branch PC.
+ * @param cond_branch True if the branch is conditional.
+ * @param bi Pointer to the TageBranchInfo
+ */
+ bool tagePredict(
+ ThreadID tid, Addr branch_pc, bool cond_branch, TageBranchInfo*
bi);
+
+ const unsigned logRatioBiModalHystEntries;
+ const unsigned nHistoryTables;
+ const unsigned tagTableCounterBits;
+ const unsigned tagTableUBits;
+ const unsigned histBufferSize;
+ const unsigned minHist;
+ const unsigned maxHist;
+ const unsigned pathHistBits;
+
+ const std::vector<unsigned> tagTableTagWidths;
+ const std::vector<int> logTagTableSizes;
+
+ std::vector<bool> btablePrediction;
+ std::vector<bool> btableHysteresis;
+ TageEntry **gtable;
+
+ // Keep per-thread histories to
+ // support SMT.
+ struct ThreadHistory {
+ // Speculative path history
+ // (LSB of branch address)
+ int pathHist;
+
+ // Speculative branch direction
+ // history (circular buffer)
+ // @TODO Convert to std::vector<bool>
+ uint8_t *globalHistory;
+
+ // Pointer to most recent branch outcome
+ uint8_t* gHist;
+
+ // Index to most recent branch outcome
+ int ptGhist;
+
+ // Speculative folded histories.
+ FoldedHistory *computeIndices;
+ FoldedHistory *computeTags[2];
+ };
+
+ std::vector<ThreadHistory> threadHistory;
+
+ int *histLengths;
+ int *tableIndices;
+ int *tableTags;
+
+ int8_t useAltPredForNewlyAllocated;
+ uint64_t tCounter;
+ uint64_t logUResetPeriod;
+ unsigned useAltOnNaBits;
+};
+
+#endif // __CPU_PRED_TAGE

--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14435
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I8323adbfd5c9a45db23cfff234218280e639f9ed
Gerrit-Change-Number: 14435
Gerrit-PatchSet: 3
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-Reviewer: Andreas Sandberg <***@arm.com>
Gerrit-Reviewer: Gabe Black <***@google.com>
Gerrit-Reviewer: Jason Lowe-Power <***@lowepower.com>
Gerrit-Reviewer: Pau Cabre <***@metempsy.com>
Gerrit-Reviewer: Sudhanshu Jha <***@arm.com>
Gerrit-MessageType: merged

Continue reading on narkive:
Loading...