Discussion:
[gem5-dev] Change in gem5/gem5[master]: cpu: Made LTAGE parameters configurable
(too old to reply)
Pau Cabre (Gerrit)
2018-11-18 23:37:08 UTC
Permalink
Pau Cabre has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/14417


Change subject: cpu: Made LTAGE parameters configurable
......................................................................

cpu: Made LTAGE parameters configurable

This includes TAGE tag sizes, TAGE table sizes, U counters reset period,
loop predictor associativity, path history size, the USE_ALT_ON_NA size
and the WITHLOOP size

Change-Id: I935823f0a5794f5d55b744263798897a813dc1bd
Signed-off-by: Pau Cabre <***@metempsy.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
3 files changed, 86 insertions(+), 66 deletions(-)



diff --git a/src/cpu/pred/BranchPredictor.py
b/src/cpu/pred/BranchPredictor.py
index aa6cd4a..9d83abb 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -92,11 +92,9 @@
cxx_class = 'LTAGE'
cxx_header = "cpu/pred/ltage.hh"

- logSizeBiMP = Param.Unsigned(14, "Log size of Bimodal predictor in
bits")
logRatioBiModalHystEntries = Param.Unsigned(2,
"Log num of prediction entries for a shared hysteresis bit " \
"for the Bimodal")
- logSizeTagTables = Param.Unsigned(11, "Log size of tag table in LTAGE")
logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor")
nHistoryTables = Param.Unsigned(12, "Number of history tables")
tagTableCounterBits = Param.Unsigned(3, "Number of tag table counter
bits")
@@ -105,11 +103,22 @@
"A large number to track all branch histories(2MEntries
default)")
minHist = Param.Unsigned(4, "Minimum history size of LTAGE")
maxHist = Param.Unsigned(640, "Maximum history size of LTAGE")
- minTagWidth = Param.Unsigned(7, "Minimum tag size in tag tables")
+ pathHistBits = Param.Unsigned(16, "Path history size")
+ tagTableTagWidths = VectorParam.Unsigned(
+ [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15],
+ "Tag size in TAGE tag tables")
+ logTagTableSizes = VectorParam.Int(
+ [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9],
+ "Log2 of TAGE table sizes")
+ logUResetPeriod = Param.Unsigned(19,
+ "Log period in number of branches to reset TAGE useful counters")
+ useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter")
+ withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter")

loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop
entry")
loopTableConfidenceBits = Param.Unsigned(2,
"Number of confidence bits per loop entry")
loopTableTagBits = Param.Unsigned(14, "Number of tag bits per loop
entry")
loopTableIterBits = Param.Unsigned(14, "Number of iteration bits per
loop")
+ logLoopTableAssoc = Param.Unsigned(2, "Log loop predictor
associativity")

diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index b049d4f..d6cc087 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -49,9 +49,7 @@

LTAGE::LTAGE(const LTAGEParams *params)
: BPredUnit(params),
- logSizeBiMP(params->logSizeBiMP),
logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
- logSizeTagTables(params->logSizeTagTables),
logSizeLoopPred(params->logSizeLoopPred),
nHistoryTables(params->nHistoryTables),
tagTableCounterBits(params->tagTableCounterBits),
@@ -59,15 +57,21 @@
histBufferSize(params->histBufferSize),
minHist(params->minHist),
maxHist(params->maxHist),
- minTagWidth(params->minTagWidth),
+ pathHistBits(params->pathHistBits),
loopTableAgeBits(params->loopTableAgeBits),
loopTableConfidenceBits(params->loopTableConfidenceBits),
loopTableTagBits(params->loopTableTagBits),
loopTableIterBits(params->loopTableIterBits),
+ logLoopTableAssoc(params->logLoopTableAssoc),
confidenceThreshold((1 << loopTableConfidenceBits) - 1),
loopTagMask((1 << loopTableTagBits) - 1),
loopNumIterMask((1 << loopTableIterBits) - 1),
- threadHistory(params->numThreads)
+ tagTableTagWidths(params->tagTableTagWidths),
+ logTagTableSizes(params->logTagTableSizes),
+ threadHistory(params->numThreads),
+ logUResetPeriod(params->logUResetPeriod),
+ useAltOnNaBits(params->useAltOnNaBits),
+ withLoopBits(params->withLoopBits)
{
// Current method for periodically resetting the u counter bits only
// works for 1 or 2 bits
@@ -79,10 +83,18 @@
assert(loopTableTagBits <= 16);
assert(loopTableIterBits <= 16);

+ assert(logSizeLoopPred >= logLoopTableAssoc);
+
+ // we use int type for the path history, so it cannot be more than
+ // its size
+ assert(pathHistBits <= (sizeof(int)*8));
+
+ // initialize the counter to half of the period
+ assert(logUResetPeriod != 0);
+ tCounter = ULL(1) << (logUResetPeriod - 1);
+
assert(params->histBufferSize > params->maxHist * 2);
useAltPredForNewlyAllocated = 0;
- logTick = 19;
- tCounter = ULL(1) << (logTick - 1);

for (auto& history : threadHistory) {
history.pathHist = 0;
@@ -103,27 +115,12 @@
+ 0.5);
}

- tagWidths[1] = minTagWidth;
- tagWidths[2] = minTagWidth;
- tagWidths[3] = minTagWidth + 1;
- tagWidths[4] = minTagWidth + 1;
- tagWidths[5] = minTagWidth + 2;
- tagWidths[6] = minTagWidth + 3;
- tagWidths[7] = minTagWidth + 4;
- tagWidths[8] = minTagWidth + 5;
- tagWidths[9] = minTagWidth + 5;
- tagWidths[10] = minTagWidth + 6;
- tagWidths[11] = minTagWidth + 7;
- tagWidths[12] = minTagWidth + 8;
+ assert(tagTableTagWidths.size() == (nHistoryTables+1));
+ assert(logTagTableSizes.size() == (nHistoryTables+1));

- for (int i = 1; i <= 2; i++)
- tagTableSizes[i] = logSizeTagTables - 1;
- for (int i = 3; i <= 6; i++)
- tagTableSizes[i] = logSizeTagTables;
- for (int i = 7; i <= 10; i++)
- tagTableSizes[i] = logSizeTagTables - 1;
- for (int i = 11; i <= 12; i++)
- tagTableSizes[i] = logSizeTagTables - 2;
+ // First entry is for the Bimodal table and it is untagged in this
+ // implementation
+ assert(tagTableTagWidths[0] == 0);

for (auto& history : threadHistory) {
history.computeIndices = new FoldedHistory[nHistoryTables+1];
@@ -131,17 +128,18 @@
history.computeTags[1] = new FoldedHistory[nHistoryTables+1];

for (int i = 1; i <= nHistoryTables; i++) {
- history.computeIndices[i].init(histLengths[i],
(tagTableSizes[i]));
+ history.computeIndices[i].init(
+ histLengths[i], (logTagTableSizes[i]));
history.computeTags[0][i].init(
- history.computeIndices[i].origLength, tagWidths[i]);
+ history.computeIndices[i].origLength,
tagTableTagWidths[i]);
history.computeTags[1][i].init(
- history.computeIndices[i].origLength, tagWidths[i] - 1);
+ history.computeIndices[i].origLength,
tagTableTagWidths[i]-1);
DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
- histLengths[i], tagTableSizes[i], tagWidths[i]);
+ histLengths[i], logTagTableSizes[i],
tagTableTagWidths[i]);
}
}

- const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
+ const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
btablePrediction.resize(bimodalTableSize, false);
btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
true);
@@ -149,7 +147,7 @@
ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
gtable = new TageEntry*[nHistoryTables + 1];
for (int i = 1; i <= nHistoryTables; i++) {
- gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
+ gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
}

tableIndices = new int [nHistoryTables+1];
@@ -161,14 +159,21 @@
int
LTAGE::bindex(Addr pc_in) const
{
- return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
+ return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) -
1));
}

int
LTAGE::lindex(Addr pc_in) const
{
- return (((pc_in >> instShiftAmt) &
- ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
+ // The loop table is implemented as a linear table
+ // If associativity is N (N being 1 << logLoopTableAssoc),
+ // the first N entries are for set 0, the next N entries are for set 1,
+ // and so on.
+ // Thus, this function calculates the set and then it gets left shifted
+ // by logLoopTableAssoc in order to return the index of the first of
the
+ // N entries of the set
+ Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1;
+ return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
}

int
@@ -177,13 +182,13 @@
int A1, A2;

A = A & ((ULL(1) << size) - 1);
- A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
- A2 = (A >> tagTableSizes[bank]);
- A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
- + (A2 >> (tagTableSizes[bank] - bank));
+ A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
+ A2 = (A >> logTagTableSizes[bank]);
+ A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+ + (A2 >> (logTagTableSizes[bank] - bank));
A = A1 ^ A2;
- A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
- + (A >> (tagTableSizes[bank] - bank));
+ A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+ + (A >> (logTagTableSizes[bank] - bank));
return (A);
}

@@ -193,14 +198,16 @@
LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
{
int index;
- int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
+ int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
+ histLengths[bank];
+ const Addr shiftedPc = pc >> instShiftAmt;
index =
- (pc >> instShiftAmt) ^
- ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) +
1)) ^
+ shiftedPc ^
+ (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
threadHistory[tid].computeIndices[bank].comp ^
F(threadHistory[tid].pathHist, hlen, bank);

- return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
+ return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
}


@@ -212,7 +219,7 @@
threadHistory[tid].computeTags[0][bank].comp ^
(threadHistory[tid].computeTags[1][bank].comp << 1);

- return (tag & ((ULL(1) << tagWidths[bank]) - 1));
+ return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
}


@@ -280,9 +287,10 @@
bi->loopHit = -1;
bi->loopPredValid = false;
bi->loopIndex = lindex(pc);
- bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) &
loopTagMask;
+ unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc;
+ bi->loopTag = ((pc) >> pcShift) & loopTagMask;

- for (int i = 0; i < 4; i++) {
+ for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
bi->loopHit = i;
bi->loopPredValid =
@@ -379,8 +387,8 @@
} else if (taken) {
//try to allocate an entry on taken branch
int nrand = random_mt.random<int>();
- for (int i = 0; i < 4; i++) {
- int loop_hit = (nrand + i) & 3;
+ for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
+ int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1);
idx = bi->loopIndex + loop_hit;
if (ltable[idx].age == 0) {
DPRINTF(LTage, "Allocating loop pred entry for
branch %lx\n",
@@ -552,7 +560,9 @@

if (bi->loopPredValid) {
if (bi->tagePred != bi->loopPred) {
- ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
+ ctrUpdate(loopUseCounter,
+ (bi->loopPred == taken),
+ withLoopBits);
}
}

@@ -575,7 +585,7 @@
// allocate new entry even if the overall prediction was
false
if (longest_match_pred != bi->altTaken) {
ctrUpdate(useAltPredForNewlyAllocated,
- bi->altTaken == taken, 4);
+ bi->altTaken == taken, useAltOnNaBits);
}
}
}
@@ -617,11 +627,11 @@
}
//periodic reset of u: reset is not complete but bit by bit
tCounter++;
- if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
+ if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
// reset least significant bit
// most significant bit becomes least significant bit
for (int i = 1; i <= nHistoryTables; i++) {
- for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
+ for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
gtable[i][j].u = gtable[i][j].u >> 1;
}
}
@@ -674,7 +684,7 @@
//update user history
updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
tHist.pathHist = (tHist.pathHist << 1) + pathbit;
- tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
+ tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));

bi->ptGhist = tHist.ptGhist;
bi->pathHist = tHist.pathHist;
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index b765a46..6929ad0 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -354,9 +354,7 @@
*/
void specLoopUpdate(Addr pc, bool taken, BranchInfo* bi);

- const unsigned logSizeBiMP;
const unsigned logRatioBiModalHystEntries;
- const unsigned logSizeTagTables;
const unsigned logSizeLoopPred;
const unsigned nHistoryTables;
const unsigned tagTableCounterBits;
@@ -364,16 +362,19 @@
const unsigned histBufferSize;
const unsigned minHist;
const unsigned maxHist;
- const unsigned minTagWidth;
+ const unsigned pathHistBits;
const unsigned loopTableAgeBits;
const unsigned loopTableConfidenceBits;
const unsigned loopTableTagBits;
const unsigned loopTableIterBits;
-
+ const unsigned logLoopTableAssoc;
const uint8_t confidenceThreshold;
const uint16_t loopTagMask;
const uint16_t loopNumIterMask;

+ const std::vector<unsigned> tagTableTagWidths;
+ const std::vector<int> logTagTableSizes;
+
std::vector<bool> btablePrediction;
std::vector<bool> btableHysteresis;
TageEntry **gtable;
@@ -404,16 +405,16 @@

std::vector<ThreadHistory> threadHistory;

- int tagWidths[15];
- int tagTableSizes[15];
int *histLengths;
int *tableIndices;
int *tableTags;

int8_t loopUseCounter;
int8_t useAltPredForNewlyAllocated;
- int tCounter;
- int logTick;
+ uint64_t tCounter;
+ uint64_t logUResetPeriod;
+ unsigned useAltOnNaBits;
+ unsigned withLoopBits;
};

#endif // __CPU_PRED_LTAGE
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14417
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I935823f0a5794f5d55b744263798897a813dc1bd
Gerrit-Change-Number: 14417
Gerrit-PatchSet: 1
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-MessageType: newchange
Pau Cabre (Gerrit)
2018-11-22 11:46:57 UTC
Permalink
Pau Cabre has submitted this change and it was merged. (
https://gem5-review.googlesource.com/c/public/gem5/+/14417 )

Change subject: cpu: Made LTAGE parameters configurable
......................................................................

cpu: Made LTAGE parameters configurable

This includes TAGE tag sizes, TAGE table sizes, U counters reset period,
loop predictor associativity, path history size, the USE_ALT_ON_NA size
and the WITHLOOP size

Change-Id: I935823f0a5794f5d55b744263798897a813dc1bd
Signed-off-by: Pau Cabre <***@metempsy.com>
Reviewed-on: https://gem5-review.googlesource.com/c/14417
Reviewed-by: Jason Lowe-Power <***@lowepower.com>
Maintainer: Jason Lowe-Power <***@lowepower.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
3 files changed, 86 insertions(+), 66 deletions(-)

Approvals:
Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved



diff --git a/src/cpu/pred/BranchPredictor.py
b/src/cpu/pred/BranchPredictor.py
index aa6cd4a..9d83abb 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -92,11 +92,9 @@
cxx_class = 'LTAGE'
cxx_header = "cpu/pred/ltage.hh"

- logSizeBiMP = Param.Unsigned(14, "Log size of Bimodal predictor in
bits")
logRatioBiModalHystEntries = Param.Unsigned(2,
"Log num of prediction entries for a shared hysteresis bit " \
"for the Bimodal")
- logSizeTagTables = Param.Unsigned(11, "Log size of tag table in LTAGE")
logSizeLoopPred = Param.Unsigned(8, "Log size of the loop predictor")
nHistoryTables = Param.Unsigned(12, "Number of history tables")
tagTableCounterBits = Param.Unsigned(3, "Number of tag table counter
bits")
@@ -105,11 +103,22 @@
"A large number to track all branch histories(2MEntries
default)")
minHist = Param.Unsigned(4, "Minimum history size of LTAGE")
maxHist = Param.Unsigned(640, "Maximum history size of LTAGE")
- minTagWidth = Param.Unsigned(7, "Minimum tag size in tag tables")
+ pathHistBits = Param.Unsigned(16, "Path history size")
+ tagTableTagWidths = VectorParam.Unsigned(
+ [0, 7, 7, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15],
+ "Tag size in TAGE tag tables")
+ logTagTableSizes = VectorParam.Int(
+ [14, 10, 10, 11, 11, 11, 11, 10, 10, 10, 10, 9, 9],
+ "Log2 of TAGE table sizes")
+ logUResetPeriod = Param.Unsigned(19,
+ "Log period in number of branches to reset TAGE useful counters")
+ useAltOnNaBits = Param.Unsigned(4, "Size of the USE_ALT_ON_NA counter")
+ withLoopBits = Param.Unsigned(7, "Size of the WITHLOOP counter")

loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop
entry")
loopTableConfidenceBits = Param.Unsigned(2,
"Number of confidence bits per loop entry")
loopTableTagBits = Param.Unsigned(14, "Number of tag bits per loop
entry")
loopTableIterBits = Param.Unsigned(14, "Number of iteration bits per
loop")
+ logLoopTableAssoc = Param.Unsigned(2, "Log loop predictor
associativity")

diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index b049d4f..d6cc087 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -49,9 +49,7 @@

LTAGE::LTAGE(const LTAGEParams *params)
: BPredUnit(params),
- logSizeBiMP(params->logSizeBiMP),
logRatioBiModalHystEntries(params->logRatioBiModalHystEntries),
- logSizeTagTables(params->logSizeTagTables),
logSizeLoopPred(params->logSizeLoopPred),
nHistoryTables(params->nHistoryTables),
tagTableCounterBits(params->tagTableCounterBits),
@@ -59,15 +57,21 @@
histBufferSize(params->histBufferSize),
minHist(params->minHist),
maxHist(params->maxHist),
- minTagWidth(params->minTagWidth),
+ pathHistBits(params->pathHistBits),
loopTableAgeBits(params->loopTableAgeBits),
loopTableConfidenceBits(params->loopTableConfidenceBits),
loopTableTagBits(params->loopTableTagBits),
loopTableIterBits(params->loopTableIterBits),
+ logLoopTableAssoc(params->logLoopTableAssoc),
confidenceThreshold((1 << loopTableConfidenceBits) - 1),
loopTagMask((1 << loopTableTagBits) - 1),
loopNumIterMask((1 << loopTableIterBits) - 1),
- threadHistory(params->numThreads)
+ tagTableTagWidths(params->tagTableTagWidths),
+ logTagTableSizes(params->logTagTableSizes),
+ threadHistory(params->numThreads),
+ logUResetPeriod(params->logUResetPeriod),
+ useAltOnNaBits(params->useAltOnNaBits),
+ withLoopBits(params->withLoopBits)
{
// Current method for periodically resetting the u counter bits only
// works for 1 or 2 bits
@@ -79,10 +83,18 @@
assert(loopTableTagBits <= 16);
assert(loopTableIterBits <= 16);

+ assert(logSizeLoopPred >= logLoopTableAssoc);
+
+ // we use int type for the path history, so it cannot be more than
+ // its size
+ assert(pathHistBits <= (sizeof(int)*8));
+
+ // initialize the counter to half of the period
+ assert(logUResetPeriod != 0);
+ tCounter = ULL(1) << (logUResetPeriod - 1);
+
assert(params->histBufferSize > params->maxHist * 2);
useAltPredForNewlyAllocated = 0;
- logTick = 19;
- tCounter = ULL(1) << (logTick - 1);

for (auto& history : threadHistory) {
history.pathHist = 0;
@@ -103,27 +115,12 @@
+ 0.5);
}

- tagWidths[1] = minTagWidth;
- tagWidths[2] = minTagWidth;
- tagWidths[3] = minTagWidth + 1;
- tagWidths[4] = minTagWidth + 1;
- tagWidths[5] = minTagWidth + 2;
- tagWidths[6] = minTagWidth + 3;
- tagWidths[7] = minTagWidth + 4;
- tagWidths[8] = minTagWidth + 5;
- tagWidths[9] = minTagWidth + 5;
- tagWidths[10] = minTagWidth + 6;
- tagWidths[11] = minTagWidth + 7;
- tagWidths[12] = minTagWidth + 8;
+ assert(tagTableTagWidths.size() == (nHistoryTables+1));
+ assert(logTagTableSizes.size() == (nHistoryTables+1));

- for (int i = 1; i <= 2; i++)
- tagTableSizes[i] = logSizeTagTables - 1;
- for (int i = 3; i <= 6; i++)
- tagTableSizes[i] = logSizeTagTables;
- for (int i = 7; i <= 10; i++)
- tagTableSizes[i] = logSizeTagTables - 1;
- for (int i = 11; i <= 12; i++)
- tagTableSizes[i] = logSizeTagTables - 2;
+ // First entry is for the Bimodal table and it is untagged in this
+ // implementation
+ assert(tagTableTagWidths[0] == 0);

for (auto& history : threadHistory) {
history.computeIndices = new FoldedHistory[nHistoryTables+1];
@@ -131,17 +128,18 @@
history.computeTags[1] = new FoldedHistory[nHistoryTables+1];

for (int i = 1; i <= nHistoryTables; i++) {
- history.computeIndices[i].init(histLengths[i],
(tagTableSizes[i]));
+ history.computeIndices[i].init(
+ histLengths[i], (logTagTableSizes[i]));
history.computeTags[0][i].init(
- history.computeIndices[i].origLength, tagWidths[i]);
+ history.computeIndices[i].origLength,
tagTableTagWidths[i]);
history.computeTags[1][i].init(
- history.computeIndices[i].origLength, tagWidths[i] - 1);
+ history.computeIndices[i].origLength,
tagTableTagWidths[i]-1);
DPRINTF(LTage, "HistLength:%d, TTSize:%d, TTTWidth:%d\n",
- histLengths[i], tagTableSizes[i], tagWidths[i]);
+ histLengths[i], logTagTableSizes[i],
tagTableTagWidths[i]);
}
}

- const uint64_t bimodalTableSize = ULL(1) << logSizeBiMP;
+ const uint64_t bimodalTableSize = ULL(1) << logTagTableSizes[0];
btablePrediction.resize(bimodalTableSize, false);
btableHysteresis.resize(bimodalTableSize >> logRatioBiModalHystEntries,
true);
@@ -149,7 +147,7 @@
ltable = new LoopEntry[ULL(1) << logSizeLoopPred];
gtable = new TageEntry*[nHistoryTables + 1];
for (int i = 1; i <= nHistoryTables; i++) {
- gtable[i] = new TageEntry[1<<(tagTableSizes[i])];
+ gtable[i] = new TageEntry[1<<(logTagTableSizes[i])];
}

tableIndices = new int [nHistoryTables+1];
@@ -161,14 +159,21 @@
int
LTAGE::bindex(Addr pc_in) const
{
- return ((pc_in >> instShiftAmt) & ((ULL(1) << (logSizeBiMP)) - 1));
+ return ((pc_in >> instShiftAmt) & ((ULL(1) << (logTagTableSizes[0])) -
1));
}

int
LTAGE::lindex(Addr pc_in) const
{
- return (((pc_in >> instShiftAmt) &
- ((ULL(1) << (logSizeLoopPred - 2)) - 1)) << 2);
+ // The loop table is implemented as a linear table
+ // If associativity is N (N being 1 << logLoopTableAssoc),
+ // the first N entries are for set 0, the next N entries are for set 1,
+ // and so on.
+ // Thus, this function calculates the set and then it gets left shifted
+ // by logLoopTableAssoc in order to return the index of the first of
the
+ // N entries of the set
+ Addr mask = (ULL(1) << (logSizeLoopPred - logLoopTableAssoc)) - 1;
+ return (((pc_in >> instShiftAmt) & mask) << logLoopTableAssoc);
}

int
@@ -177,13 +182,13 @@
int A1, A2;

A = A & ((ULL(1) << size) - 1);
- A1 = (A & ((ULL(1) << tagTableSizes[bank]) - 1));
- A2 = (A >> tagTableSizes[bank]);
- A2 = ((A2 << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
- + (A2 >> (tagTableSizes[bank] - bank));
+ A1 = (A & ((ULL(1) << logTagTableSizes[bank]) - 1));
+ A2 = (A >> logTagTableSizes[bank]);
+ A2 = ((A2 << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+ + (A2 >> (logTagTableSizes[bank] - bank));
A = A1 ^ A2;
- A = ((A << bank) & ((ULL(1) << tagTableSizes[bank]) - 1))
- + (A >> (tagTableSizes[bank] - bank));
+ A = ((A << bank) & ((ULL(1) << logTagTableSizes[bank]) - 1))
+ + (A >> (logTagTableSizes[bank] - bank));
return (A);
}

@@ -193,14 +198,16 @@
LTAGE::gindex(ThreadID tid, Addr pc, int bank) const
{
int index;
- int hlen = (histLengths[bank] > 16) ? 16 : histLengths[bank];
+ int hlen = (histLengths[bank] > pathHistBits) ? pathHistBits :
+ histLengths[bank];
+ const Addr shiftedPc = pc >> instShiftAmt;
index =
- (pc >> instShiftAmt) ^
- ((pc >> instShiftAmt) >> ((int) abs(tagTableSizes[bank] - bank) +
1)) ^
+ shiftedPc ^
+ (shiftedPc >> ((int) abs(logTagTableSizes[bank] - bank) + 1)) ^
threadHistory[tid].computeIndices[bank].comp ^
F(threadHistory[tid].pathHist, hlen, bank);

- return (index & ((ULL(1) << (tagTableSizes[bank])) - 1));
+ return (index & ((ULL(1) << (logTagTableSizes[bank])) - 1));
}


@@ -212,7 +219,7 @@
threadHistory[tid].computeTags[0][bank].comp ^
(threadHistory[tid].computeTags[1][bank].comp << 1);

- return (tag & ((ULL(1) << tagWidths[bank]) - 1));
+ return (tag & ((ULL(1) << tagTableTagWidths[bank]) - 1));
}


@@ -280,9 +287,10 @@
bi->loopHit = -1;
bi->loopPredValid = false;
bi->loopIndex = lindex(pc);
- bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) &
loopTagMask;
+ unsigned pcShift = instShiftAmt + logSizeLoopPred - logLoopTableAssoc;
+ bi->loopTag = ((pc) >> pcShift) & loopTagMask;

- for (int i = 0; i < 4; i++) {
+ for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
bi->loopHit = i;
bi->loopPredValid =
@@ -379,8 +387,8 @@
} else if (taken) {
//try to allocate an entry on taken branch
int nrand = random_mt.random<int>();
- for (int i = 0; i < 4; i++) {
- int loop_hit = (nrand + i) & 3;
+ for (int i = 0; i < (1 << logLoopTableAssoc); i++) {
+ int loop_hit = (nrand + i) & ((1 << logLoopTableAssoc) - 1);
idx = bi->loopIndex + loop_hit;
if (ltable[idx].age == 0) {
DPRINTF(LTage, "Allocating loop pred entry for
branch %lx\n",
@@ -552,7 +560,9 @@

if (bi->loopPredValid) {
if (bi->tagePred != bi->loopPred) {
- ctrUpdate(loopUseCounter, (bi->loopPred== taken), 7);
+ ctrUpdate(loopUseCounter,
+ (bi->loopPred == taken),
+ withLoopBits);
}
}

@@ -575,7 +585,7 @@
// allocate new entry even if the overall prediction was
false
if (longest_match_pred != bi->altTaken) {
ctrUpdate(useAltPredForNewlyAllocated,
- bi->altTaken == taken, 4);
+ bi->altTaken == taken, useAltOnNaBits);
}
}
}
@@ -617,11 +627,11 @@
}
//periodic reset of u: reset is not complete but bit by bit
tCounter++;
- if ((tCounter & ((ULL(1) << logTick) - 1)) == 0) {
+ if ((tCounter & ((ULL(1) << logUResetPeriod) - 1)) == 0) {
// reset least significant bit
// most significant bit becomes least significant bit
for (int i = 1; i <= nHistoryTables; i++) {
- for (int j = 0; j < (ULL(1) << tagTableSizes[i]); j++) {
+ for (int j = 0; j < (ULL(1) << logTagTableSizes[i]); j++) {
gtable[i][j].u = gtable[i][j].u >> 1;
}
}
@@ -674,7 +684,7 @@
//update user history
updateGHist(tHist.gHist, taken, tHist.globalHistory, tHist.ptGhist);
tHist.pathHist = (tHist.pathHist << 1) + pathbit;
- tHist.pathHist = (tHist.pathHist & ((ULL(1) << 16) - 1));
+ tHist.pathHist = (tHist.pathHist & ((ULL(1) << pathHistBits) - 1));

bi->ptGhist = tHist.ptGhist;
bi->pathHist = tHist.pathHist;
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index 68aef1c..8b417d4 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -354,9 +354,7 @@
*/
void specLoopUpdate(Addr pc, bool taken, BranchInfo* bi);

- const unsigned logSizeBiMP;
const unsigned logRatioBiModalHystEntries;
- const unsigned logSizeTagTables;
const unsigned logSizeLoopPred;
const unsigned nHistoryTables;
const unsigned tagTableCounterBits;
@@ -364,16 +362,19 @@
const unsigned histBufferSize;
const unsigned minHist;
const unsigned maxHist;
- const unsigned minTagWidth;
+ const unsigned pathHistBits;
const unsigned loopTableAgeBits;
const unsigned loopTableConfidenceBits;
const unsigned loopTableTagBits;
const unsigned loopTableIterBits;
-
+ const unsigned logLoopTableAssoc;
const uint8_t confidenceThreshold;
const uint16_t loopTagMask;
const uint16_t loopNumIterMask;

+ const std::vector<unsigned> tagTableTagWidths;
+ const std::vector<int> logTagTableSizes;
+
std::vector<bool> btablePrediction;
std::vector<bool> btableHysteresis;
TageEntry **gtable;
@@ -404,16 +405,16 @@

std::vector<ThreadHistory> threadHistory;

- int tagWidths[15];
- int tagTableSizes[15];
int *histLengths;
int *tableIndices;
int *tableTags;

int8_t loopUseCounter;
int8_t useAltPredForNewlyAllocated;
- int tCounter;
- int logTick;
+ uint64_t tCounter;
+ uint64_t logUResetPeriod;
+ unsigned useAltOnNaBits;
+ unsigned withLoopBits;
};

#endif // __CPU_PRED_LTAGE
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14417
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I935823f0a5794f5d55b744263798897a813dc1bd
Gerrit-Change-Number: 14417
Gerrit-PatchSet: 3
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-Reviewer: Jason Lowe-Power <***@lowepower.com>
Gerrit-Reviewer: Pau Cabre <***@metempsy.com>
Gerrit-MessageType: merged
Continue reading on narkive:
Loading...