Discussion:
[gem5-dev] Change in gem5/gem5[master]: cpu: Fixes on the loop predictor part of LTAGE
Pau Cabre (Gerrit)
2018-11-11 22:44:52 UTC
Permalink
Pau Cabre has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/14216 )


Change subject: cpu: Fixes on the loop predictor part of LTAGE
......................................................................

cpu: Fixes on the loop predictor part of LTAGE

Fixed the following fields of the loop predictor entries as described on
the LTAGE paper:
- Age counter (it was 3 bits and it should be 8 bits)
- Tag (it was 16 bits and it should be 14 bits). Also, it sometimes used
int variables and sometimes uint16_t, leading to wrong behaviour
- Confidence counter (it was 2 bits in some parts of the code and 3 bits
in some other parts. It should be 2 bits)
- Iteration counters (they were 16 bits and they should be 14 bits)
All the new sizes are now configurable

Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096
Signed-off-by: Pau Cabre <***@metempsy.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
3 files changed, 61 insertions(+), 11 deletions(-)



diff --git a/src/cpu/pred/BranchPredictor.py
b/src/cpu/pred/BranchPredictor.py
index 1b400c2..0b6aba4 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -102,3 +102,9 @@
maxHist = Param.Unsigned(640, "Maximum history size of LTAGE")
minTagWidth = Param.Unsigned(7, "Minimum tag size in tag tables")

+ loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop
entry")
+ loopTableConfidenceBits = Param.Unsigned(2,
+ "Number of confidence bits per loop entry")
+ loopTableTagBits = Param.Unsigned(14, "Number of tag bits per loop
entry")
+ loopTableIterBits = Param.Unsigned(14, "Nuber of iteration bits per
loop")
+
diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index 251fb2e..75ebabc 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -58,8 +58,20 @@
minHist(params->minHist),
maxHist(params->maxHist),
minTagWidth(params->minTagWidth),
+ loopTableAgeBits(params->loopTableAgeBits),
+ loopTableConfidenceBits(params->loopTableConfidenceBits),
+ loopTableTagBits(params->loopTableTagBits),
+ loopTableIterBits(params->loopTableIterBits),
+ confidenceThreshold((1 << loopTableConfidenceBits) - 1),
+ loopTagMask((1 << loopTableTagBits) - 1),
+ loopNumIterMask((1 << loopTableIterBits) - 1),
threadHistory(params->numThreads)
{
+ // we use uint16_t type for these vales, so they cannot be more than
+ // 16 bits
+ assert(loopTableTagBits <= 16);
+ assert(loopTableIterBits <= 16);
+
assert(params->histBufferSize > params->maxHist * 2);
useAltPredForNewlyAllocated = 0;
logTick = 19;
@@ -204,6 +216,20 @@
}
}

+// Up-down unsigned saturating counter
+void
+LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
+{
+ assert(nbits <= sizeof(uint8_t) << 3);
+ if (up) {
+ if (ctr < ((1 << nbits) - 1))
+ ctr++;
+ } else {
+ if (ctr)
+ ctr--;
+ }
+}
+
// Bimodal prediction
bool
LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
@@ -239,12 +265,13 @@
bi->loopHit = -1;
bi->loopPredValid = false;
bi->loopIndex = lindex(pc);
- bi->loopTag = ((pc) >> (logSizeLoopPred - 2));
+ bi->loopTag = ((pc) >> (logSizeLoopPred - 2)) & loopTagMask;

for (int i = 0; i < 4; i++) {
if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
bi->loopHit = i;
- bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >=
3);
+ bi->loopPredValid =
+ ltable[bi->loopIndex + i].confidence ==
confidenceThreshold;
bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
ltable[bi->loopIndex + i].numIter) {
@@ -265,7 +292,8 @@
if (taken != ltable[index].dir) {
ltable[index].currentIterSpec = 0;
} else {
- ltable[index].currentIterSpec++;
+ ltable[index].currentIterSpec =
+ (ltable[index].currentIterSpec + 1) & loopNumIterMask;
}
}
}
@@ -286,12 +314,12 @@
return;
} else if (bi->loopPred != bi->tagePred) {
DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
- if (ltable[idx].age < 7)
- ltable[idx].age++;
+ unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
}
}

- ltable[idx].currentIter++;
+ ltable[idx].currentIter =
+ (ltable[idx].currentIter + 1) & loopNumIterMask;
if (ltable[idx].currentIter > ltable[idx].numIter) {
ltable[idx].confidence = 0;
if (ltable[idx].numIter != 0) {
@@ -306,9 +334,8 @@
if (ltable[idx].currentIter == ltable[idx].numIter) {
DPRINTF(LTage, "Loop End predicted successfully:%lx\n",
pc);

- if (ltable[idx].confidence < 7) {
- ltable[idx].confidence++;
- }
+ unsignedCtrUpdate(ltable[idx].confidence, true,
+ loopTableConfidenceBits);
//just do not predict when the loop count is 1 or 2
if (ltable[idx].numIter < 3) {
// free the entry
@@ -346,7 +373,7 @@
ltable[idx].dir = !taken;
ltable[idx].tag = bi->loopTag;
ltable[idx].numIter = 0;
- ltable[idx].age = 7;
+ ltable[idx].age = (1 << loopTableAgeBits) - 1;
ltable[idx].confidence = 0;
ltable[idx].currentIter = 1;
break;
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index 60c3467..f605fa6 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -144,7 +144,7 @@
int altBank;
int altBankIndex;
int bimodalIndex;
- int loopTag;
+ uint16_t loopTag;
uint16_t currentIter;

bool tagePred;
@@ -247,6 +247,15 @@
void ctrUpdate(int8_t & ctr, bool taken, int nbits);

/**
+ * Updates an unsigned counter based on up/down parameter
+ * @param ctr Reference to counter to update.
+ * @param up Boolean indicating if the counter is
incremented/decremented
+ * If true it is incremented, if false it is decremented
+ * @param nbits Counter width.
+ */
+ void unsignedCtrUpdate(uint8_t & ctr, bool taken, unsigned nbits);
+
+ /**
* Get a branch prediction from the bimodal
* predictor.
* @param pc The unshifted branch PC.
@@ -363,6 +372,14 @@
const unsigned minHist;
const unsigned maxHist;
const unsigned minTagWidth;
+ const unsigned loopTableAgeBits;
+ const unsigned loopTableConfidenceBits;
+ const unsigned loopTableTagBits;
+ const unsigned loopTableIterBits;
+
+ const uint8_t confidenceThreshold;
+ const uint16_t loopTagMask;
+ const uint16_t loopNumIterMask;

BimodalEntry *btable;
TageEntry **gtable;
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14216
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096
Gerrit-Change-Number: 14216
Gerrit-PatchSet: 1
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-MessageType: newchange
Pau Cabre (Gerrit)
2018-11-17 17:21:25 UTC
Permalink
Hello Ilias Vougioukas, Sudhanshu Jha, Andreas Sandberg,

I'd like you to reexamine a change. Please visit

https://gem5-review.googlesource.com/c/public/gem5/+/14216

to look at the new patch set (#2).

Change subject: cpu: Fixes on the loop predictor part of LTAGE
......................................................................

cpu: Fixes on the loop predictor part of LTAGE

Fixed the following fields of the loop predictor entries as described on
the LTAGE paper:
- Age counter (it was 3 bits and it should be 8 bits)
- Tag (it was 16 bits and it should be 14 bits). Also, it sometimes used
int variables and sometimes uint16_t, leading to wrong behaviour
- Confidence counter (it was 2 bits in some parts of the code and 3 bits
in some other parts. It should be 2 bits)
- Iteration counters (they were 16 bits and they should be 14 bits)
All the new sizes are now configurable

Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096
Signed-off-by: Pau Cabre <***@metempsy.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
3 files changed, 61 insertions(+), 11 deletions(-)
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14216
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096
Gerrit-Change-Number: 14216
Gerrit-PatchSet: 2
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-Assignee: Ilias Vougioukas <***@arm.com>
Gerrit-Reviewer: Andreas Sandberg <***@arm.com>
Gerrit-Reviewer: Ilias Vougioukas <***@arm.com>
Gerrit-Reviewer: Pau Cabre <***@metempsy.com>
Gerrit-Reviewer: Sudhanshu Jha <***@arm.com>
Gerrit-MessageType: newpatchset
Pau Cabre (Gerrit)
2018-11-21 20:59:48 UTC
Permalink
Hello Ilias Vougioukas, Sudhanshu Jha, Andreas Sandberg,

I'd like you to reexamine a change. Please visit

https://gem5-review.googlesource.com/c/public/gem5/+/14216

to look at the new patch set (#3).

Change subject: cpu: Fixes on the loop predictor part of LTAGE
......................................................................

cpu: Fixes on the loop predictor part of LTAGE

Fixed the following fields of the loop predictor entries as described on
the LTAGE paper:
- Age counter (it was 3 bits and it should be 8 bits)
- Tag (it was 16 bits and it should be 14 bits). Also, it sometimes used
int variables and sometimes uint16_t, leading to wrong behaviour
- Confidence counter (it was 2 bits in some parts of the code and 3 bits
in some other parts. It should be 2 bits)
- Iteration counters (they were 16 bits and they should be 14 bits)
All the new sizes are now configurable

Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096
Signed-off-by: Pau Cabre <***@metempsy.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
3 files changed, 61 insertions(+), 11 deletions(-)
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14216
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096
Gerrit-Change-Number: 14216
Gerrit-PatchSet: 3
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-Assignee: Ilias Vougioukas <***@arm.com>
Gerrit-Reviewer: Andreas Sandberg <***@arm.com>
Gerrit-Reviewer: Ilias Vougioukas <***@arm.com>
Gerrit-Reviewer: Pau Cabre <***@metempsy.com>
Gerrit-Reviewer: Sudhanshu Jha <***@arm.com>
Gerrit-CC: Jason Lowe-Power <***@lowepower.com>
Gerrit-MessageType: newpatchset
Pau Cabre (Gerrit)
2018-11-22 11:46:57 UTC
Permalink
Pau Cabre has submitted this change and it was merged. (
https://gem5-review.googlesource.com/c/public/gem5/+/14216 )

Change subject: cpu: Fixes on the loop predictor part of LTAGE
......................................................................

cpu: Fixes on the loop predictor part of LTAGE

Fixed the following fields of the loop predictor entries as described on
the LTAGE paper:
- Age counter (it was 3 bits and it should be 8 bits)
- Tag (it was 16 bits and it should be 14 bits). Also, it sometimes used
int variables and sometimes uint16_t, leading to wrong behaviour
- Confidence counter (it was 2 bits in some parts of the code and 3 bits
in some other parts. It should be 2 bits)
- Iteration counters (they were 16 bits and they should be 14 bits)
All the new sizes are now configurable

Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096
Signed-off-by: Pau Cabre <***@metempsy.com>
Reviewed-on: https://gem5-review.googlesource.com/c/14216
Reviewed-by: Jason Lowe-Power <***@lowepower.com>
Maintainer: Jason Lowe-Power <***@lowepower.com>
---
M src/cpu/pred/BranchPredictor.py
M src/cpu/pred/ltage.cc
M src/cpu/pred/ltage.hh
3 files changed, 61 insertions(+), 11 deletions(-)

Approvals:
Jason Lowe-Power: Looks good to me, approved; Looks good to me, approved



diff --git a/src/cpu/pred/BranchPredictor.py
b/src/cpu/pred/BranchPredictor.py
index 1eeecde..a428194 100644
--- a/src/cpu/pred/BranchPredictor.py
+++ b/src/cpu/pred/BranchPredictor.py
@@ -106,3 +106,9 @@
maxHist = Param.Unsigned(640, "Maximum history size of LTAGE")
minTagWidth = Param.Unsigned(7, "Minimum tag size in tag tables")

+ loopTableAgeBits = Param.Unsigned(8, "Number of age bits per loop
entry")
+ loopTableConfidenceBits = Param.Unsigned(2,
+ "Number of confidence bits per loop entry")
+ loopTableTagBits = Param.Unsigned(14, "Number of tag bits per loop
entry")
+ loopTableIterBits = Param.Unsigned(14, "Nuber of iteration bits per
loop")
+
diff --git a/src/cpu/pred/ltage.cc b/src/cpu/pred/ltage.cc
index 8d20d50..85ae2b3 100644
--- a/src/cpu/pred/ltage.cc
+++ b/src/cpu/pred/ltage.cc
@@ -59,8 +59,20 @@
minHist(params->minHist),
maxHist(params->maxHist),
minTagWidth(params->minTagWidth),
+ loopTableAgeBits(params->loopTableAgeBits),
+ loopTableConfidenceBits(params->loopTableConfidenceBits),
+ loopTableTagBits(params->loopTableTagBits),
+ loopTableIterBits(params->loopTableIterBits),
+ confidenceThreshold((1 << loopTableConfidenceBits) - 1),
+ loopTagMask((1 << loopTableTagBits) - 1),
+ loopNumIterMask((1 << loopTableIterBits) - 1),
threadHistory(params->numThreads)
{
+ // we use uint16_t type for these vales, so they cannot be more than
+ // 16 bits
+ assert(loopTableTagBits <= 16);
+ assert(loopTableIterBits <= 16);
+
assert(params->histBufferSize > params->maxHist * 2);
useAltPredForNewlyAllocated = 0;
logTick = 19;
@@ -212,6 +224,20 @@
}
}

+// Up-down unsigned saturating counter
+void
+LTAGE::unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits)
+{
+ assert(nbits <= sizeof(uint8_t) << 3);
+ if (up) {
+ if (ctr < ((1 << nbits) - 1))
+ ctr++;
+ } else {
+ if (ctr)
+ ctr--;
+ }
+}
+
// Bimodal prediction
bool
LTAGE::getBimodePred(Addr pc, BranchInfo* bi) const
@@ -248,12 +274,13 @@
bi->loopHit = -1;
bi->loopPredValid = false;
bi->loopIndex = lindex(pc);
- bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2));
+ bi->loopTag = ((pc) >> (instShiftAmt + logSizeLoopPred - 2)) &
loopTagMask;

for (int i = 0; i < 4; i++) {
if (ltable[bi->loopIndex + i].tag == bi->loopTag) {
bi->loopHit = i;
- bi->loopPredValid = (ltable[bi->loopIndex + i].confidence >=
3);
+ bi->loopPredValid =
+ ltable[bi->loopIndex + i].confidence ==
confidenceThreshold;
bi->currentIter = ltable[bi->loopIndex + i].currentIterSpec;
if (ltable[bi->loopIndex + i].currentIterSpec + 1 ==
ltable[bi->loopIndex + i].numIter) {
@@ -274,7 +301,8 @@
if (taken != ltable[index].dir) {
ltable[index].currentIterSpec = 0;
} else {
- ltable[index].currentIterSpec++;
+ ltable[index].currentIterSpec =
+ (ltable[index].currentIterSpec + 1) & loopNumIterMask;
}
}
}
@@ -295,12 +323,12 @@
return;
} else if (bi->loopPred != bi->tagePred) {
DPRINTF(LTage, "Loop Prediction success:%lx\n",pc);
- if (ltable[idx].age < 7)
- ltable[idx].age++;
+ unsignedCtrUpdate(ltable[idx].age, true, loopTableAgeBits);
}
}

- ltable[idx].currentIter++;
+ ltable[idx].currentIter =
+ (ltable[idx].currentIter + 1) & loopNumIterMask;
if (ltable[idx].currentIter > ltable[idx].numIter) {
ltable[idx].confidence = 0;
if (ltable[idx].numIter != 0) {
@@ -315,9 +343,8 @@
if (ltable[idx].currentIter == ltable[idx].numIter) {
DPRINTF(LTage, "Loop End predicted successfully:%lx\n",
pc);

- if (ltable[idx].confidence < 7) {
- ltable[idx].confidence++;
- }
+ unsignedCtrUpdate(ltable[idx].confidence, true,
+ loopTableConfidenceBits);
//just do not predict when the loop count is 1 or 2
if (ltable[idx].numIter < 3) {
// free the entry
@@ -355,7 +382,7 @@
ltable[idx].dir = !taken;
ltable[idx].tag = bi->loopTag;
ltable[idx].numIter = 0;
- ltable[idx].age = 7;
+ ltable[idx].age = (1 << loopTableAgeBits) - 1;
ltable[idx].confidence = 0;
ltable[idx].currentIter = 1;
break;
diff --git a/src/cpu/pred/ltage.hh b/src/cpu/pred/ltage.hh
index a810fb5..2119156 100644
--- a/src/cpu/pred/ltage.hh
+++ b/src/cpu/pred/ltage.hh
@@ -135,7 +135,7 @@
int altBank;
int altBankIndex;
int bimodalIndex;
- int loopTag;
+ uint16_t loopTag;
uint16_t currentIter;

bool tagePred;
@@ -238,6 +238,15 @@
void ctrUpdate(int8_t & ctr, bool taken, int nbits);

/**
+ * Updates an unsigned counter based on up/down parameter
+ * @param ctr Reference to counter to update.
+ * @param up Boolean indicating if the counter is
incremented/decremented
+ * If true it is incremented, if false it is decremented
+ * @param nbits Counter width.
+ */
+ void unsignedCtrUpdate(uint8_t & ctr, bool up, unsigned nbits);
+
+ /**
* Get a branch prediction from the bimodal
* predictor.
* @param pc The unshifted branch PC.
@@ -355,6 +364,14 @@
const unsigned minHist;
const unsigned maxHist;
const unsigned minTagWidth;
+ const unsigned loopTableAgeBits;
+ const unsigned loopTableConfidenceBits;
+ const unsigned loopTableTagBits;
+ const unsigned loopTableIterBits;
+
+ const uint8_t confidenceThreshold;
+ const uint16_t loopTagMask;
+ const uint16_t loopNumIterMask;

std::vector<bool> btablePrediction;
std::vector<bool> btableHysteresis;
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14216
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I8884c7454c1e510b65160eb4d5749d3259d34096
Gerrit-Change-Number: 14216
Gerrit-PatchSet: 4
Gerrit-Owner: Pau Cabre <***@metempsy.com>
Gerrit-Assignee: Ilias Vougioukas <***@arm.com>
Gerrit-Reviewer: Andreas Sandberg <***@arm.com>
Gerrit-Reviewer: Ilias Vougioukas <***@arm.com>
Gerrit-Reviewer: Jason Lowe-Power <***@lowepower.com>
Gerrit-Reviewer: Pau Cabre <***@metempsy.com>
Gerrit-Reviewer: Sudhanshu Jha <***@arm.com>
Gerrit-MessageType: merged
Loading...