[gem5-dev] Change in gem5/gem5[master]: mem-cache: Add R/W information to calculateAccessLatency

Daniel Carvalho (Gerrit)

2018-11-29 15:15:17 UTC

Daniel Carvalho has uploaded this change for review. (
https://gem5-review.googlesource.com/c/public/gem5/+/14716

Change subject: mem-cache: Add R/W information to calculateAccessLatency
......................................................................

mem-cache: Add R/W information to calculateAccessLatency

Add read and write information to cache's access latency calculation,
so that it can be used in future patches. This requires the access
latency calculation to be set appropriately throughout the block
accessing calls.

If both read and write flags are set, the access should be considered
a RMW.

As a side effect, cmpAndSwap and satisfyRequest now return the access
latency, and receive the tag lookup latency as an argument.

Change-Id: I0bb37fdecf12ef8968727a69972f7ffb5d17254c
Signed-off-by: Daniel R. Carvalho <***@yahoo.com.br>
---
M src/mem/cache/base.cc
M src/mem/cache/base.hh
M src/mem/cache/cache.cc
M src/mem/cache/cache.hh
M src/mem/cache/noncoherent_cache.cc
M src/mem/cache/noncoherent_cache.hh
6 files changed, 131 insertions(+), 61 deletions(-)

diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
index bad24f7..e4f6dc0 100644
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -343,7 +343,7 @@
// the delay provided by the crossbar
Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay;

- Cycles lat;
+ Cycles lat(0);
CacheBlk *blk = nullptr;
bool satisfied = false;
{
@@ -549,10 +549,7 @@
// writebacks... that would mean that someone used an atomic
// access in timing mode

- // We use lookupLatency here because it is used to specify the latency
- // to access.
- Cycles lat = lookupLatency;
-
+ Cycles lat(0);
CacheBlk *blk = nullptr;
PacketList writebacks;
bool satisfied = access(pkt, blk, lat, writebacks);
@@ -679,8 +676,8 @@
}

-void
-BaseCache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt)
+Cycles
+BaseCache::cmpAndSwap(CacheBlk *blk, PacketPtr pkt, Cycles lookup_lat)
{
assert(pkt->isRequest());

@@ -717,6 +714,9 @@
std::memcpy(blk_data, &overwrite_val, pkt->getSize());
blk->status |= BlkDirty;
}
+
+ // This is either a read or a RMW
+ return calculateAccessLatency(blk, lookup_lat, true, overwrite_mem);
}

QueueEntry*
@@ -801,9 +801,14 @@
return nullptr;
}

-void
-BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool)
+Cycles
+BaseCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, Cycles lookup_lat,
+ bool, bool)
{
+ // This request caused an access, which has a latency. It will be
properly
+ // set on a call to calculateAccessLatency()
+ Cycles lat = Cycles(0);
+
assert(pkt->isRequest());

assert(blk && blk->isValid());
@@ -831,8 +836,11 @@

// set block status to dirty
blk->status |= BlkDirty;
+
+ // A RMW sets both the read and write tags
+ lat = calculateAccessLatency(blk, lookup_lat, true, true);
} else {
- cmpAndSwap(blk, pkt);
+ lat = cmpAndSwap(blk, pkt, lookup_lat);
}
} else if (pkt->isWrite()) {
// we have the block in a writable state and can go ahead,
@@ -840,10 +848,16 @@
// downstream caches along the path to memory, but always
// Exclusive, and never Modified
assert(blk->isWritable());
+
// Write or WriteLine at the first cache with block in writable
state
- if (blk->checkWrite(pkt)) {
+ const bool write_ok = blk->checkWrite(pkt);
+ if (write_ok) {
pkt->writeDataToBlock(blk->data, blkSize);
}
+
+ // A failed StoreCond will only access the metadata of the block
+ lat = calculateAccessLatency(blk, lookup_lat, false, write_ok);
+
// Always mark the line as dirty (and thus transition to the
// Modified state) even if we are a failed StoreCond so we
// supply data to any snoops that have appended themselves to
@@ -858,25 +872,36 @@
// all read responses have a data payload
assert(pkt->hasRespData());
pkt->setDataFromBlock(blk->data, blkSize);
- } else if (pkt->isUpgrade()) {
- // sanity check
- assert(!pkt->hasSharers());

- if (blk->isDirty()) {
- // we were in the Owned state, and a cache above us that
- // has the line in Shared state needs to be made aware
- // that the data it already has is in fact dirty
- pkt->setCacheResponding();
- blk->status &= ~BlkDirty;
- }
- } else if (pkt->isClean()) {
- blk->status &= ~BlkDirty;
+ lat = calculateAccessLatency(blk, lookup_lat, true, false);
} else {
- assert(pkt->isInvalidate());
- invalidateBlock(blk);
- DPRINTF(CacheVerbose, "%s for %s (invalidation)\n", __func__,
- pkt->print());
+ // This request does not access the data array (i.e., only changes
the
+ // block's metadata), therefore we only need to consider the tag
lookup
+ // latency
+ lat = calculateAccessLatency(blk, lookup_lat, false, false);
+
+ if (pkt->isUpgrade()) {
+ // sanity check
+ assert(!pkt->hasSharers());
+
+ if (blk->isDirty()) {
+ // we were in the Owned state, and a cache above us that
+ // has the line in Shared state needs to be made aware
+ // that the data it already has is in fact dirty
+ pkt->setCacheResponding();
+ blk->status &= ~BlkDirty;
+ }
+ } else if (pkt->isClean()) {
+ blk->status &= ~BlkDirty;
+ } else {
+ assert(pkt->isInvalidate());
+ invalidateBlock(blk);
+ DPRINTF(CacheVerbose, "%s for %s (invalidation)\n", __func__,
+ pkt->print());
+ }
}
+
+ return lat;
}

/////////////////////////////////////////////////////
@@ -885,8 +910,9 @@
//
/////////////////////////////////////////////////////
Cycles
-BaseCache::calculateAccessLatency(const CacheBlk* blk,
- const Cycles lookup_lat) const
+BaseCache::calculateAccessLatency(const CacheBlk* blk, const Cycles
lookup_lat,
+ const bool is_read,
+ const bool is_write) const
{
Cycles lat(lookup_lat);

@@ -926,8 +952,10 @@
Cycles tag_latency(0);
blk = tags->accessBlock(pkt->getAddr(), pkt->isSecure(), tag_latency);

- // Calculate access latency
- lat = calculateAccessLatency(blk, tag_latency);
+ // Calculate access latency. This is a dummy value set for early exits,
+ // and that will be overridden when appropriate (i.e., when the data is
+ // read or written).
+ lat = calculateAccessLatency(blk, tag_latency, pkt->isRead(), false);

DPRINTF(Cache, "%s for %s %s\n", __func__, pkt->print(),
blk ? "hit " + blk->print() : "miss");
@@ -1022,6 +1050,9 @@
// nothing else to do; writeback doesn't expect response
assert(!pkt->needsResponse());
pkt->writeDataToBlock(blk->data, blkSize);
+
+ lat = calculateAccessLatency(blk, tag_latency, false, true);
+
DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
incHitCount(pkt);
// populate the time when the block will be ready to access.
@@ -1078,6 +1109,9 @@
// nothing else to do; writeback doesn't expect response
assert(!pkt->needsResponse());
pkt->writeDataToBlock(blk->data, blkSize);
+
+ lat = calculateAccessLatency(blk, tag_latency, false, true);
+
DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());

incHitCount(pkt);
@@ -1089,9 +1123,10 @@
return !pkt->writeThrough();
} else if (blk && (pkt->needsWritable() ? blk->isWritable() :
blk->isReadable())) {
- // OK to satisfy access
+ // OK to satisfy access. This will update the access latency with
the
+ // correct value according to the access type
incHitCount(pkt);
- satisfyRequest(pkt, blk);
+ lat = satisfyRequest(pkt, blk, tag_latency);
maintainClusivity(pkt->fromCache(), blk);

return true;
@@ -1212,6 +1247,13 @@

pkt->writeDataToBlock(blk->data, blkSize);
}
+
+ // This access either writes data to the block, or only updates
metadata.
+ // Ignore access latency, as it won't be used anywhere (we are dealing
+ // with a response, thus the data can be simultaneously provided to the
+ // requestor)
+ calculateAccessLatency(blk, Cycles(0), false, pkt->isRead());
+
// We pay for fillLatency here.
blk->setWhenReady(clockEdge(fillLatency) + pkt->payloadDelay);

diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
index 240bf21..cfa7999 100644
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -419,15 +419,19 @@
Addr regenerateBlkAddr(CacheBlk* blk);

/**
- * Calculate access latency in ticks given a tag lookup latency, and
- * whether access was a hit or miss.
+ * Calculate access latency in ticks given a tag lookup latency, the
+ * access type and whether access was a hit or miss. If both is_read
+ * and is_write are set, access is a read-modify-write.
*
* @param blk The cache block that was accessed.
* @param lookup_lat Latency of the respective tag lookup.
+ * @param is_read Whether the access is a read.
+ * @param is_write Whether the access is a write.
* @return The number of ticks that pass due to a block access.
*/
- Cycles calculateAccessLatency(const CacheBlk* blk,
- const Cycles lookup_lat) const;
+ Cycles calculateAccessLatency(const CacheBlk* blk, const Cycles
lookup_lat,
+ const bool is_read,
+ const bool is_write) const;

/**
* Does all the processing necessary to perform the provided request.
@@ -564,8 +568,10 @@

/**
* Handle doing the Compare and Swap function for SPARC.
+ *
+ * @return The access latency of a read or read-modify-write.
*/
- void cmpAndSwap(CacheBlk *blk, PacketPtr pkt);
+ Cycles cmpAndSwap(CacheBlk *blk, PacketPtr pkt, Cycles lookup_lat);

/**
* Return the next queue entry to service, either a pending miss
@@ -648,12 +654,15 @@
*
* @param pkt Request packet from upstream that hit a block
* @param blk Cache block that the packet hit
+ * @param lookup_lat The tag lookup latency of the block.
* @param deferred_response Whether this request originally missed
* @param pending_downgrade Whether the writable flag is to be removed
+ * @return The latency to perform the request.
*/
- virtual void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
- bool deferred_response = false,
- bool pending_downgrade = false);
+ virtual Cycles satisfyRequest(PacketPtr pkt, CacheBlk *blk,
+ Cycles lookup_lat,
+ bool deferred_response = false,
+ bool pending_downgrade = false);

/**
* Maintain the clusivity of this cache by potentially
diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc
index 624f244..83ea350 100644
--- a/src/mem/cache/cache.cc
+++ b/src/mem/cache/cache.cc
@@ -77,11 +77,11 @@
{
}

-void
-Cache::satisfyRequest(PacketPtr pkt, CacheBlk *blk,
+Cycles
+Cache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, Cycles lookup_lat,
bool deferred_response, bool pending_downgrade)
{
- BaseCache::satisfyRequest(pkt, blk);
+ const Cycles lat = BaseCache::satisfyRequest(pkt, blk, lookup_lat);

if (pkt->isRead()) {
// determine if this read is from a (coherent) cache or not
@@ -152,6 +152,8 @@
}
}
}
+
+ return lat;
}

/////////////////////////////////////////////////////
@@ -180,9 +182,10 @@
BaseCache::evictBlock(old_blk, writebacks);
}

+ // An uncacheable block's access latency is aliased as a miss
blk = nullptr;
- // lookupLatency is the latency in case the request is uncacheable.
- lat = lookupLatency;
+ lat = calculateAccessLatency(blk, lookupLatency, pkt->isRead(),
false);
+
return false;
}

@@ -628,14 +631,16 @@
blk = handleFill(bus_pkt, blk, writebacks, allocate);
assert(blk != NULL);
is_invalidate = false;
- satisfyRequest(pkt, blk);
+ // Ignore the access latency, as it is atomic mode
+ satisfyRequest(pkt, blk, Cycles(0));
} else if (bus_pkt->isRead() ||
bus_pkt->cmd == MemCmd::UpgradeResp) {
// we're updating cache state to allow us to
// satisfy the upstream request from the cache
blk = handleFill(bus_pkt, blk, writebacks,
allocOnFill(pkt->cmd));
- satisfyRequest(pkt, blk);
+ // Ignore the access latency, as it is atomic mode
+ satisfyRequest(pkt, blk, Cycles(0));
maintainClusivity(pkt->fromCache(), blk);
} else {
// we're satisfying the upstream request without
@@ -731,7 +736,10 @@
}

if (blk && blk->isValid() && !mshr->isForward) {
- satisfyRequest(tgt_pkt, blk, true,
mshr->hasPostDowngrade());
+ // Don't add access latency to completion time, as the miss
+ // can be serviced while request is being satisfied.
+ satisfyRequest(tgt_pkt, blk, Cycles(0), true,
+ mshr->hasPostDowngrade());

// How many bytes past the first request is this one
int transfer_offset =
@@ -914,6 +922,8 @@
pkt->makeTimingResponse();
if (pkt->isRead()) {
pkt->setDataFromBlock(blk_data, blkSize);
+
+ // @todo Make someone pay for this read
}
if (pkt->cmd == MemCmd::ReadResp && pending_inval) {
// Assume we defer a response to a read from a far-away cache
@@ -1128,8 +1138,12 @@
pkt->makeAtomicResponse();
// packets such as upgrades do not actually have any data
// payload
- if (pkt->hasData())
+ if (pkt->hasData()) {
pkt->setDataFromBlock(blk->data, blkSize);
+
+ // As we are in atomic mode, ignore latency
+ calculateAccessLatency(blk, Cycles(0), true, false);
+ }
}
}

diff --git a/src/mem/cache/cache.hh b/src/mem/cache/cache.hh
index a7eb97d..5a8b774 100644
--- a/src/mem/cache/cache.hh
+++ b/src/mem/cache/cache.hh
@@ -117,9 +117,9 @@

Tick recvAtomicSnoop(PacketPtr pkt) override;

- void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
- bool deferred_response = false,
- bool pending_downgrade = false) override;
+ Cycles satisfyRequest(PacketPtr pkt, CacheBlk *blk, Cycles lookup_lat,
+ bool deferred_response = false,
+ bool pending_downgrade = false) override;

void doTimingSupplyResponse(PacketPtr req_pkt, const uint8_t *blk_data,
bool already_copied, bool pending_inval);
diff --git a/src/mem/cache/noncoherent_cache.cc
b/src/mem/cache/noncoherent_cache.cc
index 5edd435..f5a9375 100644
--- a/src/mem/cache/noncoherent_cache.cc
+++ b/src/mem/cache/noncoherent_cache.cc
@@ -69,14 +69,15 @@
{
}

-void
-NoncoherentCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk, bool, bool)
+Cycles
+NoncoherentCache::satisfyRequest(PacketPtr pkt, CacheBlk *blk,
+ Cycles lookup_lat, bool, bool)
{
// As this a non-coherent cache located below the point of
// coherency, we do not expect requests that are typically used to
// keep caches coherent (e.g., InvalidateReq or UpdateReq).
assert(pkt->isRead() || pkt->isWrite());
- BaseCache::satisfyRequest(pkt, blk);
+ return BaseCache::satisfyRequest(pkt, blk, lookup_lat);
}

bool
@@ -200,7 +201,9 @@
blk = handleFill(bus_pkt, blk, writebacks,
allocOnFill(bus_pkt->cmd));
assert(blk);
}
- satisfyRequest(pkt, blk);
+
+ // Ignore the access latency, as it is atomic mode
+ satisfyRequest(pkt, blk, Cycles(0));

maintainClusivity(true, blk);

@@ -263,7 +266,9 @@
// packet comes from it, charged on headerDelay.
completion_time = pkt->headerDelay;

- satisfyRequest(tgt_pkt, blk);
+ // Don't add access latency to completion time, as the miss can
+ // be serviced while request is being satisfied
+ satisfyRequest(tgt_pkt, blk, Cycles(0));

// How many bytes past the first request is this one
int transfer_offset;
diff --git a/src/mem/cache/noncoherent_cache.hh
b/src/mem/cache/noncoherent_cache.hh
index 824c7cc..f985ae8 100644
--- a/src/mem/cache/noncoherent_cache.hh
+++ b/src/mem/cache/noncoherent_cache.hh
@@ -109,9 +109,9 @@

void functionalAccess(PacketPtr pkt, bool from_cpu_side) override;

- void satisfyRequest(PacketPtr pkt, CacheBlk *blk,
- bool deferred_response = false,
- bool pending_downgrade = false) override;
+ Cycles satisfyRequest(PacketPtr pkt, CacheBlk *blk, Cycles lookup_lat,
+ bool deferred_response = false,
+ bool pending_downgrade = false) override;

/*
* Creates a new packet with the request to be send to the memory
--
To view, visit https://gem5-review.googlesource.com/c/public/gem5/+/14716
To unsubscribe, or for help writing mail filters, visit
https://gem5-review.googlesource.com/settings

Gerrit-Project: public/gem5
Gerrit-Branch: master
Gerrit-Change-Id: I0bb37fdecf12ef8968727a69972f7ffb5d17254c
Gerrit-Change-Number: 14716
Gerrit-PatchSet: 1
Gerrit-Owner: Daniel Carvalho <***@yahoo.com.br>
Gerrit-MessageType: newchange