Skip to content

Commit

Permalink
Improve CPU cache detection
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Nov 17, 2024
1 parent 8be914f commit 648b1df
Showing 1 changed file with 67 additions and 67 deletions.
134 changes: 67 additions & 67 deletions src/CpuInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,20 +144,20 @@ void CpuInfo::init()
Array<size_t, 4> cacheSharing;
};

struct L1CacheStatistics
struct L1CacheInfo
{
long cpuCoreId = -1;
std::size_t cpuCoreCount = 0;
};

using CacheSize_t = std::size_t;
// Items must be sorted in ascending order
std::map<CacheSize_t, L1CacheStatistics> l1CacheStatistics;
std::vector<CpuCoreCacheInfo> cacheInfo;
std::map<CacheSize_t, L1CacheInfo> l1CacheSizes;
std::map<std::size_t, CpuCoreCacheInfo> cpuCores;
std::size_t totalL1CpuCores = 0;
SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* info;

// Fill the cacheInfo vector with the L1, L2 & L3 cache
// Fill the cpuCores map with the L1, L2 & L3 cache
// sizes and cache sharing of each CPU core.
for (std::size_t i = 0; i < bytes; i += info->Size)
{
Expand Down Expand Up @@ -199,57 +199,60 @@ void CpuInfo::init()
// cpuCoreIds which is good enough for our usage.
std::size_t cpuCoreId = processorGroup * maxCpusPerProcessorGroup + cpuCoreIndex;

if (cacheInfo.size() <= cpuCoreId)
cacheInfo.resize((cpuCoreId + 1) * 2);

// If the CPU core has multiple caches of the same level,
// then we are only interested in the first such cache
// since this is likely the fastest cache. Usually, all
// caches are ordered from fastest to slowest.
if (cacheInfo[cpuCoreId].cacheSizes[level] != 0)
// then we are only interested in the smallest such
// cache since this is likely the fastest cache.
if (cpuCores[cpuCoreId].cacheSizes[level] > 0 &&
cpuCores[cpuCoreId].cacheSizes[level] <= cacheSize)
continue;

cacheInfo[cpuCoreId].cacheSizes[level] = cacheSize;
cacheInfo[cpuCoreId].cacheSharing[level] = cacheSharing;

// Count the number of occurences of each type of L1 cache.
// If one of these L1 cache types is used predominantly
// we will use that cache as our default cache size.
if (level == 1)
{
auto& mapEntry = l1CacheStatistics[cacheSize];
totalL1CpuCores++;
mapEntry.cpuCoreCount++;
if (mapEntry.cpuCoreId == -1)
mapEntry.cpuCoreId = (long) cpuCoreId;
}
cpuCores[cpuCoreId].cacheSizes[level] = cacheSize;
cpuCores[cpuCoreId].cacheSharing[level] = cacheSharing;
}
}
}

// Iterate over all CPU cores and create a map
// with the different L1 cache sizes.
for (const auto& cpuCore : cpuCores)
{
const auto& cpuCoreInfo = cpuCore.second;
std::size_t l1CacheSize = cpuCoreInfo.cacheSizes[1];
std::size_t cpuCoreId = cpuCore.first;

if (l1CacheSize > 0)
{
totalL1CpuCores++;
auto& mapEntry = l1CacheSizes[l1CacheSize];
mapEntry.cpuCoreCount++;
if (mapEntry.cpuCoreId == -1)
mapEntry.cpuCoreId = (long) cpuCoreId;
}
}

// Check if one of the L1 cache types is used
// by more than 80% of all CPU cores.
for (const auto& item : l1CacheStatistics)
for (const auto& l1CacheSize : l1CacheSizes)
{
if (item.second.cpuCoreCount > totalL1CpuCores * 0.80)
if (l1CacheSize.second.cpuCoreCount > totalL1CpuCores * 0.80)
{
long cpuCoreId = item.second.cpuCoreId;
cacheSizes_ = cacheInfo[cpuCoreId].cacheSizes;
cacheSharing_ = cacheInfo[cpuCoreId].cacheSharing;
long cpuCoreId = l1CacheSize.second.cpuCoreId;
cacheSizes_ = cpuCores[cpuCoreId].cacheSizes;
cacheSharing_ = cpuCores[cpuCoreId].cacheSharing;
return;
}
}

// For hybrid CPUs with many different L1 cache types
// we pick one from the middle that is hopefully
// representative for the CPU's overall performance.
if (!l1CacheStatistics.empty())
if (!l1CacheSizes.empty())
{
auto iter = l1CacheStatistics.begin();
std::advance(iter, (l1CacheStatistics.size() - 1) / 2);
auto iter = l1CacheSizes.begin();
std::advance(iter, (l1CacheSizes.size() - 1) / 2);
long cpuCoreId = iter->second.cpuCoreId;
cacheSizes_ = cacheInfo[cpuCoreId].cacheSizes;
cacheSharing_ = cacheInfo[cpuCoreId].cacheSharing;
cacheSizes_ = cpuCores[cpuCoreId].cacheSizes;
cacheSharing_ = cpuCores[cpuCoreId].cacheSharing;
}

// Windows XP or later
Expand Down Expand Up @@ -299,15 +302,16 @@ void CpuInfo::init()
info[i].Cache.Type == CacheUnified))
{
auto level = info[i].Cache.Level;
auto cacheSize = info[i].Cache.Size;

// If the CPU core has multiple caches of the same level,
// then we are only interested in the first such cache
// since this is likely the fastest cache. Usually, all
// caches are ordered from fastest to slowest.
if (cacheSizes_[level] != 0)
// then we are only interested in the smallest such
// cache since this is likely the fastest cache.
if (cacheSizes_[level] > 0 &&
cacheSizes_[level] <= cacheSize)
continue;

cacheSizes_[level] = info[i].Cache.Size;
cacheSizes_[level] = cacheSize;

// We assume the L1 and L2 caches are private
if (info[i].Cache.Level <= 2)
Expand Down Expand Up @@ -631,11 +635,10 @@ void CpuInfo::init()
{
std::string cpusOnline = "/sys/devices/system/cpu/online";
logicalCpuCores_ = parseThreadList(cpusOnline);
bool identicalL1CacheSizes = false;

using CacheSize_t = std::size_t;
// Items must be sorted in ascending order
std::map<CacheSize_t, std::size_t> l1CacheStatistics;
std::map<CacheSize_t, std::size_t> l1CacheSizes;
std::vector<size_t> cpuIds;
cpuIds.reserve(3);

Expand Down Expand Up @@ -673,34 +676,28 @@ void CpuInfo::init()
if (cacheType == "Data" ||
cacheType == "Unified")
{
std::size_t cacheSize = getCacheSize(path + "/size");
std::string cacheSizePath = path + "/size";
std::size_t cacheSize = getCacheSize(cacheSizePath);

if (cacheSize > 0)
{
if (l1CacheStatistics.find(cacheSize) == l1CacheStatistics.end())
l1CacheStatistics[cacheSize] = cpuId;
else
identicalL1CacheSizes = true;
if (l1CacheSizes.find(cacheSize) != l1CacheSizes.end())
l1CacheSizes[cacheSize] = cpuId;
break;
}
break;
}
}
}

// We have found 2 identical CPU cores.
// In this case we assume all CPU cores
// have the same cache hierarchy.
if (identicalL1CacheSizes)
break;
}

// Retrieve the cache sizes of the CPU core with the middle
// L1 data cache size. If there are only 2 different L1
// cache sizes we retrieve the cache sizes of the CPU core
// with the smaller L1 data cache size.
if (!l1CacheStatistics.empty())
if (!l1CacheSizes.empty())
{
auto iter = l1CacheStatistics.begin();
std::advance(iter, (l1CacheStatistics.size() - 1) / 2);
auto iter = l1CacheSizes.begin();
std::advance(iter, (l1CacheSizes.size() - 1) / 2);
std::size_t cpuId = iter->second;

for (std::size_t i = 0; i <= 3; i++)
Expand All @@ -712,24 +709,27 @@ void CpuInfo::init()
if (level >= 1 &&
level <= 3)
{
// If the CPU core has multiple caches of the same level,
// then we are only interested in the first such cache
// since this is likely the fastest cache. Usually, all
// caches are ordered from fastest to slowest.
if (cacheSizes_[level] != 0)
continue;

std::string type = path + "/type";
std::string cacheType = getString(type);

if (cacheType == "Data" ||
cacheType == "Unified")
{
std::string cacheSize = path + "/size";
std::string cacheSizePath = path + "/size";
std::string sharedCpuList = path + "/shared_cpu_list";
std::string sharedCpuMap = path + "/shared_cpu_map";
cacheSizes_[level] = getCacheSize(cacheSize);
cacheSharing_[level] = getThreads(sharedCpuList, sharedCpuMap);
std::size_t cacheSize = getCacheSize(cacheSizePath);
std::size_t cacheSharing = getThreads(sharedCpuList, sharedCpuMap);

// If the CPU core has multiple caches of the same level,
// then we are only interested in the smallest such
// cache since this is likely the fastest cache.
if (cacheSizes_[level] > 0 &&
cacheSizes_[level] <= cacheSize)
continue;

cacheSizes_[level] = cacheSize;
cacheSharing_[level] = cacheSharing;
}
}
}
Expand Down

0 comments on commit 648b1df

Please sign in to comment.