From 648b1df7e33e73f8393a006b8e1c54dbd88b476f Mon Sep 17 00:00:00 2001 From: Kim Walisch Date: Sun, 17 Nov 2024 19:26:10 +0100 Subject: [PATCH] Improve CPU cache detection --- src/CpuInfo.cpp | 134 ++++++++++++++++++++++++------------------------ 1 file changed, 67 insertions(+), 67 deletions(-) diff --git a/src/CpuInfo.cpp b/src/CpuInfo.cpp index a95bc567..33f2ca43 100644 --- a/src/CpuInfo.cpp +++ b/src/CpuInfo.cpp @@ -144,7 +144,7 @@ void CpuInfo::init() Array cacheSharing; }; - struct L1CacheStatistics + struct L1CacheInfo { long cpuCoreId = -1; std::size_t cpuCoreCount = 0; @@ -152,12 +152,12 @@ void CpuInfo::init() using CacheSize_t = std::size_t; // Items must be sorted in ascending order - std::map l1CacheStatistics; - std::vector cacheInfo; + std::map l1CacheSizes; + std::map cpuCores; std::size_t totalL1CpuCores = 0; SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX* info; - // Fill the cacheInfo vector with the L1, L2 & L3 cache + // Fill the cpuCores map with the L1, L2 & L3 cache // sizes and cache sharing of each CPU core. for (std::size_t i = 0; i < bytes; i += info->Size) { @@ -199,43 +199,46 @@ void CpuInfo::init() // cpuCoreIds which is good enough for our usage. std::size_t cpuCoreId = processorGroup * maxCpusPerProcessorGroup + cpuCoreIndex; - if (cacheInfo.size() <= cpuCoreId) - cacheInfo.resize((cpuCoreId + 1) * 2); - // If the CPU core has multiple caches of the same level, - // then we are only interested in the first such cache - // since this is likely the fastest cache. Usually, all - // caches are ordered from fastest to slowest. - if (cacheInfo[cpuCoreId].cacheSizes[level] != 0) + // then we are only interested in the smallest such + // cache since this is likely the fastest cache. + if (cpuCores[cpuCoreId].cacheSizes[level] > 0 && + cpuCores[cpuCoreId].cacheSizes[level] <= cacheSize) continue; - cacheInfo[cpuCoreId].cacheSizes[level] = cacheSize; - cacheInfo[cpuCoreId].cacheSharing[level] = cacheSharing; - - // Count the number of occurences of each type of L1 cache. - // If one of these L1 cache types is used predominantly - // we will use that cache as our default cache size. - if (level == 1) - { - auto& mapEntry = l1CacheStatistics[cacheSize]; - totalL1CpuCores++; - mapEntry.cpuCoreCount++; - if (mapEntry.cpuCoreId == -1) - mapEntry.cpuCoreId = (long) cpuCoreId; - } + cpuCores[cpuCoreId].cacheSizes[level] = cacheSize; + cpuCores[cpuCoreId].cacheSharing[level] = cacheSharing; } } } + // Iterate over all CPU cores and create a map + // with the different L1 cache sizes. + for (const auto& cpuCore : cpuCores) + { + const auto& cpuCoreInfo = cpuCore.second; + std::size_t l1CacheSize = cpuCoreInfo.cacheSizes[1]; + std::size_t cpuCoreId = cpuCore.first; + + if (l1CacheSize > 0) + { + totalL1CpuCores++; + auto& mapEntry = l1CacheSizes[l1CacheSize]; + mapEntry.cpuCoreCount++; + if (mapEntry.cpuCoreId == -1) + mapEntry.cpuCoreId = (long) cpuCoreId; + } + } + // Check if one of the L1 cache types is used // by more than 80% of all CPU cores. - for (const auto& item : l1CacheStatistics) + for (const auto& l1CacheSize : l1CacheSizes) { - if (item.second.cpuCoreCount > totalL1CpuCores * 0.80) + if (l1CacheSize.second.cpuCoreCount > totalL1CpuCores * 0.80) { - long cpuCoreId = item.second.cpuCoreId; - cacheSizes_ = cacheInfo[cpuCoreId].cacheSizes; - cacheSharing_ = cacheInfo[cpuCoreId].cacheSharing; + long cpuCoreId = l1CacheSize.second.cpuCoreId; + cacheSizes_ = cpuCores[cpuCoreId].cacheSizes; + cacheSharing_ = cpuCores[cpuCoreId].cacheSharing; return; } } @@ -243,13 +246,13 @@ void CpuInfo::init() // For hybrid CPUs with many different L1 cache types // we pick one from the middle that is hopefully // representative for the CPU's overall performance. - if (!l1CacheStatistics.empty()) + if (!l1CacheSizes.empty()) { - auto iter = l1CacheStatistics.begin(); - std::advance(iter, (l1CacheStatistics.size() - 1) / 2); + auto iter = l1CacheSizes.begin(); + std::advance(iter, (l1CacheSizes.size() - 1) / 2); long cpuCoreId = iter->second.cpuCoreId; - cacheSizes_ = cacheInfo[cpuCoreId].cacheSizes; - cacheSharing_ = cacheInfo[cpuCoreId].cacheSharing; + cacheSizes_ = cpuCores[cpuCoreId].cacheSizes; + cacheSharing_ = cpuCores[cpuCoreId].cacheSharing; } // Windows XP or later @@ -299,15 +302,16 @@ void CpuInfo::init() info[i].Cache.Type == CacheUnified)) { auto level = info[i].Cache.Level; + auto cacheSize = info[i].Cache.Size; // If the CPU core has multiple caches of the same level, - // then we are only interested in the first such cache - // since this is likely the fastest cache. Usually, all - // caches are ordered from fastest to slowest. - if (cacheSizes_[level] != 0) + // then we are only interested in the smallest such + // cache since this is likely the fastest cache. + if (cacheSizes_[level] > 0 && + cacheSizes_[level] <= cacheSize) continue; - cacheSizes_[level] = info[i].Cache.Size; + cacheSizes_[level] = cacheSize; // We assume the L1 and L2 caches are private if (info[i].Cache.Level <= 2) @@ -631,11 +635,10 @@ void CpuInfo::init() { std::string cpusOnline = "/sys/devices/system/cpu/online"; logicalCpuCores_ = parseThreadList(cpusOnline); - bool identicalL1CacheSizes = false; using CacheSize_t = std::size_t; // Items must be sorted in ascending order - std::map l1CacheStatistics; + std::map l1CacheSizes; std::vector cpuIds; cpuIds.reserve(3); @@ -673,34 +676,28 @@ void CpuInfo::init() if (cacheType == "Data" || cacheType == "Unified") { - std::size_t cacheSize = getCacheSize(path + "/size"); + std::string cacheSizePath = path + "/size"; + std::size_t cacheSize = getCacheSize(cacheSizePath); + if (cacheSize > 0) { - if (l1CacheStatistics.find(cacheSize) == l1CacheStatistics.end()) - l1CacheStatistics[cacheSize] = cpuId; - else - identicalL1CacheSizes = true; + if (l1CacheSizes.find(cacheSize) != l1CacheSizes.end()) + l1CacheSizes[cacheSize] = cpuId; + break; } - break; } } } - - // We have found 2 identical CPU cores. - // In this case we assume all CPU cores - // have the same cache hierarchy. - if (identicalL1CacheSizes) - break; } // Retrieve the cache sizes of the CPU core with the middle // L1 data cache size. If there are only 2 different L1 // cache sizes we retrieve the cache sizes of the CPU core // with the smaller L1 data cache size. - if (!l1CacheStatistics.empty()) + if (!l1CacheSizes.empty()) { - auto iter = l1CacheStatistics.begin(); - std::advance(iter, (l1CacheStatistics.size() - 1) / 2); + auto iter = l1CacheSizes.begin(); + std::advance(iter, (l1CacheSizes.size() - 1) / 2); std::size_t cpuId = iter->second; for (std::size_t i = 0; i <= 3; i++) @@ -712,24 +709,27 @@ void CpuInfo::init() if (level >= 1 && level <= 3) { - // If the CPU core has multiple caches of the same level, - // then we are only interested in the first such cache - // since this is likely the fastest cache. Usually, all - // caches are ordered from fastest to slowest. - if (cacheSizes_[level] != 0) - continue; - std::string type = path + "/type"; std::string cacheType = getString(type); if (cacheType == "Data" || cacheType == "Unified") { - std::string cacheSize = path + "/size"; + std::string cacheSizePath = path + "/size"; std::string sharedCpuList = path + "/shared_cpu_list"; std::string sharedCpuMap = path + "/shared_cpu_map"; - cacheSizes_[level] = getCacheSize(cacheSize); - cacheSharing_[level] = getThreads(sharedCpuList, sharedCpuMap); + std::size_t cacheSize = getCacheSize(cacheSizePath); + std::size_t cacheSharing = getThreads(sharedCpuList, sharedCpuMap); + + // If the CPU core has multiple caches of the same level, + // then we are only interested in the smallest such + // cache since this is likely the fastest cache. + if (cacheSizes_[level] > 0 && + cacheSizes_[level] <= cacheSize) + continue; + + cacheSizes_[level] = cacheSize; + cacheSharing_[level] = cacheSharing; } } }