From 2d951b91a237f18437e9c5335a0d9ae4ad0714bf Mon Sep 17 00:00:00 2001 From: tastynoob <934348725@qq.com> Date: Thu, 17 Oct 2024 17:53:51 +0800 Subject: [PATCH] mem: prefetcher config align with kmh Change-Id: I629804a66e00f2b6e90a3aea98e2a7049b1809ff --- configs/common/CacheConfig.py | 17 ++++++++--- configs/common/Options.py | 8 +++-- configs/example/kmh.py | 9 ++++-- configs/example/xiangshan.py | 3 ++ src/mem/cache/prefetch/Prefetcher.py | 9 ++++++ src/mem/cache/prefetch/cmc.cc | 4 +-- .../prefetch/l2_composite_with_worker.cc | 29 +++++++++++++++++-- .../prefetch/l2_composite_with_worker.hh | 16 ++++++++-- src/mem/cache/prefetch/sms.cc | 8 +++-- src/mem/cache/prefetch/sms.hh | 1 + src/mem/cache/prefetch/worker.cc | 5 +++- 11 files changed, 88 insertions(+), 21 deletions(-) diff --git a/configs/common/CacheConfig.py b/configs/common/CacheConfig.py index c3907e6f9d..fcfbada621 100644 --- a/configs/common/CacheConfig.py +++ b/configs/common/CacheConfig.py @@ -64,7 +64,7 @@ def _get_cache_opts(level, options): opts['assoc'] = getattr(options, assoc_attr) prefetcher_attr = '{}_hwp_type'.format(level) - if hasattr(options, prefetcher_attr): + if hasattr(options, prefetcher_attr) and (not options.no_pf): opts['prefetcher'] = _get_hwp(getattr(options, prefetcher_attr)) return opts @@ -130,6 +130,11 @@ def config_cache(options, system): # system.tol2bus_list.append(L2XBar(clk_domain = system.cpu_clk_domain, width=256)) system.l2_caches[i].cpu_side = system.tol2bus_list[i].mem_side_ports + if options.kmh_align: + assert options.l2_hwp_type == 'L2CompositeWithWorkerPrefetcher' + system.l2_caches[i].prefetcher.enable_cmc = True + system.l2_caches[i].prefetcher.enable_bop = True + system.l2_caches[i].prefetcher.enable_cdp = False if options.ideal_cache: assert not options.l3cache, \ @@ -195,12 +200,16 @@ def config_cache(options, system): dcache.prefetcher.enable_cplx = True dcache.prefetcher.pht_pf_level = options.pht_pf_level dcache.prefetcher.short_stride_thres = 
options.short_stride_thres + dcache.prefetcher.enable_temporal = not options.kmh_align dcache.prefetcher.fuzzy_stride_matching = False dcache.prefetcher.stream_pf_ahead = True + + dcache.prefetcher.enable_bop = not options.kmh_align dcache.prefetcher.bop_large.delay_queue_enable = True dcache.prefetcher.bop_large.bad_score = 10 dcache.prefetcher.bop_small.delay_queue_enable = True dcache.prefetcher.bop_small.bad_score = 5 + dcache.prefetcher.queue_size = 128 dcache.prefetcher.max_prefetch_requests_with_pending_translation = 128 dcache.prefetcher.region_size = 64*16 # 64B * blocks per region @@ -212,7 +221,7 @@ def config_cache(options, system): system.cpu[i].add_pf_downstream(dcache.prefetcher) if options.ideal_cache: dcache.prefetcher.stream_pf_ahead = False - if options.l1d_use_xsstride: + if options.kmh_align: dcache.prefetcher.enable_berti = False dcache.prefetcher.enable_sstride = True @@ -220,14 +229,14 @@ def config_cache(options, system): icache.response_latency = 0 dcache.response_latency = 0 - if options.l1_to_l2_pf_hint: + if (not options.no_pf) and options.l1_to_l2_pf_hint: assert dcache.prefetcher != NULL and \ system.l2_caches[i].prefetcher != NULL dcache.prefetcher.add_pf_downstream(system.l2_caches[i].prefetcher) system.l2_caches[i].prefetcher.queue_size = 64 system.l2_caches[i].prefetcher.max_prefetch_requests_with_pending_translation = 128 - if options.l3cache and options.l2_to_l3_pf_hint: + if (not options.no_pf) and options.l3cache and options.l2_to_l3_pf_hint: assert system.l2_caches[i].prefetcher != NULL and \ system.l3.prefetcher != NULL system.l2_caches[i].prefetcher.add_pf_downstream(system.l3.prefetcher) diff --git a/configs/common/Options.py b/configs/common/Options.py index 527ee37f0c..37c4c00f77 100644 --- a/configs/common/Options.py +++ b/configs/common/Options.py @@ -163,6 +163,8 @@ def addNoISAOptions(parser, configure_xiangshan=False): parser.add_argument("--cacheline_size", type=int, default=64) parser.add_argument("--ideal-cache", 
action="store_true") + parser.add_argument("--no-pf", default=False, + action="store_true", help="Disable all hardware prefetchers") parser.add_argument("--l1i-hwp-type", default=None, choices=ObjectList.hwp_list.get_names(), help="L1 icache hardware prefetcher") parser.add_argument("--l1d-hwp-type", default='XSCompositePrefetcher', @@ -280,12 +282,12 @@ def addCommonOptions(parser, configure_xiangshan=False): action=ListRP, nargs=0, help="List available replacement policy types") + parser.add_argument("--kmh-align", action="store_true", default=False, + help=""" + Use kmh config""") parser.add_argument("--list-hwp-types", action=ListHWP, nargs=0, help="List available hardware prefetcher types") - parser.add_argument("--l1d-use-xsstride", action="store_true", default=False, - help=""" - Enable SPP component for L1 data prefetcher""") parser.add_argument("--l1d-enable-spp", action="store_true", default=False, help=""" Enable SPP component for L1 data prefetcher""") diff --git a/configs/example/kmh.py b/configs/example/kmh.py index 5e65e66b51..c70cb08886 100644 --- a/configs/example/kmh.py +++ b/configs/example/kmh.py @@ -41,10 +41,13 @@ args.enable_difftest = True args.enable_riscv_vector = True - args.l2_hwp_type = "WorkerPrefetcher" + # L1 cache prefetcher uses stream and stride + # L2 cache prefetcher uses pht, bop and cmc + # disable store prefetch training for the L1 prefetcher + # disable berti at L1 and cdp at L2 + args.l2_hwp_type = "L2CompositeWithWorkerPrefetcher" args.pht_pf_level = 2 - args.l1d_use_xsstride = True - + args.kmh_align = True assert not args.external_memory_system diff --git a/configs/example/xiangshan.py b/configs/example/xiangshan.py index d01ff18db3..b26492c7ac 100644 --- a/configs/example/xiangshan.py +++ b/configs/example/xiangshan.py @@ -175,6 +175,9 @@ def build_test_system(np, args): for cpu in test_sys.cpu: cpu.enable_riscv_vector = True + for cpu in test_sys.cpu: + cpu.store_prefetch_train = not args.kmh_align + # config arch db if args.enable_arch_db: test_sys.arch_db
= ArchDBer(arch_db_file=args.arch_db_file) diff --git a/src/mem/cache/prefetch/Prefetcher.py b/src/mem/cache/prefetch/Prefetcher.py index 75649d9d94..56c283dad7 100644 --- a/src/mem/cache/prefetch/Prefetcher.py +++ b/src/mem/cache/prefetch/Prefetcher.py @@ -995,6 +995,7 @@ class XSCompositePrefetcher(QueuedPrefetcher): enable_spp = Param.Bool(False, "Enable SPP component") enable_temporal = Param.Bool(False, "Enable temporal component") enable_berti = Param.Bool(True,"Enable berti component") + enable_bop = Param.Bool(True, "Enable BOP") enable_sstride = Param.Bool(False,"Enable sms stride component") enable_opt = Param.Bool(False,"Enable opt component") @@ -1025,6 +1026,14 @@ class L2CompositeWithWorkerPrefetcher(CompositeWithWorkerPrefetcher): cxx_header = "mem/cache/prefetch/l2_composite_with_worker.hh" cdp = Param.CDP(CDP(is_sub_prefetcher=True), "") + cmc = Param.CMCPrefetcher(CMCPrefetcher(is_sub_prefetcher=True), "") + bop_large = Param.BOPPrefetcher(BOPPrefetcher(is_sub_prefetcher=True), + "Large BOP used in composite prefetcher ") + bop_small = Param.BOPPrefetcher(SmallBOPPrefetcher(is_sub_prefetcher=True), + "Small BOP used in composite prefetcher ") + enable_bop = Param.Bool(False, "Enable BOP") + enable_cdp = Param.Bool(False, "Enable CDP") + enable_cmc = Param.Bool(False, "Enable CMC") class L3CompositeWithWorkerPrefetcher(CompositeWithWorkerPrefetcher): type = 'L3CompositeWithWorkerPrefetcher' diff --git a/src/mem/cache/prefetch/cmc.cc b/src/mem/cache/prefetch/cmc.cc index 156a199f22..a4ba811676 100644 --- a/src/mem/cache/prefetch/cmc.cc +++ b/src/mem/cache/prefetch/cmc.cc @@ -75,15 +75,15 @@ void CMCPrefetcher::doPrefetch(const PrefetchInfo &pfi, std::vector &addresses, bool late, PrefetchSourceType pf_source, bool is_first_shot) { - bool can_prefetch = !pfi.isWrite() && pfi.hasPC(); + bool can_prefetch = cache->level() == 1 ? (!pfi.isWrite() && pfi.hasPC()) : true; if (!can_prefetch) { return; } + Addr pc = pfi.hasPC() ? 
pfi.getPC() : 0; Addr vaddr = pfi.getAddr(); Addr block_addr = blockAddress(vaddr); bool is_secure = pfi.isSecure(); - Addr pc = pfi.getPC(); int prefetchSource = pf_source; // if (enableDB) { diff --git a/src/mem/cache/prefetch/l2_composite_with_worker.cc b/src/mem/cache/prefetch/l2_composite_with_worker.cc index 62050eb340..177451b098 100644 --- a/src/mem/cache/prefetch/l2_composite_with_worker.cc +++ b/src/mem/cache/prefetch/l2_composite_with_worker.cc @@ -10,16 +10,36 @@ namespace prefetch { L2CompositeWithWorkerPrefetcher::L2CompositeWithWorkerPrefetcher(const L2CompositeWithWorkerPrefetcherParams &p) - : CompositeWithWorkerPrefetcher(p), cdp(p.cdp) + : CompositeWithWorkerPrefetcher(p), + cdp(p.cdp), + largeBOP(p.bop_large), + smallBOP(p.bop_small), + cmc(p.cmc), + enableBOP(p.enable_bop), + enableCDP(p.enable_cdp), + enableCMC(p.enable_cmc) { cdp->pfLRUFilter = &pfLRUFilter; + largeBOP->filter = &pfLRUFilter; + smallBOP->filter = &pfLRUFilter; + cmc->filter = &pfLRUFilter; cdp->parentRid = p.sys->getRequestorId(this); } void -L2CompositeWithWorkerPrefetcher::calculatePrefetch(const PrefetchInfo &pfi, std::vector &addresses) +L2CompositeWithWorkerPrefetcher::calculatePrefetch(const PrefetchInfo &pfi, std::vector &addresses, + bool late, PrefetchSourceType pf_source, bool miss_repeat) { - cdp->calculatePrefetch(pfi, addresses); + if (enableCMC) { + cmc->doPrefetch(pfi, addresses, late, pf_source, false); + } + if (enableCDP) { + cdp->calculatePrefetch(pfi, addresses); + } + if (enableBOP) { + largeBOP->calculatePrefetch(pfi, addresses, late && pf_source == PrefetchSourceType::HWP_BOP); + smallBOP->calculatePrefetch(pfi, addresses, late && pf_source == PrefetchSourceType::HWP_BOP); + } } void @@ -64,6 +84,9 @@ L2CompositeWithWorkerPrefetcher::setParentInfo(System *sys, ProbeManager *pm, Ca { cdp->setParentInfo(sys, pm, _cache, blk_size); cdp->setStatsPtr(&prefetchStats); + largeBOP->setParentInfo(sys, pm, _cache, blk_size); + smallBOP->setParentInfo(sys, pm, 
_cache, blk_size); + cmc->setParentInfo(sys, pm, _cache, blk_size); CompositeWithWorkerPrefetcher::setParentInfo(sys, pm, _cache, blk_size); } diff --git a/src/mem/cache/prefetch/l2_composite_with_worker.hh b/src/mem/cache/prefetch/l2_composite_with_worker.hh index 3cd0939c8f..7637f3b2d8 100644 --- a/src/mem/cache/prefetch/l2_composite_with_worker.hh +++ b/src/mem/cache/prefetch/l2_composite_with_worker.hh @@ -3,7 +3,9 @@ #include +#include "mem/cache/prefetch/bop.hh" #include "mem/cache/prefetch/cdp.hh" +#include "mem/cache/prefetch/cmc.hh" #include "mem/cache/prefetch/composite_with_worker.hh" #include "params/L2CompositeWithWorkerPrefetcher.hh" @@ -19,7 +21,10 @@ class L2CompositeWithWorkerPrefetcher : public CompositeWithWorkerPrefetcher public: L2CompositeWithWorkerPrefetcher(const L2CompositeWithWorkerPrefetcherParams &p); - void calculatePrefetch(const PrefetchInfo &pfi, std::vector &addresses) override; + void calculatePrefetch(const PrefetchInfo &pfi, std::vector &addresses) override {} + + void calculatePrefetch(const PrefetchInfo &pfi, std::vector &addresses, bool late, + PrefetchSourceType source, bool miss_repeat) override; void addHintDownStream(Base *down_stream) override { @@ -38,6 +43,13 @@ class L2CompositeWithWorkerPrefetcher : public CompositeWithWorkerPrefetcher private: CDP *cdp; + BOP* largeBOP; + BOP* smallBOP; + CMCPrefetcher* cmc; + + const bool enableBOP; + const bool enableCDP; + const bool enableCMC; bool offloadLowAccuracy = true; }; @@ -46,4 +58,4 @@ class L2CompositeWithWorkerPrefetcher : public CompositeWithWorkerPrefetcher } // namespace gem5 -#endif // __MEM_CACHE_PREFETCH_COMPOITE_WITH_WORKER_L2_HH__ \ No newline at end of file +#endif // __MEM_CACHE_PREFETCH_COMPOITE_WITH_WORKER_L2_HH__ diff --git a/src/mem/cache/prefetch/sms.cc b/src/mem/cache/prefetch/sms.cc index 424a52eeb6..5b5ad10ac8 100644 --- a/src/mem/cache/prefetch/sms.cc +++ b/src/mem/cache/prefetch/sms.cc @@ -45,6 +45,7 @@ 
XSCompositePrefetcher::XSCompositePrefetcher(const XSCompositePrefetcherParams & enableTemporal(p.enable_temporal), enableSstride(p.enable_sstride), enableBerti(p.enable_berti), + enableBOP(p.enable_bop), enableOpt(p.enable_opt), enableXsstream(p.enable_xsstream), phtEarlyUpdate(p.pht_early_update), @@ -174,9 +175,10 @@ XSCompositePrefetcher::calculatePrefetch(const PrefetchInfo &pfi, std::vector