From cfb4e34c61f1288fb5224d9f9acae30e1b3038f1 Mon Sep 17 00:00:00 2001 From: "Shen, Wanglei" Date: Fri, 27 Dec 2024 21:28:12 +0800 Subject: [PATCH] identify arm64 and throughput setting --- .../intel_cpu/src/cpu_streams_calculation.cpp | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp index 3af6a52d5f3342..0aeff0aafc0b44 100644 --- a/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp +++ b/src/plugins/intel_cpu/src/cpu_streams_calculation.cpp @@ -9,6 +9,9 @@ #include #include +#if (defined(OPENVINO_ARCH_ARM64) && defined(__linux__)) +# include "cpu/aarch64/cpu_isa_traits.hpp" +#endif #include "cpu_map_scheduling.hpp" #include "graph.h" #include "openvino/op/fake_quantize.hpp" @@ -552,6 +555,16 @@ int get_model_prefer_threads(const int num_streams, const int sockets = get_num_sockets(); auto model_prefer = 0; if (-1 == config.modelPreferThreads) { +#if (defined(OPENVINO_ARCH_ARM64) && defined(__linux__)) + config.modelPreferThreads = 8; + if (dnnl::impl::cpu::aarch64::mayiuse(dnnl::impl::cpu::aarch64::cpu_isa_t::isa_all) || + dnnl::impl::cpu::aarch64::mayiuse(dnnl::impl::cpu::aarch64::cpu_isa_t::sve_512) || + dnnl::impl::cpu::aarch64::mayiuse(dnnl::impl::cpu::aarch64::cpu_isa_t::sve_384) || + dnnl::impl::cpu::aarch64::mayiuse(dnnl::impl::cpu::aarch64::cpu_isa_t::sve_256) || + dnnl::impl::cpu::aarch64::mayiuse(dnnl::impl::cpu::aarch64::cpu_isa_t::sve_128)) { + config.modelPreferThreads = 16; + } +#else const auto isa = dnnl::get_effective_cpu_isa(); float isaSpecificThreshold = 1.0f; switch (isa) { @@ -579,7 +592,7 @@ int get_model_prefer_threads(const int num_streams, ov::MemBandwidthPressure networkToleranceForLowCache = ov::mem_bandwidth_pressure_tolerance(model, L2_cache_size, memThresholdAssumeLimitedForISA); -#if ((defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__linux__)) +# if 
(defined(OPENVINO_ARCH_ARM) && defined(__linux__)) config.modelPreferThreads = 4; if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { if (networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) { @@ -590,7 +603,7 @@ int get_model_prefer_threads(const int num_streams, (networkToleranceForLowCache.ratio_mem_limited_gemms > ov::MemBandwidthPressure::LIMITED))) { config.modelPreferThreads = 8; } -#elif ((defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__)) +# elif ((defined(OPENVINO_ARCH_ARM) || defined(OPENVINO_ARCH_ARM64)) && defined(__APPLE__)) config.modelPreferThreads = 1; if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) || @@ -612,7 +625,7 @@ int get_model_prefer_threads(const int num_streams, networkToleranceForLowCache.ratio_compute_convs > ov::MemBandwidthPressure::LIMITED) { config.modelPreferThreads = 2; } -#else +# else config.modelPreferThreads = 0; if (networkToleranceForLowCache.max_mem_tolerance == ov::MemBandwidthPressure::UNKNOWN) { if ((networkToleranceForLowCache.ratio_compute_convs == ov::MemBandwidthPressure::ALL) || @@ -631,6 +644,7 @@ int get_model_prefer_threads(const int num_streams, (proc_type_table[0][HYPER_THREADING_PROC] == proc_type_table[0][MAIN_CORE_PROC])) { config.modelPreferThreads = 2; } +# endif #endif }