From 3cee59bcc3662354151656fe430f1483f0506a22 Mon Sep 17 00:00:00 2001 From: henrika Date: Fri, 3 May 2024 16:20:52 +0000 Subject: [PATCH] Measure GPU main thread contention All changes are behind the "enable-gpu-main-time-keeper-metrics" command-line flag. The new metrics are not added to XML since they are mainly intended for local tests in our testbed. Only fully supported on Windows and Mac as is. Without the flag this change is basically a nop. [1] With the flag: CrGpuMain is added as thread suffix to the already existing Scheduling.ThreadController.Xxx UMAs. The ThreadController::RunLevelTracker::RunLevel::RunLevel instance for thread name "CrGpuMain" will then disable subsampling and provide all its UMA on a new and higher rate to support the new wall-time based metrics. Three new UMAs are produced for the .CrGpuMain suffix (and for .Other) and they are aggregated for ~1 second before being logged: Scheduling.ThreadController.ActiveVsWallTimePercentage Scheduling.ThreadController.ActiveOnCpuVsWallTimePercentage Scheduling.ThreadController.ActiveOffCpuVsWallTimePercentage where "Active = ActiveOnCpu + ActiveOffCpu". The aggregated metrics are reset after each UMA metric has been added. See [2] for details. An extensive set of tests have been performed on two different Windows devices using https://henrik-and.github.io/crop-demo/ as driver for GPU load. Example for a high GPU load is given by [3] and one for a low load by [4]. [1] A minor "total_idle_time_ += idle_time" is done in ThreadController::RunLevelTracker::RunLevel::LogOnActiveMetrics also without the flag. [2] https://screenshot.googleplex.com/pL9KhwzqYU95unk [3] https://paste.googleplex.com/6295780167516160 [4] https://paste.googleplex.com/6477783567433728 Bug: b/332864440 Change-Id: I354a0bf55beb2d785efdeb4e2bf64ec8d0d9edbb Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5447513 Reviewed-by: Gabriel Charette Commit-Queue: Henrik Andreasson Cr-Commit-Position: refs/heads/main@{#1296137} --- base/task/current_thread.cc | 6 +- base/task/current_thread.h | 7 +- .../sequence_manager/sequence_manager_impl.cc | 6 +- .../sequence_manager/sequence_manager_impl.h | 4 +- .../sequence_manager/thread_controller.cc | 73 +++++++++++++++++-- .../task/sequence_manager/thread_controller.h | 28 ++++++- ...troller_with_message_pump_impl_unittest.cc | 4 +- content/browser/gpu/gpu_process_host.cc | 1 + content/gpu/gpu_main.cc | 10 +++ gpu/config/gpu_switches.cc | 5 ++ gpu/config/gpu_switches.h | 1 + 11 files changed, 127 insertions(+), 18 deletions(-) diff --git a/base/task/current_thread.cc b/base/task/current_thread.cc index a93ee7e4213cb3..c4f8862b21ccfe 100644 --- a/base/task/current_thread.cc +++ b/base/task/current_thread.cc @@ -71,8 +71,10 @@ bool CurrentThread::IsIdleForTesting() { } void CurrentThread::EnableMessagePumpTimeKeeperMetrics( - const char* thread_name) { - return current_->EnableMessagePumpTimeKeeperMetrics(thread_name); + const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing) { + return current_->EnableMessagePumpTimeKeeperMetrics( + thread_name, wall_time_based_metrics_enabled_for_testing); } void CurrentThread::AddTaskObserver(TaskObserver* task_observer) { diff --git a/base/task/current_thread.h b/base/task/current_thread.h index 28c074dcbd6366..6e21e7299afbf2 100644 --- a/base/task/current_thread.h +++ b/base/task/current_thread.h @@ -209,7 +209,12 @@ class BASE_EXPORT CurrentThread { // Enables ThreadControllerWithMessagePumpImpl's TimeKeeper metrics. // `thread_name` will be used as a suffix. - void EnableMessagePumpTimeKeeperMetrics(const char* thread_name); + // Setting `wall_time_based_metrics_enabled_for_testing` adds wall-time + // based metrics for this thread. This is only for test environments as it + // disables subsampling. + void EnableMessagePumpTimeKeeperMetrics( + const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing = false); protected: explicit CurrentThread( diff --git a/base/task/sequence_manager/sequence_manager_impl.cc b/base/task/sequence_manager/sequence_manager_impl.cc index 417229d29bfb4f..706c384b5d0baa 100644 --- a/base/task/sequence_manager/sequence_manager_impl.cc +++ b/base/task/sequence_manager/sequence_manager_impl.cc @@ -1130,8 +1130,10 @@ bool SequenceManagerImpl::IsIdleForTesting() { } void SequenceManagerImpl::EnableMessagePumpTimeKeeperMetrics( - const char* thread_name) { - controller_->EnableMessagePumpTimeKeeperMetrics(thread_name); + const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing) { + controller_->EnableMessagePumpTimeKeeperMetrics( + thread_name, wall_time_based_metrics_enabled_for_testing); } size_t SequenceManagerImpl::GetPendingTaskCountForTesting() const { diff --git a/base/task/sequence_manager/sequence_manager_impl.h b/base/task/sequence_manager/sequence_manager_impl.h index 9c0bc8e4b5eb27..211db4875134ef 100644 --- a/base/task/sequence_manager/sequence_manager_impl.h +++ b/base/task/sequence_manager/sequence_manager_impl.h @@ -171,7 +171,9 @@ class BASE_EXPORT SequenceManagerImpl void AttachToMessagePump(); #endif bool IsIdleForTesting() override; - void EnableMessagePumpTimeKeeperMetrics(const char* thread_name); + void EnableMessagePumpTimeKeeperMetrics( + const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing = false); // Requests that a task to process work is scheduled. void ScheduleWork(); diff --git a/base/task/sequence_manager/thread_controller.cc b/base/task/sequence_manager/thread_controller.cc index a8e9cc70a3f1d4..5bf8ce736d5b39 100644 --- a/base/task/sequence_manager/thread_controller.cc +++ b/base/task/sequence_manager/thread_controller.cc @@ -117,23 +117,30 @@ std::string ThreadController::RunLevelTracker::RunLevel::GetSuffixForHistogram( } void ThreadController::EnableMessagePumpTimeKeeperMetrics( - const char* thread_name) { + const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing) { // MessagePump runs too fast, a low-res clock would result in noisy metrics. if (!base::TimeTicks::IsHighResolution()) return; - run_level_tracker_.EnableTimeKeeperMetrics(thread_name); + run_level_tracker_.EnableTimeKeeperMetrics( + thread_name, wall_time_based_metrics_enabled_for_testing); } void ThreadController::RunLevelTracker::EnableTimeKeeperMetrics( - const char* thread_name) { - time_keeper_.EnableRecording(thread_name); + const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing) { + time_keeper_.EnableRecording(thread_name, + wall_time_based_metrics_enabled_for_testing); } void ThreadController::RunLevelTracker::TimeKeeper::EnableRecording( - const char* thread_name) { + const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing) { DCHECK(!histogram_); thread_name_ = thread_name; + wall_time_based_metrics_enabled_for_testing_ = + wall_time_based_metrics_enabled_for_testing; histogram_ = LinearHistogram::FactoryGet( JoinString({"Scheduling.MessagePumpTimeKeeper", thread_name}, "."), 1, @@ -162,8 +169,7 @@ void ThreadController::RunLevelTracker::OnRunLoopStarted(State initial_state, DCHECK_CALLED_ON_VALID_THREAD(outer_->associated_thread_->thread_checker); const bool is_nested = !run_levels_.empty(); - run_levels_.emplace(initial_state, is_nested, time_keeper_, lazy_now - ); + run_levels_.emplace(initial_state, is_nested, time_keeper_, lazy_now); // In unit tests, RunLoop::Run() acts as the initial wake-up. if (!is_nested && initial_state != kIdle) @@ -322,6 +328,13 @@ ThreadController::RunLevelTracker::RunLevel::~RunLevel() { ThreadController::RunLevelTracker::RunLevel::RunLevel(RunLevel&& other) = default; +void ThreadController::RunLevelTracker::RunLevel::LogPercentageMetric( + const char* name, + int percentage) { + UmaHistogramPercentage( + base::StrCat({name, base::StrCat({".", GetThreadName()})}), percentage); +} + void ThreadController::RunLevelTracker::RunLevel::LogPercentageMetric( const char* name, int percentage, @@ -361,12 +374,18 @@ void ThreadController::RunLevelTracker::RunLevel::LogOnActiveMetrics( LogIntervalMetric("Scheduling.ThreadController.IdleDuration", idle_time, idle_time); last_active_end_ = base::TimeTicks(); + accumulated_idle_time_ += idle_time; } // Taking thread ticks can be expensive. Make sure to do it rarely enough to // not have a discernible impact on performance. static const bool thread_ticks_supported = ThreadTicks::IsSupported(); - if (thread_ticks_supported && metrics_sub_sampler_.ShouldSample(0.001)) { + // Disable subsampling to support wall-time based metrics. Only supported for + // testing purposes. By default, the subsampling probability is 0.1%. + const double probability = + time_keeper_->wall_time_based_metrics_enabled_for_testing() ? 1.0 : 0.001; + if (thread_ticks_supported && + metrics_sub_sampler_.ShouldSample(probability)) { last_active_start_ = lazy_now.Now(); last_active_threadtick_start_ = ThreadTicks::Now(); } @@ -402,6 +421,44 @@ void ThreadController::RunLevelTracker::RunLevel::LogOnIdleMetrics( "Scheduling.ThreadController.ActiveIntervalOnCpuPercentage", active_interval_cpu_percentage, elapsed_ticks); + if (time_keeper_->wall_time_based_metrics_enabled_for_testing()) { + accumulated_active_time_ += elapsed_ticks; + accumulated_active_on_cpu_time_ += elapsed_thread_ticks; + accumulated_active_off_cpu_time_ += + (elapsed_ticks - elapsed_thread_ticks); + + // Accumulated wall-time since last wall-time based metric was stored. + const base::TimeDelta accumulated_wall_time = + accumulated_active_time_ + accumulated_idle_time_; + + // Add wall-time based ratio metrics (in percent) when the total sum of + // active and idle times is larger than one second. + if (accumulated_wall_time > Seconds(1)) { + const int active_vs_wall_time_percentage = checked_cast( + (accumulated_active_time_ * 100).IntDiv(accumulated_wall_time)); + LogPercentageMetric( + "Scheduling.ThreadController.ActiveVsWallTimePercentage", + active_vs_wall_time_percentage); + const int active_on_cpu_vs_wall_time_percentage = + checked_cast((accumulated_active_on_cpu_time_ * 100) + .IntDiv(accumulated_wall_time)); + LogPercentageMetric( + "Scheduling.ThreadController.ActiveOnCpuVsWallTimePercentage", + active_on_cpu_vs_wall_time_percentage); + const int active_off_cpu_vs_wall_time_percentage = + checked_cast((accumulated_active_off_cpu_time_ * 100) + .IntDiv(accumulated_wall_time)); + LogPercentageMetric( + "Scheduling.ThreadController.ActiveOffCpuVsWallTimePercentage", + active_off_cpu_vs_wall_time_percentage); + + accumulated_idle_time_ = base::TimeDelta(); + accumulated_active_time_ = base::TimeDelta(); + accumulated_active_on_cpu_time_ = base::TimeDelta(); + accumulated_active_off_cpu_time_ = base::TimeDelta(); + } + } + // Reset timings. last_active_start_ = base::TimeTicks(); last_active_threadtick_start_ = base::ThreadTicks(); diff --git a/base/task/sequence_manager/thread_controller.h b/base/task/sequence_manager/thread_controller.h index 12b78c3f4df16f..6ee818e299549e 100644 --- a/base/task/sequence_manager/thread_controller.h +++ b/base/task/sequence_manager/thread_controller.h @@ -7,6 +7,7 @@ #include #include +#include #include #include @@ -149,7 +150,11 @@ class BASE_EXPORT ThreadController { features::EmitThreadControllerProfilerMetadata emit_profiler_metadata); // Enables TimeKeeper metrics. `thread_name` will be used as a suffix. - void EnableMessagePumpTimeKeeperMetrics(const char* thread_name); + // Setting `wall_time_based_metrics_enabled_for_testing` adds wall-time + // based metrics for this thread. It also also disables subsampling. + void EnableMessagePumpTimeKeeperMetrics( + const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing); // Currently only overridden on ThreadControllerWithMessagePumpImpl. // @@ -191,6 +196,9 @@ class BASE_EXPORT ThreadController { // would result in an inconsistent set of DCHECKs... raw_ptr time_source_; // Not owned. + // Whether or not wall-time based metrics are enabled. + bool wall_time_based_metrics_enabled_for_testing_; + // Tracks the state of each run-level (main and nested ones) in its associated // ThreadController. It does so using two high-level principles: // 1) #work-in-work-implies-nested : @@ -269,7 +277,9 @@ class BASE_EXPORT ThreadController { // RunLevelTracker. void RecordScheduleWork(); - void EnableTimeKeeperMetrics(const char* thread_name); + void EnableTimeKeeperMetrics( + const char* thread_name, + bool wall_time_based_metrics_for_testing_enabled); // Observes changes of state sent as trace-events so they can be tested. class TraceObserverForTesting { @@ -295,7 +305,8 @@ class BASE_EXPORT ThreadController { public: explicit TimeKeeper(const RunLevelTracker& outer); - void EnableRecording(const char* thread_name); + void EnableRecording(const char* thread_name, + bool wall_time_based_metrics_enabled_for_testing); // Records the start time of the first phase out-of-idle. The kScheduled // phase will be attributed the time before this point once its @@ -322,6 +333,10 @@ class BASE_EXPORT ThreadController { const std::string& thread_name() const { return thread_name_; } + bool wall_time_based_metrics_enabled_for_testing() const { + return wall_time_based_metrics_enabled_for_testing_; + } + private: enum class ShouldRecordReqs { // Regular should-record requirements. @@ -345,6 +360,8 @@ class BASE_EXPORT ThreadController { static const char* PhaseToEventName(Phase phase); std::string thread_name_; + // Whether or not wall-time based metrics are reported. + bool wall_time_based_metrics_enabled_for_testing_ = false; // Cumulative time deltas for each phase, reported and reset when >=100ms. std::array deltas_ = {}; // Set at the start of the first work item out-of-idle. Consumed from the @@ -397,6 +414,7 @@ class BASE_EXPORT ThreadController { } private: + void LogPercentageMetric(const char* name, int value); void LogPercentageMetric(const char* name, int value, base::TimeDelta interval_duration); @@ -409,6 +427,10 @@ class BASE_EXPORT ThreadController { base::TimeTicks last_active_end_; base::TimeTicks last_active_start_; base::ThreadTicks last_active_threadtick_start_; + base::TimeDelta accumulated_idle_time_; + base::TimeDelta accumulated_active_time_; + base::TimeDelta accumulated_active_on_cpu_time_; + base::TimeDelta accumulated_active_off_cpu_time_; MetricsSubSampler metrics_sub_sampler_; State state_ = kIdle; diff --git a/base/task/sequence_manager/thread_controller_with_message_pump_impl_unittest.cc b/base/task/sequence_manager/thread_controller_with_message_pump_impl_unittest.cc index a9fe5993e5a98b..858347bced5a15 100644 --- a/base/task/sequence_manager/thread_controller_with_message_pump_impl_unittest.cc +++ b/base/task/sequence_manager/thread_controller_with_message_pump_impl_unittest.cc @@ -79,7 +79,9 @@ class ThreadControllerForTest : public ThreadControllerWithMessagePumpImpl { // EnableMessagePumpTimeKeeperMetrics is a no-op on hardware which lacks // high-res clocks. ASSERT_TRUE(TimeTicks::IsHighResolution()); - EnableMessagePumpTimeKeeperMetrics("TestMainThread"); + EnableMessagePumpTimeKeeperMetrics( + "TestMainThread", + /*wall_time_based_metrics_enabled_for_testing=*/false); } // Optionally emplaced, strict from then on. diff --git a/content/browser/gpu/gpu_process_host.cc b/content/browser/gpu/gpu_process_host.cc index a2ce50a01d5f8f..03f3f1335f3292 100644 --- a/content/browser/gpu/gpu_process_host.cc +++ b/content/browser/gpu/gpu_process_host.cc @@ -260,6 +260,7 @@ static const char* const kSwitchNames[] = { switches::kDisableSkiaRuntimeOpts, switches::kDRMVirtualConnectorIsExternal, switches::kEnableBackgroundThreadPool, + switches::kEnableGpuMainTimeKeeperMetrics, switches::kEnableGpuRasterization, switches::kEnableSkiaGraphite, switches::kDoubleBufferCompositing, diff --git a/content/gpu/gpu_main.cc b/content/gpu/gpu_main.cc index 96f66d5e238528..bb019363bf40f8 100644 --- a/content/gpu/gpu_main.cc +++ b/content/gpu/gpu_main.cc @@ -24,6 +24,7 @@ #include "base/run_loop.h" #include "base/strings/string_number_conversions.h" #include "base/system/sys_info.h" +#include "base/task/current_thread.h" #include "base/task/single_thread_task_executor.h" #include "base/task/thread_pool/thread_pool_instance.h" #include "base/threading/platform_thread.h" @@ -423,6 +424,15 @@ int GpuMain(MainFunctionParams parameters) { base::HighResolutionTimerManager hi_res_timer_manager; + // Adds support of wall-time based TimerKeeper metrics for the main GPU thread + // when command-line flag is set. CrGpuMain will be used as suffix for each + // metric. + if (command_line.HasSwitch(switches::kEnableGpuMainTimeKeeperMetrics)) { + base::CurrentThread::Get()->EnableMessagePumpTimeKeeperMetrics( + "CrGpuMain", + /*wall_time_based_metrics_enabled_for_testing=*/true); + } + { TRACE_EVENT0("gpu", "Run Message Loop"); run_loop.Run(); diff --git a/gpu/config/gpu_switches.cc b/gpu/config/gpu_switches.cc index 131a080b2f5a79..d83ef266cb1998 100644 --- a/gpu/config/gpu_switches.cc +++ b/gpu/config/gpu_switches.cc @@ -152,4 +152,9 @@ const char kShaderCachePath[] = "shader-cache-path"; // https://learn.microsoft.com/en-us/windows/ai/directml/dml-version-history const char kUseRedistributableDirectML[] = "use-redist-dml"; +// Enables ThreadControllerWithMessagePumpImpl's TimeKeeper UMA metrics using +// CrGpuMain as suffix. +const char kEnableGpuMainTimeKeeperMetrics[] = + "enable-gpu-main-time-keeper-metrics"; + } // namespace switches diff --git a/gpu/config/gpu_switches.h b/gpu/config/gpu_switches.h index c1db72c004b405..af9a2533665422 100644 --- a/gpu/config/gpu_switches.h +++ b/gpu/config/gpu_switches.h @@ -53,6 +53,7 @@ GPU_EXPORT extern const char kShaderCachePath[]; GPU_EXPORT extern const char kDisableSkiaGraphite[]; GPU_EXPORT extern const char kEnableSkiaGraphite[]; GPU_EXPORT extern const char kUseRedistributableDirectML[]; +GPU_EXPORT extern const char kEnableGpuMainTimeKeeperMetrics[]; } // namespace switches