Skip to content

Commit

Permalink
Measure GPU main thread contention
Browse files Browse the repository at this point in the history
All changes are behind the "enable-gpu-main-time-keeper-metrics"
command-line flag. The new metrics are not added to XML since they are
mainly intended for local tests in our testbed.

Only fully supported on Windows and Mac as is.

Without the flag this change is basically a nop. [1]

With the flag:

CrGpuMain is added as thread suffix to the already existing
Scheduling.ThreadController.Xxx UMAs. The
ThreadController::RunLevelTracker::RunLevel::RunLevel instance for
thread name "CrGpuMain" will then disable subsampling and provide all
its UMA on a new and higher rate to support the new wall-time based
metrics.

Three new UMAs are produced for the .CrGpuMain suffix (and for .Other)
and they are aggregated for ~1 second before being logged:

Scheduling.ThreadController.ActiveVsWallTimePercentage
Scheduling.ThreadController.ActiveOnCpuVsWallTimePercentage
Scheduling.ThreadController.ActiveOffCpuVsWallTimePercentage

where "Active = ActiveOnCpu + ActiveOffCpu".

The aggregated metrics are reset after each UMA metric has been added.

See [2] for details.

An extensive set of tests have been performed on two different Windows
devices using https://henrik-and.github.io/crop-demo/ as driver for
GPU load.

Example for a high GPU load is given by [3] and one for a low load
by [4].

[1] A minor "total_idle_time_ += idle_time" is done in
ThreadController::RunLevelTracker::RunLevel::LogOnActiveMetrics also
without the flag.
[2] https://screenshot.googleplex.com/pL9KhwzqYU95unk
[3] https://paste.googleplex.com/6295780167516160
[4] https://paste.googleplex.com/6477783567433728

Bug: b/332864440
Change-Id: I354a0bf55beb2d785efdeb4e2bf64ec8d0d9edbb
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/5447513
Reviewed-by: Gabriel Charette <[email protected]>
Commit-Queue: Henrik Andreasson <[email protected]>
Cr-Commit-Position: refs/heads/main@{#1296137}
  • Loading branch information
henrikand authored and Chromium LUCI CQ committed May 3, 2024
1 parent 8e36899 commit 3cee59b
Show file tree
Hide file tree
Showing 11 changed files with 127 additions and 18 deletions.
6 changes: 4 additions & 2 deletions base/task/current_thread.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,10 @@ bool CurrentThread::IsIdleForTesting() {
}

void CurrentThread::EnableMessagePumpTimeKeeperMetrics(
const char* thread_name) {
return current_->EnableMessagePumpTimeKeeperMetrics(thread_name);
const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing) {
return current_->EnableMessagePumpTimeKeeperMetrics(
thread_name, wall_time_based_metrics_enabled_for_testing);
}

void CurrentThread::AddTaskObserver(TaskObserver* task_observer) {
Expand Down
7 changes: 6 additions & 1 deletion base/task/current_thread.h
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,12 @@ class BASE_EXPORT CurrentThread {

// Enables ThreadControllerWithMessagePumpImpl's TimeKeeper metrics.
// `thread_name` will be used as a suffix.
void EnableMessagePumpTimeKeeperMetrics(const char* thread_name);
// Setting `wall_time_based_metrics_enabled_for_testing` adds wall-time
// based metrics for this thread. This is only for test environments as it
// disables subsampling.
void EnableMessagePumpTimeKeeperMetrics(
const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing = false);

protected:
explicit CurrentThread(
Expand Down
6 changes: 4 additions & 2 deletions base/task/sequence_manager/sequence_manager_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1130,8 +1130,10 @@ bool SequenceManagerImpl::IsIdleForTesting() {
}

void SequenceManagerImpl::EnableMessagePumpTimeKeeperMetrics(
const char* thread_name) {
controller_->EnableMessagePumpTimeKeeperMetrics(thread_name);
const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing) {
controller_->EnableMessagePumpTimeKeeperMetrics(
thread_name, wall_time_based_metrics_enabled_for_testing);
}

size_t SequenceManagerImpl::GetPendingTaskCountForTesting() const {
Expand Down
4 changes: 3 additions & 1 deletion base/task/sequence_manager/sequence_manager_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,9 @@ class BASE_EXPORT SequenceManagerImpl
void AttachToMessagePump();
#endif
bool IsIdleForTesting() override;
void EnableMessagePumpTimeKeeperMetrics(const char* thread_name);
void EnableMessagePumpTimeKeeperMetrics(
const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing = false);

// Requests that a task to process work is scheduled.
void ScheduleWork();
Expand Down
73 changes: 65 additions & 8 deletions base/task/sequence_manager/thread_controller.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,23 +117,30 @@ std::string ThreadController::RunLevelTracker::RunLevel::GetSuffixForHistogram(
}

void ThreadController::EnableMessagePumpTimeKeeperMetrics(
const char* thread_name) {
const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing) {
// MessagePump runs too fast, a low-res clock would result in noisy metrics.
if (!base::TimeTicks::IsHighResolution())
return;

run_level_tracker_.EnableTimeKeeperMetrics(thread_name);
run_level_tracker_.EnableTimeKeeperMetrics(
thread_name, wall_time_based_metrics_enabled_for_testing);
}

void ThreadController::RunLevelTracker::EnableTimeKeeperMetrics(
const char* thread_name) {
time_keeper_.EnableRecording(thread_name);
const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing) {
time_keeper_.EnableRecording(thread_name,
wall_time_based_metrics_enabled_for_testing);
}

void ThreadController::RunLevelTracker::TimeKeeper::EnableRecording(
const char* thread_name) {
const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing) {
DCHECK(!histogram_);
thread_name_ = thread_name;
wall_time_based_metrics_enabled_for_testing_ =
wall_time_based_metrics_enabled_for_testing;

histogram_ = LinearHistogram::FactoryGet(
JoinString({"Scheduling.MessagePumpTimeKeeper", thread_name}, "."), 1,
Expand Down Expand Up @@ -162,8 +169,7 @@ void ThreadController::RunLevelTracker::OnRunLoopStarted(State initial_state,
DCHECK_CALLED_ON_VALID_THREAD(outer_->associated_thread_->thread_checker);

const bool is_nested = !run_levels_.empty();
run_levels_.emplace(initial_state, is_nested, time_keeper_, lazy_now
);
run_levels_.emplace(initial_state, is_nested, time_keeper_, lazy_now);

// In unit tests, RunLoop::Run() acts as the initial wake-up.
if (!is_nested && initial_state != kIdle)
Expand Down Expand Up @@ -322,6 +328,13 @@ ThreadController::RunLevelTracker::RunLevel::~RunLevel() {
ThreadController::RunLevelTracker::RunLevel::RunLevel(RunLevel&& other) =
default;

void ThreadController::RunLevelTracker::RunLevel::LogPercentageMetric(
const char* name,
int percentage) {
UmaHistogramPercentage(
base::StrCat({name, base::StrCat({".", GetThreadName()})}), percentage);
}

void ThreadController::RunLevelTracker::RunLevel::LogPercentageMetric(
const char* name,
int percentage,
Expand Down Expand Up @@ -361,12 +374,18 @@ void ThreadController::RunLevelTracker::RunLevel::LogOnActiveMetrics(
LogIntervalMetric("Scheduling.ThreadController.IdleDuration", idle_time,
idle_time);
last_active_end_ = base::TimeTicks();
accumulated_idle_time_ += idle_time;
}

// Taking thread ticks can be expensive. Make sure to do it rarely enough to
// not have a discernible impact on performance.
static const bool thread_ticks_supported = ThreadTicks::IsSupported();
if (thread_ticks_supported && metrics_sub_sampler_.ShouldSample(0.001)) {
// Disable subsampling to support wall-time based metrics. Only supported for
// testing purposes. By default, the subsampling probability is 0.1%.
const double probability =
time_keeper_->wall_time_based_metrics_enabled_for_testing() ? 1.0 : 0.001;
if (thread_ticks_supported &&
metrics_sub_sampler_.ShouldSample(probability)) {
last_active_start_ = lazy_now.Now();
last_active_threadtick_start_ = ThreadTicks::Now();
}
Expand Down Expand Up @@ -402,6 +421,44 @@ void ThreadController::RunLevelTracker::RunLevel::LogOnIdleMetrics(
"Scheduling.ThreadController.ActiveIntervalOnCpuPercentage",
active_interval_cpu_percentage, elapsed_ticks);

if (time_keeper_->wall_time_based_metrics_enabled_for_testing()) {
accumulated_active_time_ += elapsed_ticks;
accumulated_active_on_cpu_time_ += elapsed_thread_ticks;
accumulated_active_off_cpu_time_ +=
(elapsed_ticks - elapsed_thread_ticks);

// Accumulated wall-time since last wall-time based metric was stored.
const base::TimeDelta accumulated_wall_time =
accumulated_active_time_ + accumulated_idle_time_;

// Add wall-time based ratio metrics (in percent) when the total sum of
// active and idle times is larger than one second.
if (accumulated_wall_time > Seconds(1)) {
const int active_vs_wall_time_percentage = checked_cast<int>(
(accumulated_active_time_ * 100).IntDiv(accumulated_wall_time));
LogPercentageMetric(
"Scheduling.ThreadController.ActiveVsWallTimePercentage",
active_vs_wall_time_percentage);
const int active_on_cpu_vs_wall_time_percentage =
checked_cast<int>((accumulated_active_on_cpu_time_ * 100)
.IntDiv(accumulated_wall_time));
LogPercentageMetric(
"Scheduling.ThreadController.ActiveOnCpuVsWallTimePercentage",
active_on_cpu_vs_wall_time_percentage);
const int active_off_cpu_vs_wall_time_percentage =
checked_cast<int>((accumulated_active_off_cpu_time_ * 100)
.IntDiv(accumulated_wall_time));
LogPercentageMetric(
"Scheduling.ThreadController.ActiveOffCpuVsWallTimePercentage",
active_off_cpu_vs_wall_time_percentage);

accumulated_idle_time_ = base::TimeDelta();
accumulated_active_time_ = base::TimeDelta();
accumulated_active_on_cpu_time_ = base::TimeDelta();
accumulated_active_off_cpu_time_ = base::TimeDelta();
}
}

// Reset timings.
last_active_start_ = base::TimeTicks();
last_active_threadtick_start_ = base::ThreadTicks();
Expand Down
28 changes: 25 additions & 3 deletions base/task/sequence_manager/thread_controller.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#include <optional>
#include <stack>
#include <string>
#include <string_view>
#include <vector>

Expand Down Expand Up @@ -149,7 +150,11 @@ class BASE_EXPORT ThreadController {
features::EmitThreadControllerProfilerMetadata emit_profiler_metadata);

// Enables TimeKeeper metrics. `thread_name` will be used as a suffix.
void EnableMessagePumpTimeKeeperMetrics(const char* thread_name);
// Setting `wall_time_based_metrics_enabled_for_testing` adds wall-time
// based metrics for this thread. It also also disables subsampling.
void EnableMessagePumpTimeKeeperMetrics(
const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing);

// Currently only overridden on ThreadControllerWithMessagePumpImpl.
//
Expand Down Expand Up @@ -191,6 +196,9 @@ class BASE_EXPORT ThreadController {
// would result in an inconsistent set of DCHECKs...
raw_ptr<const TickClock> time_source_; // Not owned.

// Whether or not wall-time based metrics are enabled.
bool wall_time_based_metrics_enabled_for_testing_;

// Tracks the state of each run-level (main and nested ones) in its associated
// ThreadController. It does so using two high-level principles:
// 1) #work-in-work-implies-nested :
Expand Down Expand Up @@ -269,7 +277,9 @@ class BASE_EXPORT ThreadController {
// RunLevelTracker.
void RecordScheduleWork();

void EnableTimeKeeperMetrics(const char* thread_name);
void EnableTimeKeeperMetrics(
const char* thread_name,
bool wall_time_based_metrics_for_testing_enabled);

// Observes changes of state sent as trace-events so they can be tested.
class TraceObserverForTesting {
Expand All @@ -295,7 +305,8 @@ class BASE_EXPORT ThreadController {
public:
explicit TimeKeeper(const RunLevelTracker& outer);

void EnableRecording(const char* thread_name);
void EnableRecording(const char* thread_name,
bool wall_time_based_metrics_enabled_for_testing);

// Records the start time of the first phase out-of-idle. The kScheduled
// phase will be attributed the time before this point once its
Expand All @@ -322,6 +333,10 @@ class BASE_EXPORT ThreadController {

const std::string& thread_name() const { return thread_name_; }

bool wall_time_based_metrics_enabled_for_testing() const {
return wall_time_based_metrics_enabled_for_testing_;
}

private:
enum class ShouldRecordReqs {
// Regular should-record requirements.
Expand All @@ -345,6 +360,8 @@ class BASE_EXPORT ThreadController {
static const char* PhaseToEventName(Phase phase);

std::string thread_name_;
// Whether or not wall-time based metrics are reported.
bool wall_time_based_metrics_enabled_for_testing_ = false;
// Cumulative time deltas for each phase, reported and reset when >=100ms.
std::array<TimeDelta, Phase::kLastPhase + 1> deltas_ = {};
// Set at the start of the first work item out-of-idle. Consumed from the
Expand Down Expand Up @@ -397,6 +414,7 @@ class BASE_EXPORT ThreadController {
}

private:
void LogPercentageMetric(const char* name, int value);
void LogPercentageMetric(const char* name,
int value,
base::TimeDelta interval_duration);
Expand All @@ -409,6 +427,10 @@ class BASE_EXPORT ThreadController {
base::TimeTicks last_active_end_;
base::TimeTicks last_active_start_;
base::ThreadTicks last_active_threadtick_start_;
base::TimeDelta accumulated_idle_time_;
base::TimeDelta accumulated_active_time_;
base::TimeDelta accumulated_active_on_cpu_time_;
base::TimeDelta accumulated_active_off_cpu_time_;
MetricsSubSampler metrics_sub_sampler_;

State state_ = kIdle;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,9 @@ class ThreadControllerForTest : public ThreadControllerWithMessagePumpImpl {
// EnableMessagePumpTimeKeeperMetrics is a no-op on hardware which lacks
// high-res clocks.
ASSERT_TRUE(TimeTicks::IsHighResolution());
EnableMessagePumpTimeKeeperMetrics("TestMainThread");
EnableMessagePumpTimeKeeperMetrics(
"TestMainThread",
/*wall_time_based_metrics_enabled_for_testing=*/false);
}

// Optionally emplaced, strict from then on.
Expand Down
1 change: 1 addition & 0 deletions content/browser/gpu/gpu_process_host.cc
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,7 @@ static const char* const kSwitchNames[] = {
switches::kDisableSkiaRuntimeOpts,
switches::kDRMVirtualConnectorIsExternal,
switches::kEnableBackgroundThreadPool,
switches::kEnableGpuMainTimeKeeperMetrics,
switches::kEnableGpuRasterization,
switches::kEnableSkiaGraphite,
switches::kDoubleBufferCompositing,
Expand Down
10 changes: 10 additions & 0 deletions content/gpu/gpu_main.cc
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "base/run_loop.h"
#include "base/strings/string_number_conversions.h"
#include "base/system/sys_info.h"
#include "base/task/current_thread.h"
#include "base/task/single_thread_task_executor.h"
#include "base/task/thread_pool/thread_pool_instance.h"
#include "base/threading/platform_thread.h"
Expand Down Expand Up @@ -423,6 +424,15 @@ int GpuMain(MainFunctionParams parameters) {

base::HighResolutionTimerManager hi_res_timer_manager;

// Adds support of wall-time based TimerKeeper metrics for the main GPU thread
// when command-line flag is set. CrGpuMain will be used as suffix for each
// metric.
if (command_line.HasSwitch(switches::kEnableGpuMainTimeKeeperMetrics)) {
base::CurrentThread::Get()->EnableMessagePumpTimeKeeperMetrics(
"CrGpuMain",
/*wall_time_based_metrics_enabled_for_testing=*/true);
}

{
TRACE_EVENT0("gpu", "Run Message Loop");
run_loop.Run();
Expand Down
5 changes: 5 additions & 0 deletions gpu/config/gpu_switches.cc
Original file line number Diff line number Diff line change
Expand Up @@ -152,4 +152,9 @@ const char kShaderCachePath[] = "shader-cache-path";
// https://learn.microsoft.com/en-us/windows/ai/directml/dml-version-history
const char kUseRedistributableDirectML[] = "use-redist-dml";

// Enables ThreadControllerWithMessagePumpImpl's TimeKeeper UMA metrics using
// CrGpuMain as suffix.
const char kEnableGpuMainTimeKeeperMetrics[] =
"enable-gpu-main-time-keeper-metrics";

} // namespace switches
1 change: 1 addition & 0 deletions gpu/config/gpu_switches.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ GPU_EXPORT extern const char kShaderCachePath[];
GPU_EXPORT extern const char kDisableSkiaGraphite[];
GPU_EXPORT extern const char kEnableSkiaGraphite[];
GPU_EXPORT extern const char kUseRedistributableDirectML[];
GPU_EXPORT extern const char kEnableGpuMainTimeKeeperMetrics[];

} // namespace switches

Expand Down

0 comments on commit 3cee59b

Please sign in to comment.