diff --git a/base/Base.jl b/base/Base.jl index d2ef6b1b42a7f..fbd5db6913c52 100644 --- a/base/Base.jl +++ b/base/Base.jl @@ -425,7 +425,8 @@ include("weakkeydict.jl") include("scopedvalues.jl") # used by task.jl -const Workqueue_sched_times = ScopedValues.ScopedValue{Vector{UInt}}() +const task_times_per_thread = ScopedValues.ScopedValue{Vector{UInt}}() +const sleep_times_per_thread = ScopedValues.ScopedValue{Vector{UInt}}() # metaprogramming include("meta.jl") diff --git a/base/task.jl b/base/task.jl index b6dee8a760c56..44d437cf3f647 100644 --- a/base/task.jl +++ b/base/task.jl @@ -1100,7 +1100,7 @@ function try_yieldto(undo) try # when timing time spent in the scheduler we time the time spent in the task # then subtract the elapsed time in `@timed` from the total time to get the scheduler time - t, ts = isassigned(Workqueue_sched_times) ? (time_ns(), Workqueue_sched_times[]) : (nothing, nothing) + t, ts = isassigned(task_times_per_thread) ? (time_ns(), task_times_per_thread[]) : (nothing, nothing) ccall(:jl_switch, Cvoid, ()) if ts !== nothing t = time_ns() - t @@ -1181,8 +1181,19 @@ end function wait() GC.safepoint() - W = workqueue_for(Threads.threadid()) + tid = Threads.threadid() + W = workqueue_for(tid) + # optional sleep timing + t, ts = isassigned(sleep_times_per_thread) ? (time_ns(), sleep_times_per_thread[]) : (nothing, nothing) poptask(W) + if ts !== nothing # sleep timing is enabled + t = time_ns() - t + if tid <= length(ts) + ts[tid] += t + end + # TODO: grow the array if needed i.e. if a thread is added within the `@timed` block + end + result = try_yieldto(ensure_rescheduled) process_events() # return when we come out of the queue diff --git a/base/timing.jl b/base/timing.jl index 6be8cd9bcfe47..8595663816c8e 100644 --- a/base/timing.jl +++ b/base/timing.jl @@ -196,10 +196,10 @@ function format_bytes(bytes; binary=true) # also used by InteractiveUtils end end -function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, wall_time_sched=0, +function time_print(io::IO, elapsedtime, bytes=0, gctime=0, allocs=0, lock_conflicts=0, compile_time=0, recompile_time=0, sched_time_avg=0, newline=false; msg::Union{String,Nothing}=nothing) timestr = Ryu.writefixed(Float64(elapsedtime/1e9), 6) - wall_time_sched_perc = wall_time_sched / (elapsedtime / 1e9) + wall_time_sched_perc = sched_time_avg / (elapsedtime / 1e9) sched_thresh = 0.1 str = sprint() do io if msg isa String @@ -354,7 +354,7 @@ macro time(msg, ex) local ret = @timed $(esc(ex)) local _msg = $(esc(msg)) local _msg_str = _msg === nothing ? _msg : string(_msg) - time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, ret.wall_time_sched, true; msg=_msg_str) + time_print(stdout, ret.time*1e9, ret.gcstats.allocd, ret.gcstats.total_time, gc_alloc_count(ret.gcstats), ret.lock_conflicts, ret.compile_time*1e9, ret.recompile_time*1e9, ret.sched_time_avg, true; msg=_msg_str) ret.value end end @@ -608,7 +608,7 @@ julia> stats.recompile_time macro timed(ex) quote Experimental.@force_compile - ScopedValues.@with Workqueue_sched_times => zeros(UInt, Threads.maxthreadid()) begin + ScopedValues.@with task_times_per_thread => zeros(UInt, Threads.maxthreadid()) sleep_times_per_thread => zeros(UInt, Threads.maxthreadid()) begin Experimental.@force_compile Threads.lock_profiling(true) local lock_conflicts = Threads.LOCK_CONFLICT_COUNT[] @@ -624,9 +624,15 @@ macro timed(ex) Threads.lock_profiling(false)) ) local diff = GC_Diff(gc_num(), stats) - # filter out zeros which can only happen if nothing was scheduled - local sched_times = Int.(filter(>(0), Workqueue_sched_times[])) - local wall_time_sched = isempty(sched_times) ? 0 : sum(Int(elapsedtime) .- sched_times) / length(sched_times) + local sched_times = Int[] + for i in 1:Threads.maxthreadid() + # filter out zeros in task timers which can only happen if nothing was scheduled + if task_times_per_thread[][i] != 0 + # subtract task and sleep times from global elapsed time to get scheduling time per thread + push!(sched_times, Int(elapsedtime) - Int(task_times_per_thread[][i]) - Int(sleep_times_per_thread[][i])) + end + end + local sched_time_avg = isempty(sched_times) ? 0 : sum(sched_times) / length(sched_times) ( value=val, @@ -637,7 +643,7 @@ macro timed(ex) lock_conflicts=lock_conflicts, compile_time=compile_elapsedtimes[1]/1e9, recompile_time=compile_elapsedtimes[2]/1e9, - wall_time_sched=wall_time_sched/1e9 + sched_time_avg=sched_time_avg/1e9 ) end end