@@ -463,23 +463,17 @@ function addprocs(manager::ClusterManager; kwargs...)
463
463
464
464
cluster_mgmt_from_master_check ()
465
465
466
+ # Call worker-starting callbacks
467
+ warning_interval = params[:callback_warning_interval ]
468
+ _run_callbacks_concurrently (" worker-starting" , worker_starting_callbacks,
469
+ warning_interval, [(manager, kwargs)])
470
+
471
+ # Add new workers
466
472
new_workers = @lock worker_lock addprocs_locked (manager:: ClusterManager , params)
467
473
468
- callback_tasks = Dict {Any, Task} ()
469
- for worker in new_workers
470
- for (name, callback) in worker_added_callbacks
471
- callback_tasks[name] = Threads. @spawn callback (worker)
472
- end
473
- end
474
-
475
- running_callbacks = () -> [" '$(key) '" for (key, task) in callback_tasks if ! istaskdone (task)]
476
- while timedwait (() -> isempty (running_callbacks ()), params[:callback_warning_interval ]) === :timed_out
477
- callbacks_str = join (running_callbacks (), " , " )
478
- @warn " Waiting for these worker-added callbacks to finish: $(callbacks_str) "
479
- end
480
-
481
- # Wait on the tasks so that exceptions bubble up
482
- wait .(values (callback_tasks))
474
+ # Call worker-started callbacks
475
+ _run_callbacks_concurrently (" worker-started" , worker_started_callbacks,
476
+ warning_interval, new_workers)
483
477
484
478
return new_workers
485
479
end
@@ -870,7 +864,8 @@ const HDR_COOKIE_LEN=16
870
864
const map_pid_wrkr = Dict {Int, Union{Worker, LocalProcess}} ()
871
865
const map_sock_wrkr = IdDict ()
872
866
const map_del_wrkr = Set {Int} ()
873
- const worker_added_callbacks = Dict {Any, Base.Callable} ()
867
+ const worker_starting_callbacks = Dict {Any, Base.Callable} ()
868
+ const worker_started_callbacks = Dict {Any, Base.Callable} ()
874
869
const worker_exiting_callbacks = Dict {Any, Base.Callable} ()
875
870
const worker_exited_callbacks = Dict {Any, Base.Callable} ()
876
871
882
877
883
878
# Callbacks
884
879
885
- function _add_callback (f, key, dict)
886
- if ! hasmethod (f, Tuple{Int})
887
- throw (ArgumentError (" Callback function is invalid, it must be able to accept a single Int argument" ))
880
"""
    _run_callbacks_concurrently(callbacks_name, callbacks_dict, warning_interval, arglist)

Call every callback in `callbacks_dict` once for each element of `arglist`,
running each call in its own task, and block until all of the tasks have
finished.

# Arguments
- `callbacks_name`: label interpolated into the warning message (for example
  `"worker-started"`).
- `callbacks_dict`: mapping from callback key to the callable to run.
- `warning_interval`: timeout passed to `timedwait`; each time it elapses while
  tasks are still running, a warning listing the unfinished callbacks is logged.
- `arglist`: iterable of argument sets. Each element is splatted into the
  callback, i.e. the call is `callback(args...)`.

# Throws
Waiting on a task whose callback threw rethrows that failure, so exceptions
from the callbacks propagate to the caller.
"""
function _run_callbacks_concurrently(callbacks_name, callbacks_dict, warning_interval, arglist)
    # Track one entry per (callback, argument set) pair. Keying the tasks by
    # callback name alone would let later argument sets overwrite earlier ones,
    # so those earlier tasks would never be waited on and their exceptions
    # would be silently dropped.
    callback_tasks = Pair{Any, Task}[]
    for args in arglist
        for (name, callback) in callbacks_dict
            task = Threads.@spawn callback(args...)
            push!(callback_tasks, name => task)
        end
    end

    # Names of the callbacks that still have at least one unfinished task. A
    # callback is listed once even if several of its tasks are still running.
    function running_callbacks()
        pending = ["'$(name)'" for (name, task) in callback_tasks if !istaskdone(task)]
        return unique!(pending)
    end

    while timedwait(() -> isempty(running_callbacks()), warning_interval) === :timed_out
        callbacks_str = join(running_callbacks(), ", ")
        @warn "Waiting for these $(callbacks_name) callbacks to finish: $(callbacks_str)"
    end

    # Wait on the tasks so that exceptions bubble up
    return wait.(last.(callback_tasks))
end
897
+
898
+ function _add_callback (f, key, dict; arg_types= Tuple{Int})
899
+ desired_signature = " f(" * join ([" ::$(t) " for t in arg_types. types], " , " ) * " )"
900
+
901
+ if ! hasmethod (f, arg_types)
902
+ throw (ArgumentError (" Callback function is invalid, it must be able to be called with these argument types: $(desired_signature) " ))
888
903
elseif haskey (dict, key)
889
904
throw (ArgumentError (" A callback function with key '$(key) ' already exists" ))
890
905
end
@@ -900,29 +915,58 @@ end
900
915
_remove_callback (key, dict) = delete! (dict, key)
901
916
902
917
"""
903
- add_worker_added_callback(f::Base.Callable; key=nothing)
918
+ add_worker_starting_callback(f::Base.Callable; key=nothing)
919
+
920
+ Register a callback to be called on the master process immediately before new
921
+ workers are started. The callback `f` will be called with the `ClusterManager`
922
+ instance that is being used and a dictionary of parameters related to adding
923
+ workers, i.e. `f(manager, params)`. The `params` dictionary is specific to the
924
+ `manager` type. Note that the `LocalManager` and `SSHManager` cluster managers
925
+ in DistributedNext are not fully documented yet, see the
926
+ [managers.jl](https://github.com/JuliaParallel/DistributedNext.jl/blob/master/src/managers.jl)
927
+ file for their definitions.
928
+
929
+ !!! warning
930
+ Adding workers can fail so it is not guaranteed that the workers requested
931
+ will exist.
932
+
933
+ The worker-starting callbacks will be executed concurrently. If one throws an
934
+ exception it will not be caught and will bubble up through [`addprocs`](@ref).
935
+
936
+ Keep in mind that the callbacks will add to the time taken to launch workers; so
937
+ try to either keep the callbacks fast to execute, or do the actual work
938
+ asynchronously by spawning a task in the callback (beware of race conditions if
939
+ you do this).
940
+ """
941
function add_worker_starting_callback(f::Base.Callable; key=nothing)
    # Worker-starting callbacks are validated against the two-argument
    # `(manager, params)` call form rather than the default signature.
    signature = Tuple{ClusterManager, Dict}
    return _add_callback(f, key, worker_starting_callbacks; arg_types=signature)
end
943
+
944
"""
    remove_worker_starting_callback(key)

Remove the callback for `key` that was added with
[`add_worker_starting_callback`](@ref).
"""
remove_worker_starting_callback(key) = _remove_callback(key, worker_starting_callbacks)
945
+
946
+ """
947
+ add_worker_started_callback(f::Base.Callable; key=nothing)
904
948
905
949
Register a callback to be called on the master process whenever a worker is
906
950
added. The callback will be called with the added worker ID,
907
951
e.g. `f(w::Int)`. Chooses and returns a unique key for the callback if `key` is
908
952
not specified.
909
953
910
- The worker-added callbacks will be executed concurrently. If one throws an
954
+ The worker-started callbacks will be executed concurrently. If one throws an
911
955
exception it will not be caught and will bubble up through [`addprocs()`](@ref).
912
956
913
957
Keep in mind that the callbacks will add to the time taken to launch workers; so
914
958
try to either keep the callbacks fast to execute, or do the actual
915
959
initialization asynchronously by spawning a task in the callback (beware of race
916
960
conditions if you do this).
917
961
"""
918
- add_worker_added_callback (f:: Base.Callable ; key= nothing ) = _add_callback (f, key, worker_added_callbacks )
962
function add_worker_started_callback(f::Base.Callable; key=nothing)
    # Registration and validation are delegated to the shared helper, using
    # its default callback signature.
    return _add_callback(f, key, worker_started_callbacks)
end
919
963
920
964
"""
921
- remove_worker_added_callback (key)
965
+ remove_worker_started_callback (key)
922
966
923
- Remove the callback for `key` that was added with [`add_worker_added_callback ()`](@ref).
967
+ Remove the callback for `key` that was added with [`add_worker_started_callback ()`](@ref).
924
968
"""
925
- remove_worker_added_callback (key) = _remove_callback (key, worker_added_callbacks )
969
function remove_worker_started_callback(key)
    # Drop the entry for `key` from the worker-started registry.
    return _remove_callback(key, worker_started_callbacks)
end
926
970
927
971
"""
928
972
add_worker_exiting_callback(f::Base.Callable; key=nothing)
0 commit comments