From 9a5c1057ba242e3999c68fe8d09bddb5e9a06c97 Mon Sep 17 00:00:00 2001 From: Dave Parfitt Date: Mon, 22 Jul 2013 11:17:56 -0400 Subject: [PATCH 1/4] cleanup rt_dirty behavior --- src/riak_repl2_fscoordinator.erl | 23 +++++++++++++++++++++-- src/riak_repl_stats.erl | 6 ++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/riak_repl2_fscoordinator.erl b/src/riak_repl2_fscoordinator.erl index fba95217..bf2a03e7 100644 --- a/src/riak_repl2_fscoordinator.erl +++ b/src/riak_repl2_fscoordinator.erl @@ -350,6 +350,7 @@ handle_cast(start_fullsync, State) -> lager:info("Starting fullsync (source) with max_fssource_node=~p and max_fssource_cluster=~p", [MaxSource, MaxCluster]), {ok, Ring} = riak_core_ring_manager:get_my_ring(), + check_nodes_for_rt_dirty(Ring), N = largest_n(Ring), Partitions = sort_partitions(Ring), State2 = State#state{ @@ -1007,8 +1008,17 @@ notify_rt_dirty_nodes(State = #state{dirty_nodes = DirtyNodes, NodesToNotify = lists:subtract(AllNodesList, ordsets:to_list(DirtyNodesDuringFS)), lager:debug("Notifying nodes ~p", [ NodesToNotify]), - _ = rpc:multicall(NodesToNotify, riak_repl_stats, clear_rt_dirty, []), - State#state{dirty_nodes=ordsets:new()}; + {_, BadNodes} = rpc:multicall(NodesToNotify, + riak_repl_stats, + clear_rt_dirty, []), + case BadNodes of + [] -> + %% all nodes notified, clear rt_dirty state + State#state{dirty_nodes=ordsets:new()}; + Nodes -> + lager:warning("Failed to clear rt_dirty on ~p", [Nodes]), + State + end; false -> lager:debug("No dirty nodes before fullsync started"), State @@ -1063,3 +1073,12 @@ flush_exit_message(Pid) -> ok end. +%% check all nodes in the cluster for existing rt_dirty files +%% and reset their rt_dirty flag if it exists +check_nodes_for_rt_dirty(Ring) -> + Owners = riak_core_ring:all_owners(Ring), + [ case rpc:call(Node, riak_repl_stats, is_rt_dirty,[]) of + false -> ok; + _ -> riak_repl2_fscoordinator:node_dirty(Node) + end || Node <- Owners ]. 
+ diff --git a/src/riak_repl_stats.erl b/src/riak_repl_stats.erl index f654ce49..093ad799 100644 --- a/src/riak_repl_stats.erl +++ b/src/riak_repl_stats.erl @@ -140,8 +140,10 @@ rt_dirty() -> riak_repl2_fscoordinator:node_dirty(node()) catch _:_ -> - lager:debug("Failed to notify coordinator of rt_dirty status") - end + %% This could be triggered on startup if the + %% fscoordinator isn't running + lager:warning("Failed to notify coordinator of rt_dirty status.") + end end), ok; false -> ok From de33a5808cc577e6d927d4a05bb8ed36185ea244 Mon Sep 17 00:00:00 2001 From: Micah Warren Date: Tue, 9 Dec 2014 14:28:43 -0600 Subject: [PATCH 2/4] Fixed node pulling from owners proplist. --- src/riak_repl2_fscoordinator.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_repl2_fscoordinator.erl b/src/riak_repl2_fscoordinator.erl index bf2a03e7..6a3a772d 100644 --- a/src/riak_repl2_fscoordinator.erl +++ b/src/riak_repl2_fscoordinator.erl @@ -1080,5 +1080,5 @@ check_nodes_for_rt_dirty(Ring) -> [ case rpc:call(Node, riak_repl_stats, is_rt_dirty,[]) of false -> ok; _ -> riak_repl2_fscoordinator:node_dirty(Node) - end || Node <- Owners ]. + end || {_Part, Node} <- Owners ]. From cfb3f1f22f1191c5dab299942a6c67b7a2b1246b Mon Sep 17 00:00:00 2001 From: Micah Warren Date: Tue, 9 Dec 2014 14:28:56 -0600 Subject: [PATCH 3/4] Added check so an fscoordinator won't call into itself. --- src/riak_repl2_fscoordinator.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/riak_repl2_fscoordinator.erl b/src/riak_repl2_fscoordinator.erl index 6a3a772d..39120553 100644 --- a/src/riak_repl2_fscoordinator.erl +++ b/src/riak_repl2_fscoordinator.erl @@ -195,7 +195,7 @@ node_dirty(Node) -> Leader -> Fullsyncs = riak_repl2_fscoordinator_sup:started(Leader), [riak_repl2_fscoordinator:node_dirty(Pid, Node) || - {_, Pid} <- Fullsyncs] + {_, Pid} <- Fullsyncs, Pid =/= self()] end. 
node_dirty(Pid, Node) -> From 82c567fb5f9dc9cdffb4d7a3758417c3de5c1c6a Mon Sep 17 00:00:00 2001 From: Micah Warren Date: Mon, 22 Dec 2014 14:02:19 -0600 Subject: [PATCH 4/4] Added reason for node dirty marking failure log warning. --- src/riak_repl_stats.erl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/riak_repl_stats.erl b/src/riak_repl_stats.erl index 093ad799..25c433c2 100644 --- a/src/riak_repl_stats.erl +++ b/src/riak_repl_stats.erl @@ -139,10 +139,10 @@ rt_dirty() -> try riak_repl2_fscoordinator:node_dirty(node()) catch - _:_ -> + W:Y -> %% This could be triggered on startup if the %% fscoordinator isn't running - lager:warning("Failed to notify coordinator of rt_dirty status.") + lager:warning("Failed to notify coordinator of rt_dirty status due to ~p:~p.", [W,Y]) end end), ok;