Skip to content

Commit a054902

Browse files
committed
CURATOR-724. Fix LeaderLatch recovery on reconnect when leaderPath is missing
Signed-off-by: tison <[email protected]>
1 parent ad19795 commit a054902

File tree

1 file changed

+21
-10
lines changed
  • curator-recipes/src/main/java/org/apache/curator/framework/recipes/leader

1 file changed

+21
-10
lines changed

curator-recipes/src/main/java/org/apache/curator/framework/recipes/leader/LeaderLatch.java

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ public void processResult(CuratorFramework client, CuratorEvent event) throws Ex
509509
getChildren();
510510
}
511511
} else {
512-
log.error("getChildren() failed. rc = {}", event.getResultCode());
512+
log.error("creatingParentContainersIfNeeded() failed (rc = {})", event.getResultCode());
513513
}
514514
}
515515
};
@@ -528,7 +528,7 @@ private synchronized void internalStart() {
528528
reset();
529529
} catch (Exception e) {
530530
ThreadUtils.checkInterrupted(e);
531-
log.error("An error occurred checking resetting leadership.", e);
531+
log.error("failed to check resetting leadership.", e);
532532
}
533533
}
534534
}
@@ -548,7 +548,7 @@ private void checkLeadership(List<String> children) throws Exception {
548548
log.debug("checkLeadership with id: {}, ourPath: {}, children: {}", id, localOurPath, sortedChildren);
549549

550550
if (ourIndex < 0) {
551-
log.error("Can't find our node. Resetting. Index: {}", ourIndex);
551+
log.error("failed to find our node; resetting (index: {})", ourIndex);
552552
reset();
553553
return;
554554
}
@@ -582,7 +582,7 @@ public void process(WatchedEvent event) {
582582
getChildren();
583583
} catch (Exception ex) {
584584
ThreadUtils.checkInterrupted(ex);
585-
log.error("An error occurred checking the leadership.", ex);
585+
log.error("failed to check the leadership.", ex);
586586
}
587587
}
588588
}
@@ -607,6 +607,17 @@ private void getChildren() throws Exception {
607607
public void processResult(CuratorFramework client, CuratorEvent event) throws Exception {
608608
if (event.getResultCode() == KeeperException.Code.OK.intValue()) {
609609
checkLeadership(event.getChildren());
610+
} else if (event.getResultCode() == KeeperException.Code.NONODE.intValue()) {
611+
// latchPath is gone - reset
612+
//
613+
// This can happen on RECONNECTED after the following sequence:
614+
// (1) Scale the zk cluster to 0 nodes.
615+
// (2) Scale it back.
616+
//
617+
// See also https://issues.apache.org/jira/browse/CURATOR-724
618+
reset();
619+
} else {
620+
log.error("getChildren() failed (rc = {})", event.getResultCode());
610621
}
611622
}
612623
};
@@ -616,11 +627,6 @@ public void processResult(CuratorFramework client, CuratorEvent event) throws Ex
616627
@VisibleForTesting
617628
protected void handleStateChange(ConnectionState newState) {
618629
switch (newState) {
619-
default: {
620-
// NOP
621-
break;
622-
}
623-
624630
case RECONNECTED: {
625631
try {
626632
if (client.getConnectionStateErrorPolicy().isErrorState(ConnectionState.SUSPENDED)
@@ -629,7 +635,7 @@ protected void handleStateChange(ConnectionState newState) {
629635
}
630636
} catch (Exception e) {
631637
ThreadUtils.checkInterrupted(e);
632-
log.error("Could not reset leader latch", e);
638+
log.error("failed to reset leader latch", e);
633639
setLeadership(false);
634640
}
635641
break;
@@ -646,6 +652,11 @@ protected void handleStateChange(ConnectionState newState) {
646652
setLeadership(false);
647653
break;
648654
}
655+
656+
default: {
657+
// NOP
658+
break;
659+
}
649660
}
650661
}
651662

0 commit comments

Comments
 (0)