Skip to content

Commit 0cb1d61

Browse files
authored
KAFKA-14292; Fix KRaft controlled shutdown delay (apache#12736)
The `controlledShutDownOffset` is defined as the "offset at which the broker should complete its controlled shutdown, or -1 if the broker is not performing a controlled shutdown". The controller sets this offset to a non-negative integer on receiving a heartbeat from a broker that's in controlled shutdown state. Currently, this offset is being updated and bumped every single time a broker in controlled shutdown mode send a heartbeat, delaying when controlled shutdown can actually complete for the broker. We should only update the offset when it was previously set to -1 to allow controlled shutdown to complete. Reviewers: Luke Chen <[email protected]>, Jason Gustafson <[email protected]>
1 parent 484f85f commit 0cb1d61

File tree

3 files changed

+56
-19
lines changed

3 files changed

+56
-19
lines changed

metadata/src/main/java/org/apache/kafka/controller/BrokerHeartbeatManager.java

+29-14
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
package org.apache.kafka.controller;
1919

20+
import java.util.OptionalLong;
2021
import org.apache.kafka.common.message.BrokerHeartbeatRequestData;
2122
import org.apache.kafka.common.utils.LogContext;
2223
import org.apache.kafka.common.utils.Time;
@@ -42,8 +43,9 @@
4243
/**
4344
* The BrokerHeartbeatManager manages all the soft state associated with broker heartbeats.
4445
* Soft state is state which does not appear in the metadata log. This state includes
45-
* things like the last time each broker sent us a heartbeat, and whether the broker is
46-
* trying to perform a controlled shutdown.
46+
* things like the last time each broker sent us a heartbeat. As of KIP-841, the controlled
47+
* shutdown state is no longer treated as soft state and is persisted to the metadata log on broker
48+
* controlled shutdown requests.
4749
*
4850
* Only the active controller has a BrokerHeartbeatManager, since only the active
4951
* controller handles broker heartbeats. Standby controllers will create a heartbeat
@@ -77,7 +79,7 @@ static class BrokerHeartbeatState {
7779
* if the broker is not performing a controlled shutdown. When this field is
7880
* updated, we also have to update the broker's position in the shuttingDown set.
7981
*/
80-
private long controlledShutDownOffset;
82+
private long controlledShutdownOffset;
8183

8284
/**
8385
* The previous entry in the unfenced list, or null if the broker is not in that list.
@@ -95,7 +97,7 @@ static class BrokerHeartbeatState {
9597
this.prev = null;
9698
this.next = null;
9799
this.metadataOffset = -1;
98-
this.controlledShutDownOffset = -1;
100+
this.controlledShutdownOffset = -1;
99101
}
100102

101103
/**
@@ -116,7 +118,7 @@ boolean fenced() {
116118
* Returns true only if the broker is in controlled shutdown state.
117119
*/
118120
boolean shuttingDown() {
119-
return controlledShutDownOffset >= 0;
121+
return controlledShutdownOffset >= 0;
120122
}
121123
}
122124

@@ -275,6 +277,16 @@ Collection<BrokerHeartbeatState> brokers() {
275277
return brokers.values();
276278
}
277279

280+
// VisibleForTesting
281+
OptionalLong controlledShutdownOffset(int brokerId) {
282+
BrokerHeartbeatState broker = brokers.get(brokerId);
283+
if (broker == null || broker.controlledShutdownOffset == -1) {
284+
return OptionalLong.empty();
285+
}
286+
return OptionalLong.of(broker.controlledShutdownOffset);
287+
}
288+
289+
278290
/**
279291
* Mark a broker as fenced.
280292
*
@@ -381,7 +393,7 @@ void touch(int brokerId, boolean fenced, long metadataOffset) {
381393
if (fenced) {
382394
// If a broker is fenced, it leaves controlled shutdown. On its next heartbeat,
383395
// it will shut down immediately.
384-
broker.controlledShutDownOffset = -1;
396+
broker.controlledShutdownOffset = -1;
385397
} else {
386398
unfenced.add(broker);
387399
if (!broker.shuttingDown()) {
@@ -400,12 +412,13 @@ long lowestActiveOffset() {
400412
}
401413

402414
/**
403-
* Mark a broker as being in the controlled shutdown state.
415+
* Mark a broker as being in the controlled shutdown state. We only update the
416+
* controlledShutdownOffset if the broker was previously not in controlled shutdown state.
404417
*
405418
* @param brokerId The broker id.
406419
* @param controlledShutDownOffset The offset at which controlled shutdown will be complete.
407420
*/
408-
void updateControlledShutdownOffset(int brokerId, long controlledShutDownOffset) {
421+
void maybeUpdateControlledShutdownOffset(int brokerId, long controlledShutDownOffset) {
409422
BrokerHeartbeatState broker = brokers.get(brokerId);
410423
if (broker == null) {
411424
throw new RuntimeException("Unable to locate broker " + brokerId);
@@ -414,9 +427,11 @@ void updateControlledShutdownOffset(int brokerId, long controlledShutDownOffset)
414427
throw new RuntimeException("Fenced brokers cannot enter controlled shutdown.");
415428
}
416429
active.remove(broker);
417-
broker.controlledShutDownOffset = controlledShutDownOffset;
418-
log.debug("Updated the controlled shutdown offset for broker {} to {}.",
419-
brokerId, controlledShutDownOffset);
430+
if (broker.controlledShutdownOffset < 0) {
431+
broker.controlledShutdownOffset = controlledShutDownOffset;
432+
log.debug("Updated the controlled shutdown offset for broker {} to {}.",
433+
brokerId, controlledShutDownOffset);
434+
}
420435
}
421436

422437
/**
@@ -581,17 +596,17 @@ BrokerControlStates calculateNextBrokerState(int brokerId,
581596
return new BrokerControlStates(currentState, CONTROLLED_SHUTDOWN);
582597
}
583598
long lowestActiveOffset = lowestActiveOffset();
584-
if (broker.controlledShutDownOffset <= lowestActiveOffset) {
599+
if (broker.controlledShutdownOffset <= lowestActiveOffset) {
585600
log.info("The request from broker {} to shut down has been granted " +
586601
"since the lowest active offset {} is now greater than the " +
587602
"broker's controlled shutdown offset {}.", brokerId,
588-
lowestActiveOffset, broker.controlledShutDownOffset);
603+
lowestActiveOffset, broker.controlledShutdownOffset);
589604
return new BrokerControlStates(currentState, SHUTDOWN_NOW);
590605
}
591606
log.debug("The request from broker {} to shut down can not yet be granted " +
592607
"because the lowest active offset {} is not greater than the broker's " +
593608
"shutdown offset {}.", brokerId, lowestActiveOffset,
594-
broker.controlledShutDownOffset);
609+
broker.controlledShutdownOffset);
595610
return new BrokerControlStates(currentState, CONTROLLED_SHUTDOWN);
596611

597612
default:

metadata/src/main/java/org/apache/kafka/controller/QuorumController.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -1995,7 +1995,7 @@ public ControllerResult<BrokerHeartbeatReply> generateRecordsAndResult() {
19951995
public void processBatchEndOffset(long offset) {
19961996
if (inControlledShutdown) {
19971997
clusterControl.heartbeatManager().
1998-
updateControlledShutdownOffset(brokerId, offset);
1998+
maybeUpdateControlledShutdownOffset(brokerId, offset);
19991999
}
20002000
}
20012001
});

metadata/src/test/java/org/apache/kafka/controller/BrokerHeartbeatManagerTest.java

+26-4
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import java.util.HashSet;
2222
import java.util.Iterator;
2323
import java.util.Optional;
24+
import java.util.OptionalLong;
2425
import java.util.Set;
2526
import java.util.TreeSet;
2627
import org.apache.kafka.common.message.BrokerHeartbeatRequestData;
@@ -203,17 +204,38 @@ public void testUsableBrokerIterator() {
203204
expected.add(new UsableBroker(3, Optional.of("rack2"), false));
204205
expected.add(new UsableBroker(4, Optional.of("rack1"), true));
205206
assertEquals(expected, usableBrokersToSet(manager));
206-
manager.updateControlledShutdownOffset(2, 0);
207+
manager.maybeUpdateControlledShutdownOffset(2, 0);
207208
assertEquals(100L, manager.lowestActiveOffset());
208209
assertThrows(RuntimeException.class,
209-
() -> manager.updateControlledShutdownOffset(4, 0));
210+
() -> manager.maybeUpdateControlledShutdownOffset(4, 0));
210211
manager.touch(4, false, 100);
211-
manager.updateControlledShutdownOffset(4, 0);
212+
manager.maybeUpdateControlledShutdownOffset(4, 0);
212213
expected.remove(new UsableBroker(2, Optional.of("rack1"), false));
213214
expected.remove(new UsableBroker(4, Optional.of("rack1"), true));
214215
assertEquals(expected, usableBrokersToSet(manager));
215216
}
216217

218+
@Test
219+
public void testControlledShutdownOffsetIsOnlyUpdatedOnce() {
220+
BrokerHeartbeatManager manager = newBrokerHeartbeatManager();
221+
assertEquals(Collections.emptySet(), usableBrokersToSet(manager));
222+
manager.touch(0, false, 100);
223+
manager.touch(1, false, 100);
224+
manager.touch(2, false, 98);
225+
manager.touch(3, false, 100);
226+
manager.touch(4, true, 100);
227+
assertEquals(OptionalLong.empty(), manager.controlledShutdownOffset(2));
228+
manager.maybeUpdateControlledShutdownOffset(2, 98);
229+
assertEquals(OptionalLong.of(98), manager.controlledShutdownOffset(2));
230+
manager.maybeUpdateControlledShutdownOffset(2, 99);
231+
assertEquals(OptionalLong.of(98), manager.controlledShutdownOffset(2));
232+
assertEquals(OptionalLong.empty(), manager.controlledShutdownOffset(3));
233+
manager.maybeUpdateControlledShutdownOffset(3, 101);
234+
assertEquals(OptionalLong.of(101), manager.controlledShutdownOffset(3));
235+
manager.maybeUpdateControlledShutdownOffset(3, 102);
236+
assertEquals(OptionalLong.of(101), manager.controlledShutdownOffset(3));
237+
}
238+
217239
@Test
218240
public void testBrokerHeartbeatStateList() {
219241
BrokerHeartbeatStateList list = new BrokerHeartbeatStateList();
@@ -256,7 +278,7 @@ public void testCalculateNextBrokerState() {
256278
manager.touch(3, false, 100);
257279
manager.touch(4, true, 100);
258280
manager.touch(5, false, 99);
259-
manager.updateControlledShutdownOffset(5, 99);
281+
manager.maybeUpdateControlledShutdownOffset(5, 99);
260282

261283
assertEquals(98L, manager.lowestActiveOffset());
262284

0 commit comments

Comments
 (0)