Commit 5cff8f6
KAFKA-14296; Partition leaders are not demoted during kraft controlled shutdown (apache#12741)
When the `BrokerServer` starts its shutdown process, it transitions to `SHUTTING_DOWN` and sets `isShuttingDown` to `true`. With this state change, follower state changes are short-circuited. This means that a broker which was serving as leader would keep acting as leader until controlled shutdown completes. Instead, we want the leader and ISR state to be updated so that requests return NOT_LEADER and the client can find the new leader. We missed this case while implementing apache#12187.

This patch fixes the issue and updates an existing test to ensure that `isShuttingDown` has no effect. We should consider adding integration tests for this as well; we can do that separately.

Reviewers: Ismael Juma <[email protected]>, José Armando García Sancio <[email protected]>, Jason Gustafson <[email protected]>
1 parent 0cb1d61 commit 5cff8f6

2 files changed: +28 -25 lines

core/src/main/scala/kafka/server/ReplicaManager.scala

+17 -22
@@ -2142,7 +2142,6 @@ class ReplicaManager(val config: KafkaConfig,
   ): Unit = {
     stateChangeLogger.info(s"Transitioning ${localFollowers.size} partition(s) to " +
       "local followers.")
-    val shuttingDown = isShuttingDown.get()
     val partitionsToStartFetching = new mutable.HashMap[TopicPartition, Partition]
     val partitionsToStopFetching = new mutable.HashMap[TopicPartition, Boolean]
     val followerTopicSet = new mutable.HashSet[String]
@@ -2151,28 +2150,24 @@ class ReplicaManager(val config: KafkaConfig,
       try {
         followerTopicSet.add(tp.topic)
 
-        if (shuttingDown) {
-          stateChangeLogger.trace(s"Unable to start fetching $tp with topic " +
-            s"ID ${info.topicId} because the replica manager is shutting down.")
-        } else {
-          // We always update the follower state.
-          // - This ensure that a replica with no leader can step down;
-          // - This also ensures that the local replica is created even if the leader
-          //   is unavailable. This is required to ensure that we include the partition's
-          //   high watermark in the checkpoint file (see KAFKA-1647).
-          val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
-          val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))
-
-          if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
-              !info.partition.isr.contains(config.brokerId))) {
-            // During controlled shutdown, replica with no leaders and replica
-            // where this broker is not in the ISR are stopped.
-            partitionsToStopFetching.put(tp, false)
-          } else if (isNewLeaderEpoch) {
-            // Otherwise, fetcher is restarted if the leader epoch has changed.
-            partitionsToStartFetching.put(tp, partition)
-          }
+        // We always update the follower state.
+        // - This ensure that a replica with no leader can step down;
+        // - This also ensures that the local replica is created even if the leader
+        //   is unavailable. This is required to ensure that we include the partition's
+        //   high watermark in the checkpoint file (see KAFKA-1647).
+        val state = info.partition.toLeaderAndIsrPartitionState(tp, isNew)
+        val isNewLeaderEpoch = partition.makeFollower(state, offsetCheckpoints, Some(info.topicId))
+
+        if (isInControlledShutdown && (info.partition.leader == NO_LEADER ||
+            !info.partition.isr.contains(config.brokerId))) {
+          // During controlled shutdown, replica with no leaders and replica
+          // where this broker is not in the ISR are stopped.
+          partitionsToStopFetching.put(tp, false)
+        } else if (isNewLeaderEpoch) {
+          // Otherwise, fetcher is restarted if the leader epoch has changed.
+          partitionsToStartFetching.put(tp, partition)
         }
+
         changedPartitions.add(partition)
       } catch {
         case e: KafkaStorageException =>
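
To make the behavioral change easier to see outside the diff context, here is a minimal, self-contained Scala sketch of the fetcher decision after this patch. The object, method, and type names below are illustrative stand-ins, not ReplicaManager APIs; the condition mirrors the `isInControlledShutdown` / `NO_LEADER` / ISR check added above, and the point is that `isShuttingDown` no longer appears in it.

object FollowerTransitionSketch {

  sealed trait FetcherAction
  case object StopFetching extends FetcherAction      // stopped during controlled shutdown
  case object StartFetching extends FetcherAction     // (re)started on a new leader epoch
  case object KeepCurrentState extends FetcherAction  // nothing to change

  val NoLeader: Int = -1 // stand-in for NO_LEADER

  // Mirrors the patched branch above: the follower state has already been
  // updated via makeFollower (so a demoted leader steps down regardless of
  // isShuttingDown); this only decides what happens to the fetcher.
  def decideFetcherAction(
    isInControlledShutdown: Boolean,
    leader: Int,
    isr: Set[Int],
    localBrokerId: Int,
    isNewLeaderEpoch: Boolean
  ): FetcherAction = {
    if (isInControlledShutdown && (leader == NoLeader || !isr.contains(localBrokerId)))
      StopFetching
    else if (isNewLeaderEpoch)
      StartFetching
    else
      KeepCurrentState
  }

  def main(args: Array[String]): Unit = {
    // Broker 0 is in controlled shutdown and has been dropped from the ISR:
    // fetching stops, so clients get NOT_LEADER and discover the new leader.
    println(decideFetcherAction(isInControlledShutdown = true,
      leader = 1, isr = Set(1, 2), localBrokerId = 0, isNewLeaderEpoch = true))
    // Ordinary follower transition with a bumped leader epoch: restart the fetcher.
    println(decideFetcherAction(isInControlledShutdown = false,
      leader = 1, isr = Set(0, 1, 2), localBrokerId = 0, isNewLeaderEpoch = true))
  }
}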

core/src/test/scala/unit/kafka/server/ReplicaManagerTest.scala

+11 -3
@@ -21,7 +21,7 @@ import java.io.File
 import java.net.InetAddress
 import java.nio.file.Files
 import java.util
-import java.util.concurrent.atomic.{AtomicLong, AtomicReference}
+import java.util.concurrent.atomic.{AtomicBoolean, AtomicLong, AtomicReference}
 import java.util.concurrent.{CountDownLatch, TimeUnit}
 import java.util.stream.IntStream
 import java.util.{Collections, Optional, Properties}
@@ -2210,7 +2210,8 @@ class ReplicaManagerTest {
     aliveBrokerIds: Seq[Int] = Seq(0, 1),
     propsModifier: Properties => Unit = _ => {},
     mockReplicaFetcherManager: Option[ReplicaFetcherManager] = None,
-    mockReplicaAlterLogDirsManager: Option[ReplicaAlterLogDirsManager] = None
+    mockReplicaAlterLogDirsManager: Option[ReplicaAlterLogDirsManager] = None,
+    isShuttingDown: AtomicBoolean = new AtomicBoolean(false)
   ): ReplicaManager = {
     val props = TestUtils.createBrokerConfig(brokerId, TestUtils.MockZkConnect)
     props.put("log.dirs", TestUtils.tempRelativeDir("data").getAbsolutePath + "," + TestUtils.tempRelativeDir("data2").getAbsolutePath)
@@ -2245,6 +2246,7 @@
       metadataCache = metadataCache,
       logDirFailureChannel = new LogDirFailureChannel(config.logDirs.size),
       alterPartitionManager = alterPartitionManager,
+      isShuttingDown = isShuttingDown,
       delayedProducePurgatoryParam = Some(mockProducePurgatory),
       delayedFetchPurgatoryParam = Some(mockFetchPurgatory),
       delayedDeleteRecordsPurgatoryParam = Some(mockDeleteRecordsPurgatory),
@@ -3868,10 +3870,12 @@
     val foo2 = new TopicPartition("foo", 2)
 
     val mockReplicaFetcherManager = mock(classOf[ReplicaFetcherManager])
+    val isShuttingDown = new AtomicBoolean(false)
     val replicaManager = setupReplicaManagerWithMockedPurgatories(
       timer = new MockTimer(time),
       brokerId = localId,
-      mockReplicaFetcherManager = Some(mockReplicaFetcherManager)
+      mockReplicaFetcherManager = Some(mockReplicaFetcherManager),
+      isShuttingDown = isShuttingDown
     )
 
     try {
@@ -3940,6 +3944,10 @@
 
     reset(mockReplicaFetcherManager)
 
+    // The broker transitions to SHUTTING_DOWN state. This should not have
+    // any impact in KRaft mode.
+    isShuttingDown.set(true)
+
     // The replica begins the controlled shutdown.
     replicaManager.beginControlledShutdown()
 
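
As a rough illustration of the testing pattern used above (and not the real ReplicaManagerTest harness), the following self-contained Scala sketch shows the idea: share an AtomicBoolean with the component under test, flip it mid-test, and assert that the follower-transition decision is unchanged. `FetchDecider` is a hypothetical stand-in, not a Kafka class.

import java.util.concurrent.atomic.AtomicBoolean

// Hypothetical stand-in for the component under test. The constructor keeps
// the flag only to mirror the real wiring; after this patch the real code
// likewise ignores isShuttingDown when transitioning followers.
final class FetchDecider(isShuttingDown: AtomicBoolean) {
  def shouldStartFetching(isNewLeaderEpoch: Boolean): Boolean = isNewLeaderEpoch
}

object ShuttingDownHasNoEffectSketch {
  def main(args: Array[String]): Unit = {
    val isShuttingDown = new AtomicBoolean(false)
    val decider = new FetchDecider(isShuttingDown)

    val before = decider.shouldStartFetching(isNewLeaderEpoch = true)
    isShuttingDown.set(true) // broker enters SHUTTING_DOWN mid-test
    val after = decider.shouldStartFetching(isNewLeaderEpoch = true)

    assert(before == after, "isShuttingDown must not affect follower transitions")
    println("ok: the shutting-down flag has no effect on the decision")
  }
}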