
Commit 0058029

AhmedSoliman authored and facebook-github-bot committed
Support removing PROVISIONING nodes and apply DRAINED maintenance on them
Summary: This addresses the following cases:

- A node that is PROVISIONING should be removable (removeNodes shouldn't reject it).
- Applying a MAY_DISAPPEAR or DRAINED maintenance on a node/shard that is PROVISIONING should appear as COMPLETED (MaintenanceProgress).
- The shard workflow treats the PROVISIONING and NONE states as equivalent: both count as already completed, and no further actions are needed.

Reviewed By: MohamedBassem

Differential Revision: D17817955

fbshipit-source-id: b56df01bbc5046d166a314ff1edcf589f24cef54
1 parent 5fae8d7 commit 0058029
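
To make the summary concrete, here is a minimal, self-contained sketch of the target-check semantics described above: a shard that is still PROVISIONING holds no data, so it already satisfies both the MAY_DISAPPEAR and DRAINED targets. This is not the LogDevice code itself; the enum and the `isTargetAchieved` helper below are simplified stand-ins (plain `std::unordered_set` and `assert` instead of `folly::F14FastSet` and `ld_assert`), mirroring the MaintenanceManager.cpp change further down.

```cpp
#include <cassert>
#include <unordered_set>

// Simplified stand-in for logdevice's ShardOperationalState enum.
enum class ShardOperationalState {
  ENABLED,
  MAY_DISAPPEAR,
  MIGRATING_DATA,
  PROVISIONING,
  DRAINED,
};

// Sketch of the target check: a shard that is still PROVISIONING holds no
// data, so it already satisfies both the MAY_DISAPPEAR and DRAINED targets.
bool isTargetAchieved(ShardOperationalState current,
                      ShardOperationalState target) {
  static const std::unordered_set<ShardOperationalState> may_disappear_states{
      ShardOperationalState::MAY_DISAPPEAR,
      ShardOperationalState::MIGRATING_DATA,
      ShardOperationalState::PROVISIONING,
      ShardOperationalState::DRAINED};
  // States considered at or above DRAINED.
  static const std::unordered_set<ShardOperationalState> drained_states{
      ShardOperationalState::PROVISIONING, ShardOperationalState::DRAINED};

  if (target == ShardOperationalState::MAY_DISAPPEAR) {
    return may_disappear_states.count(current) > 0;
  }
  if (target == ShardOperationalState::DRAINED) {
    return drained_states.count(current) > 0;
  }
  return false; // no other targets are expected in this sketch
}

int main() {
  // A PROVISIONING shard immediately satisfies both maintenance targets.
  assert(isTargetAchieved(ShardOperationalState::PROVISIONING,
                          ShardOperationalState::DRAINED));
  assert(isTargetAchieved(ShardOperationalState::PROVISIONING,
                          ShardOperationalState::MAY_DISAPPEAR));
  // An ENABLED shard still has to be drained first.
  assert(!isTargetAchieved(ShardOperationalState::ENABLED,
                           ShardOperationalState::DRAINED));
  return 0;
}
```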

File tree (4 files changed: +63 −2)

- logdevice/admin/cluster_membership/RemoveNodesHandler.cpp
- logdevice/admin/maintenance/MaintenanceManager.cpp
- logdevice/admin/maintenance/ShardWorkflow.cpp
- logdevice/admin/test/ClusterMembershipAPIHandlerTest.cpp

logdevice/admin/cluster_membership/RemoveNodesHandler.cpp (+3 −1)

```diff
@@ -74,7 +74,9 @@ check_is_disabled(const NodesConfiguration& nodes_configuration,
         nodes_configuration.getStorageMembership()->getShardStates(idx);
     for (const auto& state : states) {
       storage_disabled &=
-          state.second.storage_state == membership::StorageState::NONE;
+          (state.second.storage_state == membership::StorageState::NONE ||
+           state.second.storage_state ==
+               membership::StorageState::PROVISIONING);
     }
   }
 
```
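
The removability rule this hunk implements, shown as a standalone sketch under simplifying assumptions: the `allShardsDisabled` helper and the bare `StorageState` enum below are hypothetical stand-ins (the real check iterates the NodesConfiguration storage membership, and `membership::StorageState` has more values). A node's storage counts as disabled only when every one of its shards is NONE or still PROVISIONING.

```cpp
#include <vector>

// Simplified stand-in for membership::StorageState.
enum class StorageState { PROVISIONING, NONE, READ_ONLY, READ_WRITE };

// A node's storage counts as disabled (and the node as removable) only if
// every one of its shards is NONE or still PROVISIONING.
bool allShardsDisabled(const std::vector<StorageState>& shard_states) {
  bool storage_disabled = true;
  for (const auto state : shard_states) {
    storage_disabled &=
        (state == StorageState::NONE || state == StorageState::PROVISIONING);
  }
  return storage_disabled;
}
```

With this rule, removeNodes no longer rejects nodes whose shards never left PROVISIONING.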

logdevice/admin/maintenance/MaintenanceManager.cpp (+9 −1)

```diff
@@ -601,6 +601,9 @@ MaintenanceManager::getShardOperationalStateInternal(ShardID shard) const {
         result = ShardOperationalState::ENABLED;
       }
       break;
+    case membership::StorageState::PROVISIONING:
+      result = ShardOperationalState::PROVISIONING;
+      break;
     default:
       // This should never happen. All storage state
       // cases are handled above
@@ -1896,12 +1899,17 @@ bool MaintenanceManager::isTargetAchieved(ShardOperationalState current,
   static folly::F14FastSet<ShardOperationalState> may_disappear_states{
       {ShardOperationalState::MAY_DISAPPEAR,
        ShardOperationalState::MIGRATING_DATA,
+       ShardOperationalState::PROVISIONING,
        ShardOperationalState::DRAINED}};
 
+  // Any of these states are considered higher or equals the DRAINED state.
+  static folly::F14FastSet<ShardOperationalState> drained_states{
+      {ShardOperationalState::PROVISIONING, ShardOperationalState::DRAINED}};
+
   if (target == ShardOperationalState::MAY_DISAPPEAR) {
     return may_disappear_states.count(current) > 0;
   } else if (target == ShardOperationalState::DRAINED) {
-    return current == ShardOperationalState::DRAINED;
+    return drained_states.count(current) > 0;
   } else {
     // we don't know any other targets.
     ld_assert(false);
```
logdevice/admin/maintenance/ShardWorkflow.cpp (+4)

```diff
@@ -125,6 +125,10 @@ void ShardWorkflow::computeMaintenanceStatusForDrain() {
 
   switch (current_storage_state_) {
     case membership::StorageState::NONE:
+    // If the node is provisioning, we can consider it drained as well. Any
+    // maintenance that needs this node to be drained will appear completed
+    // immediately.
+    case membership::StorageState::PROVISIONING:
       // We have reached the target already, there is no further transitions
       // needed to declare the shard as DRAINED.
       updateStatus(MaintenanceStatus::COMPLETED);
```
logdevice/admin/test/ClusterMembershipAPIHandlerTest.cpp (+47)

```diff
@@ -165,6 +165,53 @@ TEST_F(ClusterMemebershipAPIIntegrationTest, TestRemoveAliveNodes) {
   }
 }
 
+TEST_F(ClusterMemebershipAPIIntegrationTest, TestRemoveProvisioningNodes) {
+  ASSERT_EQ(0, cluster_->start({0, 1, 2, 3}));
+  cluster_->getNode(0).waitUntilNodeStateReady();
+  auto admin_client = cluster_->getNode(0).createAdminClient();
+
+  {
+    // Add two nodes with 2 shards each. They will get added as PROVISIONING.
+    thrift::AddNodesResponse resp;
+    admin_client->sync_addNodes(resp, buildAddNodesRequest({100, 101}));
+    ASSERT_EQ(2, resp.added_nodes.size());
+
+    wait_until("AdminServer's NC picks the additions", [&]() {
+      thrift::NodesConfigResponse nc;
+      admin_client->sync_getNodesConfig(nc, thrift::NodesFilter{});
+      return nc.version >= resp.new_nodes_configuration_version;
+    });
+  }
+
+  thrift::RemoveNodesResponse resp;
+  admin_client->sync_removeNodes(resp, buildRemoveNodesRequest({100, 101}));
+  EXPECT_EQ(2, resp.removed_nodes.size());
+}
+
+TEST_F(ClusterMemebershipAPIIntegrationTest, TestApplyDrainOnProvisioning) {
+  ASSERT_EQ(0, cluster_->start({0, 1, 2, 3}));
+  cluster_->getNode(0).waitUntilNodeStateReady();
+  auto admin_client = cluster_->getNode(0).createAdminClient();
+
+  {
+    // Add two nodes with 2 shards each. They will get added as PROVISIONING.
+    thrift::AddNodesResponse resp;
+    admin_client->sync_addNodes(resp, buildAddNodesRequest({100, 101}));
+    ASSERT_EQ(2, resp.added_nodes.size());
+
+    wait_until("AdminServer's NC picks the additions", [&]() {
+      thrift::NodesConfigResponse nc;
+      admin_client->sync_getNodesConfig(nc, thrift::NodesFilter{});
+      return nc.version >= resp.new_nodes_configuration_version;
+    });
+  }
+
+  // We didn't provision the shards, let's apply a DRAINED maintenance
+  // immediately.
+  disableAndWait(
+      {mkShardID(100, -1), mkShardID(101, -1)}, {mkNodeID(100), mkNodeID(101)});
+}
+
 TEST_F(ClusterMemebershipAPIIntegrationTest, TestRemoveNonExistentNode) {
   ASSERT_EQ(0, cluster_->start({0, 1, 2, 3}));
   auto admin_client = cluster_->getNode(0).createAdminClient();
```
