From 7529dc95391e8fc0ab5fdeeae56412657b7dc5a8 Mon Sep 17 00:00:00 2001 From: Niels Bauman <33722607+nielsbauman@users.noreply.github.com> Date: Mon, 4 Mar 2024 13:59:21 +0100 Subject: [PATCH] Make Health API more resilient to multi-version clusters (#105789) First check whether the full cluster supports a specific indicator (feature) before we mark an indicator as "unknown" when (meta) data is missing from the cluster state. --- docs/changelog/105789.yaml | 6 +++ .../rest-api-spec/test/health/10_basic.yml | 6 +-- .../elasticsearch/health/HealthFeatures.java | 10 +++- .../node/DiskHealthIndicatorService.java | 17 ++++++- .../ShardsCapacityHealthIndicatorService.java | 15 +++++- .../elasticsearch/node/NodeConstruction.java | 6 +-- ...sitoryIntegrityHealthIndicatorService.java | 23 +++++++-- .../node/DiskHealthIndicatorServiceTests.java | 46 ++++++++++++----- ...dsCapacityHealthIndicatorServiceTests.java | 51 +++++++++++++++---- ...yIntegrityHealthIndicatorServiceTests.java | 19 +++++-- 10 files changed, 157 insertions(+), 42 deletions(-) create mode 100644 docs/changelog/105789.yaml diff --git a/docs/changelog/105789.yaml b/docs/changelog/105789.yaml new file mode 100644 index 0000000000000..02a6936fa3294 --- /dev/null +++ b/docs/changelog/105789.yaml @@ -0,0 +1,6 @@ +pr: 105789 +summary: Make Health API more resilient to multi-version clusters +area: Health +type: bug +issues: + - 90183 diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/health/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/health/10_basic.yml index 5e6ca8247997c..1dc35c165b4e0 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/health/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/health/10_basic.yml @@ -1,10 +1,8 @@ --- "cluster health basic test": - skip: - version: all - reason: "AwaitsFix https://github.com/elastic/elasticsearch/issues/90183" - # version: "- 8.3.99" - # reason: "health was only added in 8.2.0, and master_is_stable in 8.4.0" + version: "- 8.3.99" + reason: "health was only added in 8.2.0, and master_is_stable in 8.4.0" - do: health_report: { } diff --git a/server/src/main/java/org/elasticsearch/health/HealthFeatures.java b/server/src/main/java/org/elasticsearch/health/HealthFeatures.java index 3a5d11f862efc..4b3bcf7e7278f 100644 --- a/server/src/main/java/org/elasticsearch/health/HealthFeatures.java +++ b/server/src/main/java/org/elasticsearch/health/HealthFeatures.java @@ -13,13 +13,21 @@ import org.elasticsearch.features.NodeFeature; import java.util.Map; +import java.util.Set; public class HealthFeatures implements FeatureSpecification { public static final NodeFeature SUPPORTS_HEALTH = new NodeFeature("health.supports_health"); + public static final NodeFeature SUPPORTS_SHARDS_CAPACITY_INDICATOR = new NodeFeature("health.shards_capacity_indicator"); + public static final NodeFeature SUPPORTS_EXTENDED_REPOSITORY_INDICATOR = new NodeFeature("health.extended_repository_indicator"); + + @Override + public Set getFeatures() { + return Set.of(SUPPORTS_EXTENDED_REPOSITORY_INDICATOR); + } @Override public Map getHistoricalFeatures() { - return Map.of(SUPPORTS_HEALTH, Version.V_8_5_0); + return Map.of(SUPPORTS_HEALTH, Version.V_8_5_0, SUPPORTS_SHARDS_CAPACITY_INDICATOR, Version.V_8_8_0); } } diff --git a/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java index 2805aa88a7e54..3304b71b4ca31 100644 --- a/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/DiskHealthIndicatorService.java @@ -17,7 +17,9 @@ import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.features.FeatureService; import org.elasticsearch.health.Diagnosis; +import org.elasticsearch.health.HealthFeatures; import org.elasticsearch.health.HealthIndicatorDetails; import org.elasticsearch.health.HealthIndicatorImpact; import org.elasticsearch.health.HealthIndicatorResult; @@ -71,9 +73,11 @@ public class DiskHealthIndicatorService implements HealthIndicatorService { private static final String IMPACT_CLUSTER_FUNCTIONALITY_UNAVAILABLE_ID = "cluster_functionality_unavailable"; private final ClusterService clusterService; + private final FeatureService featureService; - public DiskHealthIndicatorService(ClusterService clusterService) { + public DiskHealthIndicatorService(ClusterService clusterService, FeatureService featureService) { this.clusterService = clusterService; + this.featureService = featureService; } @Override @@ -83,8 +87,18 @@ public String name() { @Override public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResourcesCount, HealthInfo healthInfo) { + ClusterState clusterState = clusterService.state(); Map diskHealthInfoMap = healthInfo.diskInfoByNode(); if (diskHealthInfoMap == null || diskHealthInfoMap.isEmpty()) { + if (featureService.clusterHasFeature(clusterState, HealthFeatures.SUPPORTS_HEALTH) == false) { + return createIndicator( + HealthStatus.GREEN, + "No disk usage data available. The cluster currently has mixed versions (an upgrade may be in progress).", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); + } /* * If there is no disk health info, that either means that a new health node was just elected, or something is seriously * wrong with health data collection on the health node. Either way, we immediately return UNKNOWN. If there are at least @@ -98,7 +112,6 @@ public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResources Collections.emptyList() ); } - ClusterState clusterState = clusterService.state(); logNodesMissingHealthInfo(diskHealthInfoMap, clusterState); DiskHealthAnalyzer diskHealthAnalyzer = new DiskHealthAnalyzer(diskHealthInfoMap, clusterState); diff --git a/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java index 1852e504b61db..16e18b69d5c1d 100644 --- a/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorService.java @@ -12,7 +12,9 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.TriFunction; import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.features.FeatureService; import org.elasticsearch.health.Diagnosis; +import org.elasticsearch.health.HealthFeatures; import org.elasticsearch.health.HealthIndicatorDetails; import org.elasticsearch.health.HealthIndicatorImpact; import org.elasticsearch.health.HealthIndicatorResult; @@ -90,9 +92,11 @@ public class ShardsCapacityHealthIndicatorService implements HealthIndicatorServ ); private final ClusterService clusterService; + private final FeatureService featureService; - public ShardsCapacityHealthIndicatorService(ClusterService clusterService) { + public ShardsCapacityHealthIndicatorService(ClusterService clusterService, FeatureService featureService) { this.clusterService = clusterService; + this.featureService = featureService; } @Override @@ -105,6 +109,15 @@ public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResources var state = clusterService.state(); var healthMetadata = HealthMetadata.getFromClusterState(state); if (healthMetadata == null || healthMetadata.getShardLimitsMetadata() == null) { + if (featureService.clusterHasFeature(state, HealthFeatures.SUPPORTS_SHARDS_CAPACITY_INDICATOR) == false) { + return createIndicator( + HealthStatus.GREEN, + "No shard limits configured yet. The cluster currently has mixed versions (an upgrade may be in progress).", + HealthIndicatorDetails.EMPTY, + List.of(), + List.of() + ); + } return unknownIndicator(); } diff --git a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java index 72345b7d860f6..da43e14a92772 100644 --- a/server/src/main/java/org/elasticsearch/node/NodeConstruction.java +++ b/server/src/main/java/org/elasticsearch/node/NodeConstruction.java @@ -1189,9 +1189,9 @@ private Module loadDiagnosticServices( var serverHealthIndicatorServices = Stream.of( new StableMasterHealthIndicatorService(coordinationDiagnosticsService, clusterService), - new RepositoryIntegrityHealthIndicatorService(clusterService), - new DiskHealthIndicatorService(clusterService), - new ShardsCapacityHealthIndicatorService(clusterService) + new RepositoryIntegrityHealthIndicatorService(clusterService, featureService), + new DiskHealthIndicatorService(clusterService, featureService), + new ShardsCapacityHealthIndicatorService(clusterService, featureService) ); var pluginHealthIndicatorServices = pluginsService.filterPlugins(HealthPlugin.class) .flatMap(plugin -> plugin.getHealthIndicatorServices().stream()); diff --git a/server/src/main/java/org/elasticsearch/snapshots/RepositoryIntegrityHealthIndicatorService.java b/server/src/main/java/org/elasticsearch/snapshots/RepositoryIntegrityHealthIndicatorService.java index 0b460b5cb2fb7..67afddcb70664 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/RepositoryIntegrityHealthIndicatorService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/RepositoryIntegrityHealthIndicatorService.java @@ -12,7 +12,9 @@ import org.elasticsearch.cluster.metadata.RepositoriesMetadata; import org.elasticsearch.cluster.metadata.RepositoryMetadata; import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.features.FeatureService; import org.elasticsearch.health.Diagnosis; +import org.elasticsearch.health.HealthFeatures; import org.elasticsearch.health.HealthIndicatorDetails; import org.elasticsearch.health.HealthIndicatorImpact; import org.elasticsearch.health.HealthIndicatorResult; @@ -59,6 +61,8 @@ public class RepositoryIntegrityHealthIndicatorService implements HealthIndicato public static final String NO_REPOS_CONFIGURED = "No snapshot repositories configured."; public static final String ALL_REPOS_HEALTHY = "All repositories are healthy."; public static final String NO_REPO_HEALTH_INFO = "No repository health info."; + public static final String MIXED_VERSIONS = + "No repository health info. The cluster currently has mixed versions (an upgrade may be in progress)."; public static final List IMPACTS = List.of( new HealthIndicatorImpact( @@ -95,9 +99,11 @@ public class RepositoryIntegrityHealthIndicatorService implements HealthIndicato ); private final ClusterService clusterService; + private final FeatureService featureService; - public RepositoryIntegrityHealthIndicatorService(ClusterService clusterService) { + public RepositoryIntegrityHealthIndicatorService(ClusterService clusterService, FeatureService featureService) { this.clusterService = clusterService; + this.featureService = featureService; } @Override @@ -128,7 +134,7 @@ public HealthIndicatorResult calculate(boolean verbose, int maxAffectedResources /** * Analyzer for the cluster's repositories health; aids in constructing a {@link HealthIndicatorResult}. */ - static class RepositoryHealthAnalyzer { + class RepositoryHealthAnalyzer { private final ClusterState clusterState; private final int totalRepositories; private final List corruptedRepositories; @@ -137,6 +143,7 @@ static class RepositoryHealthAnalyzer { private final Set invalidRepositories = new HashSet<>(); private final Set nodesWithInvalidRepos = new HashSet<>(); private final HealthStatus healthStatus; + private boolean clusterHasFeature = true; private RepositoryHealthAnalyzer( ClusterState clusterState, @@ -167,7 +174,15 @@ private RepositoryHealthAnalyzer( || invalidRepositories.isEmpty() == false) { healthStatus = YELLOW; } else if (repositoriesHealthByNode.isEmpty()) { - healthStatus = UNKNOWN; + clusterHasFeature = featureService.clusterHasFeature( + clusterState, + HealthFeatures.SUPPORTS_EXTENDED_REPOSITORY_INDICATOR + ) == false; + if (clusterHasFeature) { + healthStatus = GREEN; + } else { + healthStatus = UNKNOWN; + } } else { healthStatus = GREEN; } @@ -179,7 +194,7 @@ public HealthStatus getHealthStatus() { public String getSymptom() { if (healthStatus == GREEN) { - return ALL_REPOS_HEALTHY; + return clusterHasFeature ? ALL_REPOS_HEALTHY : MIXED_VERSIONS; } else if (healthStatus == UNKNOWN) { return NO_REPO_HEALTH_INFO; } diff --git a/server/src/test/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceTests.java index a622c1ff600d6..0d38aaf5b3e4a 100644 --- a/server/src/test/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/DiskHealthIndicatorServiceTests.java @@ -26,7 +26,9 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.features.FeatureService; import org.elasticsearch.health.Diagnosis; +import org.elasticsearch.health.HealthFeatures; import org.elasticsearch.health.HealthIndicatorImpact; import org.elasticsearch.health.HealthIndicatorResult; import org.elasticsearch.health.HealthStatus; @@ -39,6 +41,8 @@ import org.elasticsearch.xcontent.XContentFactory; import org.elasticsearch.xcontent.XContentParser; import org.elasticsearch.xcontent.XContentType; +import org.junit.Before; +import org.mockito.Mockito; import java.io.IOException; import java.util.Collection; @@ -66,6 +70,7 @@ import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.iterableWithSize; import static org.hamcrest.Matchers.startsWith; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -98,10 +103,20 @@ public class DiskHealthIndicatorServiceTests extends ESTestCase { DiscoveryNodeRole.TRANSFORM_ROLE ); + private FeatureService featureService; + + @Before + public void setUp() throws Exception { + super.setUp(); + + featureService = Mockito.mock(FeatureService.class); + Mockito.when(featureService.clusterHasFeature(any(), any())).thenReturn(true); + } + public void testServiceBasics() { Set discoveryNodes = createNodesWithAllRoles(); ClusterService clusterService = createClusterService(discoveryNodes, false); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); { HealthStatus expectedStatus = HealthStatus.UNKNOWN; HealthInfo healthInfo = HealthInfo.EMPTY_HEALTH_INFO; @@ -125,7 +140,7 @@ public void testServiceBasics() { public void testIndicatorYieldsGreenWhenNodeHasUnknownStatus() { Set discoveryNodes = createNodesWithAllRoles(); ClusterService clusterService = createClusterService(discoveryNodes, false); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthStatus expectedStatus = HealthStatus.GREEN; HealthInfo healthInfo = createHealthInfoWithOneUnhealthyNode(HealthStatus.UNKNOWN, discoveryNodes); @@ -136,7 +151,7 @@ public void testIndicatorYieldsGreenWhenNodeHasUnknownStatus() { public void testGreen() throws IOException { Set discoveryNodes = createNodesWithAllRoles(); ClusterService clusterService = createClusterService(discoveryNodes, false); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthStatus expectedStatus = HealthStatus.GREEN; HealthInfo healthInfo = createHealthInfoWithOneUnhealthyNode(expectedStatus, discoveryNodes); HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); @@ -171,7 +186,7 @@ public void testYellowMixedNodes() throws IOException { final var clusterService = createClusterService(Set.of(), allNodes, indexNameToNodeIdsMap); HealthStatus expectedStatus = HealthStatus.YELLOW; HealthInfo healthInfo = createHealthInfo(new HealthInfoConfig(expectedStatus, allNodes.size(), allNodes)); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); assertThat(result.status(), equalTo(expectedStatus)); assertThat(result.symptom(), containsString("with roles: [data")); @@ -249,7 +264,7 @@ public void testRedNoBlockedIndicesAndRedAllRoleNodes() throws IOException { indexNameToNodeIdsMap.put(indexName, new HashSet<>(randomNonEmptySubsetOf(affectedNodeIds))); } ClusterService clusterService = createClusterService(Set.of(), discoveryNodes, indexNameToNodeIdsMap); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); Map diskInfoByNode = new HashMap<>(); for (DiscoveryNode discoveryNode : discoveryNodes) { if (affectedNodeIds.contains(discoveryNode.getId())) { @@ -313,7 +328,7 @@ public void testRedNoBlockedIndicesAndRedAllRoleNodes() throws IOException { public void testRedWithBlockedIndicesAndGreenNodes() throws IOException { Set discoveryNodes = createNodesWithAllRoles(); ClusterService clusterService = createClusterService(discoveryNodes, true); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthStatus expectedStatus = HealthStatus.RED; HealthInfo healthInfo = createHealthInfoWithOneUnhealthyNode(HealthStatus.GREEN, discoveryNodes); @@ -358,7 +373,7 @@ public void testRedWithBlockedIndicesAndGreenNodes() throws IOException { public void testRedWithBlockedIndicesAndYellowNodes() throws IOException { Set discoveryNodes = createNodesWithAllRoles(); ClusterService clusterService = createClusterService(discoveryNodes, true); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthStatus expectedStatus = HealthStatus.RED; int numberOfYellowNodes = randomIntBetween(1, discoveryNodes.size()); HealthInfo healthInfo = createHealthInfo(new HealthInfoConfig(HealthStatus.YELLOW, numberOfYellowNodes, discoveryNodes)); @@ -437,7 +452,7 @@ public void testRedBlockedIndicesAndRedAllRolesNodes() throws IOException { } } ClusterService clusterService = createClusterService(blockedIndices, discoveryNodes, indexNameToNodeIdsMap); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); assertThat(result.status(), equalTo(expectedStatus)); assertThat( @@ -476,7 +491,7 @@ public void testRedNodesWithoutAnyBlockedIndices() throws IOException { indexNameToNodeIdsMap.put(indexName, nonRedNodeIds); } ClusterService clusterService = createClusterService(Set.of(), discoveryNodes, indexNameToNodeIdsMap); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); assertThat(result.status(), equalTo(expectedStatus)); assertThat(result.impacts().size(), equalTo(3)); @@ -512,7 +527,7 @@ public void testMissingHealthInfo() { Set discoveryNodesInClusterState = new HashSet<>(discoveryNodes); discoveryNodesInClusterState.add(DiscoveryNodeUtils.create(randomAlphaOfLength(30), UUID.randomUUID().toString())); ClusterService clusterService = createClusterService(discoveryNodesInClusterState, false); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); { HealthInfo healthInfo = HealthInfo.EMPTY_HEALTH_INFO; HealthIndicatorResult result = diskHealthIndicatorService.calculate(true, healthInfo); @@ -544,7 +559,7 @@ public void testUnhealthyMasterNodes() { Set roles = Set.of(DiscoveryNodeRole.MASTER_ROLE, otherRole); Set discoveryNodes = createNodes(roles); ClusterService clusterService = createClusterService(discoveryNodes, false); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthStatus expectedStatus = randomFrom(HealthStatus.RED, HealthStatus.YELLOW); int numberOfProblemNodes = randomIntBetween(1, discoveryNodes.size()); HealthInfo healthInfo = createHealthInfo(new HealthInfoConfig(expectedStatus, numberOfProblemNodes, discoveryNodes)); @@ -599,7 +614,7 @@ public void testUnhealthyNonDataNonMasterNodes() { Set roles = new HashSet<>(randomNonEmptySubsetOf(OTHER_ROLES)); Set nodes = createNodes(roles); ClusterService clusterService = createClusterService(nodes, false); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); HealthStatus expectedStatus = randomFrom(HealthStatus.RED, HealthStatus.YELLOW); int numberOfProblemNodes = randomIntBetween(1, nodes.size()); HealthInfo healthInfo = createHealthInfo(new HealthInfoConfig(expectedStatus, numberOfProblemNodes, nodes)); @@ -655,7 +670,7 @@ public void testBlockedIndexWithRedNonDataNodesAndYellowDataNodes() { Set masterNodes = createNodes(masterRole); Set otherNodes = createNodes(otherRoles); ClusterService clusterService = createClusterService(Sets.union(Sets.union(dataNodes, masterNodes), otherNodes), true); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); int numberOfRedMasterNodes = randomIntBetween(1, masterNodes.size()); int numberOfRedOtherNodes = randomIntBetween(1, otherNodes.size()); int numberOfYellowDataNodes = randomIntBetween(1, dataNodes.size()); @@ -877,7 +892,7 @@ public void testLimitNumberOfAffectedResources() { Set masterNodes = createNodes(20, masterRole); Set otherNodes = createNodes(10, otherRoles); ClusterService clusterService = createClusterService(Sets.union(Sets.union(dataNodes, masterNodes), otherNodes), true); - DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService); + DiskHealthIndicatorService diskHealthIndicatorService = new DiskHealthIndicatorService(clusterService, featureService); int numberOfRedMasterNodes = masterNodes.size(); int numberOfRedOtherNodes = otherNodes.size(); int numberOfYellowDataNodes = dataNodes.size(); @@ -1055,9 +1070,11 @@ static ClusterState createClusterState( Collection nodes, Map> indexNameToNodeIdsMap ) { + Map> features = new HashMap<>(); DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(); for (DiscoveryNode node : nodes) { nodesBuilder = nodesBuilder.add(node); + features.put(node.getId(), Set.of(HealthFeatures.SUPPORTS_HEALTH.id())); } nodesBuilder.localNodeId(randomFrom(nodes).getId()); nodesBuilder.masterNodeId(randomFrom(nodes).getId()); @@ -1093,6 +1110,7 @@ static ClusterState createClusterState( state.metadata(metadata.generateClusterUuidIfNeeded().build()); state.routingTable(routingTable.build()); state.blocks(clusterBlocksBuilder); + state.nodeFeatures(features); return state.build(); } diff --git a/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java index f6e856079012d..c57f19999a915 100644 --- a/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/health/node/ShardsCapacityHealthIndicatorServiceTests.java @@ -19,6 +19,8 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.features.FeatureService; +import org.elasticsearch.health.HealthFeatures; import org.elasticsearch.health.HealthStatus; import org.elasticsearch.health.metadata.HealthMetadata; import org.elasticsearch.index.IndexVersion; @@ -36,6 +38,7 @@ import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; +import org.mockito.Mockito; import java.io.IOException; import java.util.List; @@ -60,6 +63,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.is; +import static org.mockito.ArgumentMatchers.any; public class ShardsCapacityHealthIndicatorServiceTests extends ESTestCase { @@ -68,6 +72,7 @@ public class ShardsCapacityHealthIndicatorServiceTests extends ESTestCase { private static ThreadPool threadPool; private ClusterService clusterService; + private FeatureService featureService; private DiscoveryNode dataNode; private DiscoveryNode frozenNode; @@ -86,6 +91,9 @@ public void setUp() throws Exception { .build(); clusterService = ClusterServiceUtils.createClusterService(threadPool); + + featureService = Mockito.mock(FeatureService.class); + Mockito.when(featureService.clusterHasFeature(any(), any())).thenReturn(true); } @After @@ -113,7 +121,7 @@ public void testNoShardsCapacityMetadata() throws IOException { createIndexInDataNode(100) ) ); - var target = new ShardsCapacityHealthIndicatorService(clusterService); + var target = new ShardsCapacityHealthIndicatorService(clusterService, featureService); var indicatorResult = target.calculate(true, HealthInfo.EMPTY_HEALTH_INFO); assertEquals(indicatorResult.status(), HealthStatus.UNKNOWN); @@ -127,7 +135,10 @@ public void testIndicatorYieldsGreenInCaseThereIsRoom() throws IOException { int maxShardsPerNode = randomValidMaxShards(); int maxShardsPerNodeFrozen = randomValidMaxShards(); var clusterService = createClusterService(maxShardsPerNode, maxShardsPerNodeFrozen, createIndexInDataNode(maxShardsPerNode / 4)); - var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService, featureService).calculate( + true, + HealthInfo.EMPTY_HEALTH_INFO + ); assertEquals(indicatorResult.status(), HealthStatus.GREEN); assertTrue(indicatorResult.impacts().isEmpty()); @@ -151,7 +162,10 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep // Only data_nodes does not have enough space int maxShardsPerNodeFrozen = randomValidMaxShards(); var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInDataNode(4)); - var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService, featureService).calculate( + true, + HealthInfo.EMPTY_HEALTH_INFO + ); assertEquals(indicatorResult.status(), YELLOW); assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); @@ -174,7 +188,10 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep // Only frozen_nodes does not have enough space int maxShardsPerNode = randomValidMaxShards(); var clusterService = createClusterService(maxShardsPerNode, 25, createIndexInFrozenNode(4)); - var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService, featureService).calculate( + true, + HealthInfo.EMPTY_HEALTH_INFO + ); assertEquals(indicatorResult.status(), YELLOW); assertEquals( @@ -199,7 +216,10 @@ public void testIndicatorYieldsYellowInCaseThereIsNotEnoughRoom() throws IOExcep { // Both data and frozen nodes does not have enough space var clusterService = createClusterService(25, 25, createIndexInDataNode(4), createIndexInFrozenNode(4)); - var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService, featureService).calculate( + true, + HealthInfo.EMPTY_HEALTH_INFO + ); assertEquals(indicatorResult.status(), YELLOW); assertEquals( @@ -230,7 +250,10 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio // Only data_nodes does not have enough space int maxShardsPerNodeFrozen = randomValidMaxShards(); var clusterService = createClusterService(25, maxShardsPerNodeFrozen, createIndexInDataNode(11)); - var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService, featureService).calculate( + true, + HealthInfo.EMPTY_HEALTH_INFO + ); assertEquals(indicatorResult.status(), RED); assertEquals(indicatorResult.symptom(), "Cluster is close to reaching the configured maximum number of shards for data nodes."); @@ -253,7 +276,10 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio // Only frozen_nodes does not have enough space int maxShardsPerNode = randomValidMaxShards(); var clusterService = createClusterService(maxShardsPerNode, 25, createIndexInFrozenNode(11)); - var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService, featureService).calculate( + true, + HealthInfo.EMPTY_HEALTH_INFO + ); assertEquals(indicatorResult.status(), RED); assertEquals( @@ -278,7 +304,10 @@ public void testIndicatorYieldsRedInCaseThereIsNotEnoughRoom() throws IOExceptio { // Both data and frozen nodes does not have enough space var clusterService = createClusterService(25, 25, createIndexInDataNode(11), createIndexInFrozenNode(11)); - var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService).calculate(true, HealthInfo.EMPTY_HEALTH_INFO); + var indicatorResult = new ShardsCapacityHealthIndicatorService(clusterService, featureService).calculate( + true, + HealthInfo.EMPTY_HEALTH_INFO + ); assertEquals(indicatorResult.status(), RED); assertEquals( @@ -397,7 +426,11 @@ private ClusterState createClusterState( metadata.put(idxMetadata); } - return ClusterState.builder(clusterState).metadata(metadata).build(); + var features = Set.of(HealthFeatures.SUPPORTS_SHARDS_CAPACITY_INDICATOR.id()); + return ClusterState.builder(clusterState) + .metadata(metadata) + .nodeFeatures(Map.of(dataNode.getId(), features, frozenNode.getId(), features)) + .build(); } private static IndexMetadata.Builder createIndexInDataNode(int shards) { diff --git a/server/src/test/java/org/elasticsearch/snapshots/RepositoryIntegrityHealthIndicatorServiceTests.java b/server/src/test/java/org/elasticsearch/snapshots/RepositoryIntegrityHealthIndicatorServiceTests.java index 0dfe27ee6dc50..572375d64d8b8 100644 --- a/server/src/test/java/org/elasticsearch/snapshots/RepositoryIntegrityHealthIndicatorServiceTests.java +++ b/server/src/test/java/org/elasticsearch/snapshots/RepositoryIntegrityHealthIndicatorServiceTests.java @@ -18,8 +18,10 @@ import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.features.FeatureService; import org.elasticsearch.health.Diagnosis; import org.elasticsearch.health.Diagnosis.Resource.Type; +import org.elasticsearch.health.HealthFeatures; import org.elasticsearch.health.HealthIndicatorDetails; import org.elasticsearch.health.HealthIndicatorResult; import org.elasticsearch.health.SimpleHealthIndicatorDetails; @@ -27,12 +29,14 @@ import org.elasticsearch.health.node.RepositoriesHealthInfo; import org.elasticsearch.test.ESTestCase; import org.junit.Before; +import org.mockito.Mockito; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.stream.Stream; import static org.elasticsearch.cluster.node.DiscoveryNode.DISCOVERY_NODE_COMPARATOR; @@ -47,6 +51,7 @@ import static org.elasticsearch.snapshots.RepositoryIntegrityHealthIndicatorService.NAME; import static org.elasticsearch.snapshots.RepositoryIntegrityHealthIndicatorService.UNKNOWN_DEFINITION; import static org.hamcrest.Matchers.equalTo; +import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -55,6 +60,7 @@ public class RepositoryIntegrityHealthIndicatorServiceTests extends ESTestCase { private DiscoveryNode node1; private DiscoveryNode node2; private HealthInfo healthInfo; + private FeatureService featureService; @Before public void setUp() throws Exception { @@ -74,6 +80,9 @@ public void setUp() throws Exception { ) ) ); + + featureService = Mockito.mock(FeatureService.class); + Mockito.when(featureService.clusterHasFeature(any(), any())).thenReturn(true); } public void testIsGreenWhenAllRepositoriesAreHealthy() { @@ -349,11 +358,13 @@ public void testMappedFieldsForTelemetry() { } private ClusterState createClusterStateWith(RepositoriesMetadata metadata) { - var builder = ClusterState.builder(new ClusterName("test-cluster")); + var features = Set.of(HealthFeatures.SUPPORTS_EXTENDED_REPOSITORY_INDICATOR.id()); + var builder = ClusterState.builder(new ClusterName("test-cluster")) + .nodes(DiscoveryNodes.builder().add(node1).add(node2).build()) + .nodeFeatures(Map.of(node1.getId(), features, node2.getId(), features)); if (metadata != null) { builder.metadata(Metadata.builder().putCustom(RepositoriesMetadata.TYPE, metadata)); } - builder.nodes(DiscoveryNodes.builder().add(node1).add(node2).build()); return builder.build(); } @@ -361,10 +372,10 @@ private static RepositoryMetadata createRepositoryMetadata(String name, boolean return new RepositoryMetadata(name, "uuid", "s3", Settings.EMPTY, corrupted ? CORRUPTED_REPO_GEN : EMPTY_REPO_GEN, EMPTY_REPO_GEN); } - private static RepositoryIntegrityHealthIndicatorService createRepositoryIntegrityHealthIndicatorService(ClusterState clusterState) { + private RepositoryIntegrityHealthIndicatorService createRepositoryIntegrityHealthIndicatorService(ClusterState clusterState) { var clusterService = mock(ClusterService.class); when(clusterService.state()).thenReturn(clusterState); - return new RepositoryIntegrityHealthIndicatorService(clusterService); + return new RepositoryIntegrityHealthIndicatorService(clusterService, featureService); } private SimpleHealthIndicatorDetails createDetails(int total, int corruptedCount, List corrupted, int unknown, int invalid) {