From 5fadfb73da2092ff749d86ee9878cd9778f62f94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Jamr=C3=B3z?= <79092062+k-jamroz@users.noreply.github.com> Date: Thu, 14 Mar 2024 12:47:40 +0100 Subject: [PATCH] Make AddIndexBackupOperation allowed in passive state [HZ-4582] [HZ-4572] (#1076) `AddIndexBackupOperation` should be allowed in passive state for the same reason why `AddIndexOperation` is. The issue became visible after improving assertions in https://github.com/hazelcast/hazelcast-mono/pull/1009, before it just failed (with warnings in logs) during proxy initialisation. This is a missing part of https://github.com/hazelcast/hazelcast-mono/pull/596. This PR fixes useless warnings `java.lang.IllegalStateException: Cluster is in PASSIVE state! Operation: com.hazelcast.map.impl.operation.AddIndexBackupOperation`. Map proxy initialization on primary does not fail - backup operation errors are ignored, so indexes should be fine. The only cases where it might matter are: 1. for nodes that do not own any partitions but that is very unlikely (see comment in `AddIndexBackupOperation.runInternal`). 2. if the members are restarted quickly one after another. Backup operation has 5s timeout and in case of backup operation error no response is returned. This may cause MapProxy not being initialized yet when another member joins, so it will not be sent in `PostJoinProxyOperation`. There is a discrepancy in handling `Backup` which contains not-`AllowedDuringPassiveState` operation in PASSIVE state between normal execution (allowed! - does not check nested operation) and offloaded execution (fails with `IllegalStateException` - steps engine checks preconditions for nested operation). That explains the difference between force-offload and normal behavior. This might be fixed separately as it is error prone, but generally our operations should be reasonably defined and not cause problems. Fixes HZ-4582, HZ-4572 Fixes https://github.com/hazelcast/hazelcast-enterprise/issues/7076 Fixes https://github.com/hazelcast/hazelcast-enterprise/issues/7075 GitOrigin-RevId: ef1eec30f3a78f07c0c1e38ae6fbffb482a6a9f4 --- .../map/impl/operation/AddIndexBackupOperation.java | 6 +++++- .../impl/operationservice/impl/OperationRunnerImpl.java | 3 --- .../com/hazelcast/client/map/ClientIndexStatsTest.java | 7 +++++++ 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/hazelcast/src/main/java/com/hazelcast/map/impl/operation/AddIndexBackupOperation.java b/hazelcast/src/main/java/com/hazelcast/map/impl/operation/AddIndexBackupOperation.java index 2d6444e27cf1..c9e07a887b02 100644 --- a/hazelcast/src/main/java/com/hazelcast/map/impl/operation/AddIndexBackupOperation.java +++ b/hazelcast/src/main/java/com/hazelcast/map/impl/operation/AddIndexBackupOperation.java @@ -22,11 +22,15 @@ import com.hazelcast.nio.ObjectDataInput; import com.hazelcast.nio.ObjectDataOutput; import com.hazelcast.query.impl.IndexRegistry; +import com.hazelcast.spi.impl.AllowedDuringPassiveState; import com.hazelcast.spi.impl.operationservice.BackupOperation; import java.io.IOException; -public class AddIndexBackupOperation extends MapOperation implements BackupOperation { +public class AddIndexBackupOperation extends MapOperation implements BackupOperation, + // AddIndexBackupOperation is used when map proxy for IMap with indexes is initialized during passive state + // (eg. IMap is read for the first time after HotRestart recovery when the cluster is still in PASSIVE state) + AllowedDuringPassiveState { private IndexConfig config; diff --git a/hazelcast/src/main/java/com/hazelcast/spi/impl/operationservice/impl/OperationRunnerImpl.java b/hazelcast/src/main/java/com/hazelcast/spi/impl/operationservice/impl/OperationRunnerImpl.java index 78716ed9b1a9..961ca82f7208 100644 --- a/hazelcast/src/main/java/com/hazelcast/spi/impl/operationservice/impl/OperationRunnerImpl.java +++ b/hazelcast/src/main/java/com/hazelcast/spi/impl/operationservice/impl/OperationRunnerImpl.java @@ -244,9 +244,6 @@ public void ensureNodeAndClusterHealth(Operation op) { * @param op the operation to execute * @param startNanos the time, as returned by {@link System#nanoTime} when this operation * started execution - * @return {@code true} if this operation was not executed and should be retried at a later time, - * {@code false} if the operation should not be retried, either because it - * timed out or has run successfully */ protected void run(Operation op, long startNanos) { executedOperationsCounter.inc(); diff --git a/hazelcast/src/test/java/com/hazelcast/client/map/ClientIndexStatsTest.java b/hazelcast/src/test/java/com/hazelcast/client/map/ClientIndexStatsTest.java index 3ba7631d05bf..ffdeb5902436 100644 --- a/hazelcast/src/test/java/com/hazelcast/client/map/ClientIndexStatsTest.java +++ b/hazelcast/src/test/java/com/hazelcast/client/map/ClientIndexStatsTest.java @@ -223,7 +223,14 @@ private void restartCluster(boolean graceful, ClusterState restartInState, Strin warmUpPartitions(member1, member2); member1.getCluster().changeClusterState(restartInState); member1 = restartMember(member1, member2, graceful); + + // do not restart the other member until proxy is fully initialized on first member + // so the proxy can be included in PostJoinProxyOperation. Proxy initialization may take + // some time (especially when backup operations fail) and is async process. + assertMapProxyInitializedEventually(indexMapName); + member2 = restartMember(member2, member1, graceful); + member1.getCluster().changeClusterState(ClusterState.ACTIVE); // Wait for proxy initialization by post join operation to avoid race condition.