From 5fadfb73da2092ff749d86ee9878cd9778f62f94 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Krzysztof=20Jamr=C3=B3z?=
 <79092062+k-jamroz@users.noreply.github.com>
Date: Thu, 14 Mar 2024 12:47:40 +0100
Subject: [PATCH] Make AddIndexBackupOperation allowed in passive state
 [HZ-4582] [HZ-4572] (#1076)

`AddIndexBackupOperation` should be allowed in passive state for the
same reason why `AddIndexOperation` is.

The issue became visible after improving assertions in
https://github.com/hazelcast/hazelcast-mono/pull/1009, before it just
failed (with warnings in logs) during proxy initialisation. This is a
missing part of https://github.com/hazelcast/hazelcast-mono/pull/596.

This PR fixes useless warnings `java.lang.IllegalStateException: Cluster
is in PASSIVE state! Operation:
com.hazelcast.map.impl.operation.AddIndexBackupOperation`. Map proxy
initialization on primary does not fail - backup operation errors are
ignored, so indexes should be fine. The only cases where it might matter
are:

1. for nodes that do not own any partitions but that is very unlikely
(see comment in `AddIndexBackupOperation.runInternal`).
2. if the members are restarted quickly one after another. Backup
operation has 5s timeout and in case of backup operation error no
response is returned. This may cause MapProxy not being initialized yet
when another member joins, so it will not be sent in
`PostJoinProxyOperation`.

There is a discrepancy in handling `Backup` which contains
not-`AllowedDuringPassiveState` operation in PASSIVE state between
normal execution (allowed! - does not check nested operation) and
offloaded execution (fails with `IllegalStateException` - steps engine
checks preconditions for nested operation). That explains the difference
between force-offload and normal behavior. This might be fixed
separately as it is error prone, but generally our operations should be
reasonably defined and not cause problems.

Fixes HZ-4582, HZ-4572
Fixes https://github.com/hazelcast/hazelcast-enterprise/issues/7076
Fixes https://github.com/hazelcast/hazelcast-enterprise/issues/7075

GitOrigin-RevId: ef1eec30f3a78f07c0c1e38ae6fbffb482a6a9f4
---
 .../map/impl/operation/AddIndexBackupOperation.java        | 6 +++++-
 .../impl/operationservice/impl/OperationRunnerImpl.java    | 3 ---
 .../com/hazelcast/client/map/ClientIndexStatsTest.java     | 7 +++++++
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/hazelcast/src/main/java/com/hazelcast/map/impl/operation/AddIndexBackupOperation.java b/hazelcast/src/main/java/com/hazelcast/map/impl/operation/AddIndexBackupOperation.java
index 2d6444e27cf1..c9e07a887b02 100644
--- a/hazelcast/src/main/java/com/hazelcast/map/impl/operation/AddIndexBackupOperation.java
+++ b/hazelcast/src/main/java/com/hazelcast/map/impl/operation/AddIndexBackupOperation.java
@@ -22,11 +22,15 @@
 import com.hazelcast.nio.ObjectDataInput;
 import com.hazelcast.nio.ObjectDataOutput;
 import com.hazelcast.query.impl.IndexRegistry;
+import com.hazelcast.spi.impl.AllowedDuringPassiveState;
 import com.hazelcast.spi.impl.operationservice.BackupOperation;
 
 import java.io.IOException;
 
-public class AddIndexBackupOperation extends MapOperation implements BackupOperation {
+public class AddIndexBackupOperation extends MapOperation implements BackupOperation,
+        // AddIndexBackupOperation is used when map proxy for IMap with indexes is initialized during passive state
+        // (eg. IMap is read for the first time after HotRestart recovery when the cluster is still in PASSIVE state)
+        AllowedDuringPassiveState {
 
     private IndexConfig config;
 
diff --git a/hazelcast/src/main/java/com/hazelcast/spi/impl/operationservice/impl/OperationRunnerImpl.java b/hazelcast/src/main/java/com/hazelcast/spi/impl/operationservice/impl/OperationRunnerImpl.java
index 78716ed9b1a9..961ca82f7208 100644
--- a/hazelcast/src/main/java/com/hazelcast/spi/impl/operationservice/impl/OperationRunnerImpl.java
+++ b/hazelcast/src/main/java/com/hazelcast/spi/impl/operationservice/impl/OperationRunnerImpl.java
@@ -244,9 +244,6 @@ public void ensureNodeAndClusterHealth(Operation op) {
      * @param op         the operation to execute
      * @param startNanos the time, as returned by {@link System#nanoTime} when this operation
      *                   started execution
-     * @return {@code true} if this operation was not executed and should be retried at a later time,
-     * {@code false} if the operation should not be retried, either because it
-     * timed out or has run successfully
      */
     protected void run(Operation op, long startNanos) {
         executedOperationsCounter.inc();
diff --git a/hazelcast/src/test/java/com/hazelcast/client/map/ClientIndexStatsTest.java b/hazelcast/src/test/java/com/hazelcast/client/map/ClientIndexStatsTest.java
index 3ba7631d05bf..ffdeb5902436 100644
--- a/hazelcast/src/test/java/com/hazelcast/client/map/ClientIndexStatsTest.java
+++ b/hazelcast/src/test/java/com/hazelcast/client/map/ClientIndexStatsTest.java
@@ -223,7 +223,14 @@ private void restartCluster(boolean graceful, ClusterState restartInState, Strin
         warmUpPartitions(member1, member2);
         member1.getCluster().changeClusterState(restartInState);
         member1 = restartMember(member1, member2, graceful);
+
+        // do not restart the other member until proxy is fully initialized on first member
+        // so the proxy can be included in PostJoinProxyOperation. Proxy initialization may take
+        // some time (especially when backup operations fail) and is async process.
+        assertMapProxyInitializedEventually(indexMapName);
+
         member2 = restartMember(member2, member1, graceful);
+
         member1.getCluster().changeClusterState(ClusterState.ACTIVE);
 
         // Wait for proxy initialization by post join operation to avoid race condition.