From 1122325fbc5fd821ec25159ca128fb3572aec841 Mon Sep 17 00:00:00 2001 From: Victor Cavichioli <79488234+VictorCavichioli@users.noreply.github.com> Date: Thu, 17 Oct 2024 07:58:27 -0300 Subject: [PATCH] Implement RepairScheduler, Schedule Manager and Related Things (#742) * Introduce RepairScheduler and SchedulerManager with Incremental Repair * Fix PMD Violations and Rebase Change * Removing test that requires lock to pass * Fix Review Comments --- CHANGES.md | 7 + .../ecchronos/application/config/Config.java | 17 + .../config/connection/ConnectionConfig.java | 23 - .../DistributedNativeConnection.java | 28 +- .../config/repair/RepairConfig.java | 2 +- .../config/runpolicy/RunPolicyConfig.java | 34 ++ .../config/runpolicy/TimeBasedConfig.java | 34 ++ .../config/runpolicy/package-info.java | 18 + .../AgentNativeConnectionProvider.java | 8 +- .../application/spring/BeanConfigurator.java | 31 +- .../application/spring/ECChronos.java | 117 ++++ .../spring/ECChronosInternals.java | 180 ++++++ .../spring/RetrySchedulerService.java | 19 +- application/src/main/resources/ecc.yml | 7 + .../application/config/TestConfig.java | 4 +- .../application/config/TestDefaultConfig.java | 2 +- .../config/repair/TestRepairSchedule.java | 2 +- .../spring/RetrySchedulerServiceTest.java | 2 +- application/src/test/resources/all_set.yml | 6 +- .../DistributedJmxConnectionProviderImpl.java | 4 +- core.impl/pom.xml | 4 + .../impl/logging/ThrottledLogMessage.java | 109 ++++ .../core/impl/logging/ThrottlingLogger.java | 99 +++ .../core/impl/logging/package-info.java | 18 + .../core/impl/metrics/CassandraMetrics.java | 170 ++++++ .../core/impl/metrics/package-info.java | 18 + .../DefaultRepairConfigurationProvider.java | 578 ++++++++++++++++++ .../core/impl/repair/RepairGroup.java | 259 ++++++++ .../core/impl/repair/RepairTask.java | 471 ++++++++++++++ .../incremental/IncrementalRepairJob.java | 338 ++++++++++ .../incremental/IncrementalRepairTask.java | 96 +++ 
.../impl/repair/incremental/package-info.java | 18 + .../core/impl/repair/package-info.java | 18 + .../scheduler/DefaultJobComparator.java | 43 ++ .../repair/scheduler/RepairSchedulerImpl.java | 401 ++++++++++++ .../repair/scheduler/ScheduleManagerImpl.java | 358 +++++++++++ .../repair/scheduler/ScheduledJobQueue.java | 163 +++++ .../impl/repair/scheduler/package-info.java | 18 + .../state/ReplicationStateImpl.java | 2 +- .../impl/{ => repair}/state/package-info.java | 2 +- .../table/ReplicatedTableProviderImpl.java | 169 +++++ .../core/impl/table/TimeBasedRunPolicy.java | 397 ++++++++++++ .../core/impl/repair/TestRepairGroup.java | 213 +++++++ .../incremental/TestIncrementalRepairJob.java | 305 +++++++++ .../core/impl/repair/scheduler/DummyJob.java | 72 +++ .../scheduler/TestRepairSchedulerImpl.java | 283 +++++++++ .../repair/scheduler/TestScheduleManager.java | 342 +++++++++++ .../scheduler/TestScheduledJobQueue.java | 192 ++++++ .../impl/state/TestReplicationStateImpl.java | 1 + .../repair/config/RepairConfiguration.java | 34 ++ .../ecchronos/core/repair/package-info.java | 2 +- .../repair/scheduler/RepairScheduler.java | 63 ++ .../core/repair/scheduler/RunPolicy.java | 27 + .../repair/scheduler/ScheduleManager.java | 46 ++ .../core/repair/scheduler/ScheduledJob.java | 378 ++++++++++++ .../repair/scheduler/ScheduledRepairJob.java | 139 +++++ .../scheduler/ScheduledRepairJobView.java | 200 ++++++ .../core/repair/scheduler/ScheduledTask.java | 57 ++ .../core/repair/scheduler/package-info.java | 18 + .../core/state/RepairStateSnapshot.java | 187 ++++++ .../core/state/ReplicaRepairGroup.java | 102 ++++ .../core/state/VnodeRepairState.java | 224 +++++++ .../core/state/VnodeRepairStateUtils.java | 100 +++ .../core/state/VnodeRepairStates.java | 71 +++ .../core/table/ReplicatedTableProvider.java | 39 ++ .../core/table/TableRepairMetrics.java | 59 ++ .../core/table/TableRepairPolicy.java | 29 + .../core/state/TestRepairStateSnapshot.java | 79 +++ 
.../core/state/TestReplicaRepairGroup.java | 67 ++ .../core/state/TestVnodeRepairState.java | 94 +++ utils/pom.xml | 6 + .../utils/converter/ManyToOneIterator.java | 66 ++ .../{enums => }/converter/UnitConverter.java | 2 +- .../{enums => }/converter/package-info.java | 2 +- .../utils/enums/repair/RepairStatus.java | 3 + .../exceptions/ScheduledJobException.java | 39 ++ 76 files changed, 7769 insertions(+), 66 deletions(-) create mode 100644 application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/RunPolicyConfig.java create mode 100644 application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/TimeBasedConfig.java create mode 100644 application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/package-info.java create mode 100644 application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronos.java create mode 100644 application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronosInternals.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/ThrottledLogMessage.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/ThrottlingLogger.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/package-info.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/metrics/CassandraMetrics.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/metrics/package-info.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/DefaultRepairConfigurationProvider.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairGroup.java create mode 100644 
core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairTask.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairJob.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairTask.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/package-info.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/package-info.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/DefaultJobComparator.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/RepairSchedulerImpl.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/ScheduleManagerImpl.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/ScheduledJobQueue.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/package-info.java rename core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/{ => repair}/state/ReplicationStateImpl.java (99%) rename core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/{ => repair}/state/package-info.java (90%) create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/ReplicatedTableProviderImpl.java create mode 100644 core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TimeBasedRunPolicy.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestRepairGroup.java create mode 100644 
core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/TestIncrementalRepairJob.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/DummyJob.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestRepairSchedulerImpl.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestScheduleManager.java create mode 100644 core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestScheduledJobQueue.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/RepairScheduler.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/RunPolicy.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduleManager.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledJob.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledRepairJob.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledRepairJobView.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledTask.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/package-info.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairStateSnapshot.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/ReplicaRepairGroup.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairState.java create mode 100644 
core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStateUtils.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStates.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/ReplicatedTableProvider.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableRepairMetrics.java create mode 100644 core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableRepairPolicy.java create mode 100644 core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestRepairStateSnapshot.java create mode 100644 core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestReplicaRepairGroup.java create mode 100644 core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestVnodeRepairState.java create mode 100644 utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/ManyToOneIterator.java rename utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/{enums => }/converter/UnitConverter.java (96%) rename utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/{enums => }/converter/package-info.java (90%) create mode 100644 utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/ScheduledJobException.java diff --git a/CHANGES.md b/CHANGES.md index f0e679e2f..fb1d475b7 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,13 @@ ## Version 1.0.0 (Not yet Released) +* Investigate Creation of RepairScheduler and ScheduleManager #714 +* Implement ScheduledJobQueue for Prioritized Job Management and Execution - Issue #740 +* Implement RepairGroup Class for Managing and Executing Repair Tasks - Issue #738 +* Create IncrementalRepairTask Class - Issue #736 +* Implement ScheduledRepairJob, ScheduledJob and ScheduledTask for Automated Recurring Task Scheduling in Cassandra - Issue #737 +* Create RepairTask Abstract Class to Handle Repair Operations - Issue 
#717 +* Create ReplicationState and ReplicationStateImpl Class for Managing Token-to-Replicas Mapping - Issue #722 * Create a RepairHistory to Store Information on Repair Operations Performed by ecChronos Agent #730 * Generate Unique EcChronos ID #678 * Create RepairConfiguration class for repair configurations - Issue #716 diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/Config.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/Config.java index b26c8798f..7cc16003a 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/Config.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/Config.java @@ -17,6 +17,7 @@ import com.ericsson.bss.cassandra.ecchronos.application.config.connection.ConnectionConfig; import com.ericsson.bss.cassandra.ecchronos.application.config.repair.GlobalRepairConfig; import com.ericsson.bss.cassandra.ecchronos.application.config.rest.RestServerConfig; +import com.ericsson.bss.cassandra.ecchronos.application.config.runpolicy.RunPolicyConfig; import com.ericsson.bss.cassandra.ecchronos.application.config.scheduler.SchedulerConfig; import com.fasterxml.jackson.annotation.JsonProperty; @@ -24,6 +25,7 @@ public class Config { private ConnectionConfig myConnectionConfig = new ConnectionConfig(); private GlobalRepairConfig myRepairConfig = new GlobalRepairConfig(); + private RunPolicyConfig myRunPolicyConfig = new RunPolicyConfig(); private SchedulerConfig mySchedulerConfig = new SchedulerConfig(); private RestServerConfig myRestServerConfig = new RestServerConfig(); @@ -68,6 +70,21 @@ public void setRepairConfig(final GlobalRepairConfig repairConfig) } } + @JsonProperty("run_policy") + public final RunPolicyConfig getRunPolicy() + { + return myRunPolicyConfig; + } + + @JsonProperty("run_policy") + public final void setRunPolicyConfig(final RunPolicyConfig runPolicyConfig) + { + if 
(runPolicyConfig != null) + { + myRunPolicyConfig = runPolicyConfig; + } + } + @JsonProperty("scheduler") public final SchedulerConfig getSchedulerConfig() { diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/ConnectionConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/ConnectionConfig.java index 942f412c9..277676a94 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/ConnectionConfig.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/ConnectionConfig.java @@ -14,14 +14,12 @@ */ package com.ericsson.bss.cassandra.ecchronos.application.config.connection; -import com.ericsson.bss.cassandra.ecchronos.application.config.repair.Interval; import com.fasterxml.jackson.annotation.JsonProperty; public class ConnectionConfig { private DistributedNativeConnection myCqlConnection = new DistributedNativeConnection(); private DistributedJmxConnection myJmxConnection = new DistributedJmxConnection(); - private Interval myConnectionDelay = new Interval(); @JsonProperty("cql") public final DistributedNativeConnection getCqlConnection() @@ -58,26 +56,5 @@ public final String toString() { return String.format("Connection(cql=%s, jmx=%s)", myCqlConnection, myJmxConnection); } - /** - * Sets the connectionDelay used to specify the time until the next connection. - * - * @param connectionDelay - * the local datacenter to set. - */ - @JsonProperty("connectionDelay") - public void setConnectionDelay(final Interval connectionDelay) - { - myConnectionDelay = connectionDelay; - } - /** - * Gets the connectionDelay used to specify the time until the next connection. - * - * @return the connectionDelay. 
- */ - @JsonProperty("connectionDelay") - public Interval getConnectionDelay() - { - return myConnectionDelay; - } } diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/DistributedNativeConnection.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/DistributedNativeConnection.java index ecc7e0049..ca9a453b4 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/DistributedNativeConnection.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/connection/DistributedNativeConnection.java @@ -15,9 +15,11 @@ package com.ericsson.bss.cassandra.ecchronos.application.config.connection; import com.ericsson.bss.cassandra.ecchronos.application.config.Config; +import com.ericsson.bss.cassandra.ecchronos.application.config.repair.Interval; import com.ericsson.bss.cassandra.ecchronos.application.providers.AgentNativeConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.connection.CertificateHandler; import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.DefaultRepairConfigurationProvider; import com.fasterxml.jackson.annotation.JsonProperty; import java.util.function.Supplier; @@ -25,6 +27,7 @@ public class DistributedNativeConnection extends Connection { private AgentConnectionConfig myAgentConnectionConfig = new AgentConnectionConfig(); + private Interval myConnectionDelay = new Interval(); public DistributedNativeConnection() { @@ -50,6 +53,28 @@ public final void setAgentConnectionConfig(final AgentConnectionConfig agentConn myAgentConnectionConfig = agentConnectionConfig; } + /** + * Sets the connectionDelay used to specify the time until the next connection. + * + * @param connectionDelay + * the connection delay to set. 
+ */ + @JsonProperty("connectionDelay") + public void setConnectionDelay(final Interval connectionDelay) + { + myConnectionDelay = connectionDelay; + } + /** + * Gets the connectionDelay used to specify the time until the next connection. + * + * @return the connectionDelay. + */ + @JsonProperty("connectionDelay") + public Interval getConnectionDelay() + { + return myConnectionDelay; + } + /** * @return Class[] */ @@ -60,7 +85,8 @@ protected Class[] expectedConstructor() { Config.class, Supplier.class, - CertificateHandler.class + CertificateHandler.class, + DefaultRepairConfigurationProvider.class, }; } } diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/repair/RepairConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/repair/RepairConfig.java index e6e51c04c..d0f12d80a 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/repair/RepairConfig.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/repair/RepairConfig.java @@ -15,7 +15,7 @@ package com.ericsson.bss.cassandra.ecchronos.application.config.repair; import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; -import com.ericsson.bss.cassandra.ecchronos.utils.enums.converter.UnitConverter; +import com.ericsson.bss.cassandra.ecchronos.utils.converter.UnitConverter; import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; import java.util.Locale; import java.util.concurrent.TimeUnit; diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/RunPolicyConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/RunPolicyConfig.java new file mode 100644 index 000000000..18275fb9c --- /dev/null +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/RunPolicyConfig.java @@ 
-0,0 +1,34 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.application.config.runpolicy; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class RunPolicyConfig +{ + private TimeBasedConfig myTimeBasedConfig = new TimeBasedConfig(); + + @JsonProperty("time_based") + public final TimeBasedConfig getTimeBasedConfig() + { + return myTimeBasedConfig; + } + + @JsonProperty("time_based") + public final void setTimeBasedConfig(final TimeBasedConfig timeBasedConfig) + { + myTimeBasedConfig = timeBasedConfig; + } +} diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/TimeBasedConfig.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/TimeBasedConfig.java new file mode 100644 index 000000000..98c79333d --- /dev/null +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/TimeBasedConfig.java @@ -0,0 +1,34 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.application.config.runpolicy; + +import com.fasterxml.jackson.annotation.JsonProperty; + +public class TimeBasedConfig +{ + private String myKeyspaceName = "ecchronos"; + + @JsonProperty("keyspace") + public final String getKeyspaceName() + { + return myKeyspaceName; + } + + @JsonProperty("keyspace") + public final void setKeyspaceName(final String keyspaceName) + { + myKeyspaceName = keyspaceName; + } +} diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/package-info.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/package-info.java new file mode 100644 index 000000000..29ad7507c --- /dev/null +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/config/runpolicy/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains configurations related to run policy. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.application.config.runpolicy; diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java index b124ff592..958056996 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/providers/AgentNativeConnectionProvider.java @@ -27,6 +27,7 @@ import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.connection.impl.builders.DistributedNativeBuilder; import com.ericsson.bss.cassandra.ecchronos.connection.impl.providers.DistributedNativeConnectionProviderImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.DefaultRepairConfigurationProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -63,7 +64,8 @@ public class AgentNativeConnectionProvider implements DistributedNativeConnectio public AgentNativeConnectionProvider( final Config config, final Supplier cqlSecuritySupplier, - final CertificateHandler certificateHandler + final CertificateHandler certificateHandler, + final DefaultRepairConfigurationProvider defaultRepairConfigurationProvider ) { AgentConnectionConfig agentConnectionConfig = config.getConnectionConfig().getCqlConnection() @@ -89,7 +91,9 @@ public AgentNativeConnectionProvider( .withAgentType(agentConnectionConfig.getType()) .withLocalDatacenter(agentConnectionConfig.getLocalDatacenter()) .withAuthProvider(authProvider) - .withSslEngineFactory(sslEngineFactory); + .withSslEngineFactory(sslEngineFactory) + .withSchemaChangeListener(defaultRepairConfigurationProvider) + .withNodeStateListener(defaultRepairConfigurationProvider); LOG.info("Preparing Agent 
Connection Config"); nativeConnectionBuilder = resolveAgentProviderBuilder(nativeConnectionBuilder, agentConnectionConfig); LOG.info("Establishing Connection With Nodes"); diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/BeanConfigurator.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/BeanConfigurator.java index 68e29dd96..9e7c8e2da 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/BeanConfigurator.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/BeanConfigurator.java @@ -21,7 +21,7 @@ import com.ericsson.bss.cassandra.ecchronos.application.providers.AgentJmxConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.connection.DistributedJmxConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.core.impl.metadata.NodeResolverImpl; -import com.ericsson.bss.cassandra.ecchronos.core.impl.state.ReplicationStateImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state.ReplicationStateImpl; import com.ericsson.bss.cassandra.ecchronos.core.metadata.NodeResolver; import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; import com.ericsson.bss.cassandra.ecchronos.data.sync.EccNodesSync; @@ -42,6 +42,8 @@ import com.ericsson.bss.cassandra.ecchronos.application.providers.AgentNativeConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.connection.CertificateHandler; import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.DefaultRepairConfigurationProvider; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.boot.web.embedded.tomcat.TomcatServletWebServerFactory; @@ -141,6 +143,17 @@ public void addFormatters(final FormatterRegistry registry) }; } + /** + * Provides a {@link DefaultRepairConfigurationProvider} bean. 
+ * + * @return a {@link DefaultRepairConfigurationProvider} object. + */ + @Bean + public DefaultRepairConfigurationProvider defaultRepairConfigurationProvider() + { + return new DefaultRepairConfigurationProvider(); + } + /** * Configures the embedded web server factory with the host and port specified in the application configuration. * @@ -168,10 +181,11 @@ public ConfigurableServletWebServerFactory webServerFactory(final Config config) */ @Bean public DistributedNativeConnectionProvider distributedNativeConnectionProvider( - final Config config + final Config config, + final DefaultRepairConfigurationProvider defaultRepairConfigurationProvider ) { - return getDistributedNativeConnection(config, cqlSecurity::get); + return getDistributedNativeConnection(config, cqlSecurity::get, defaultRepairConfigurationProvider); } /** @@ -253,12 +267,17 @@ private Config getConfiguration() throws ConfigurationException private DistributedNativeConnectionProvider getDistributedNativeConnection( final Config config, - final Supplier securitySupplier + final Supplier securitySupplier, + final DefaultRepairConfigurationProvider defaultRepairConfigurationProvider ) { Supplier tlsSupplier = () -> securitySupplier.get().getCqlTlsConfig(); CertificateHandler certificateHandler = createCertificateHandler(tlsSupplier); - return new AgentNativeConnectionProvider(config, securitySupplier, certificateHandler); + return new AgentNativeConnectionProvider( + config, + securitySupplier, + certificateHandler, + defaultRepairConfigurationProvider); } private DistributedJmxConnectionProvider getDistributedJmxConnection( @@ -299,7 +318,7 @@ private EccNodesSync getEccNodesSync( final DistributedNativeConnectionProvider distributedNativeConnectionProvider ) throws UnknownHostException, EcChronosException, ConfigurationException { - Interval connectionDelay = config().getConnectionConfig().getConnectionDelay(); + Interval connectionDelay = 
config().getConnectionConfig().getCqlConnection().getConnectionDelay(); EccNodesSync myEccNodesSync = EccNodesSync.newBuilder() .withInitialNodesList(distributedNativeConnectionProvider.getNodes()) .withSession(distributedNativeConnectionProvider.getCqlSession()) diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronos.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronos.java new file mode 100644 index 000000000..58f5f507b --- /dev/null +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronos.java @@ -0,0 +1,117 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.application.spring; + +import com.ericsson.bss.cassandra.ecchronos.application.config.Config; +import com.ericsson.bss.cassandra.ecchronos.application.config.repair.FileBasedRepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.connection.DistributedJmxConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.DefaultRepairConfigurationProvider; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler.RepairSchedulerImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.table.TimeBasedRunPolicy; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RepairScheduler; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.table.ReplicatedTableProvider; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReferenceFactory; +import com.ericsson.bss.cassandra.ecchronos.data.sync.EccNodesSync; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ConfigurationException; +import java.io.Closeable; + +import com.datastax.oss.driver.api.core.CqlSession; +import java.util.Collections; +import org.springframework.context.ApplicationContext; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; + +@Configuration +public class ECChronos implements Closeable +{ + private final ECChronosInternals myECChronosInternals; + private final RepairSchedulerImpl myRepairSchedulerImpl; + private final TimeBasedRunPolicy myTimeBasedRunPolicy; + + public ECChronos( + final Config configuration, + final ApplicationContext applicationContext, + final DistributedNativeConnectionProvider nativeConnectionProvider, + final DistributedJmxConnectionProvider jmxConnectionProvider, + final ReplicationState replicationState, + final 
DefaultRepairConfigurationProvider defaultRepairConfigurationProvider, + final EccNodesSync eccNodesSync + ) + throws ConfigurationException + { + myECChronosInternals = new ECChronosInternals( + configuration, nativeConnectionProvider, jmxConnectionProvider, eccNodesSync); + + CqlSession session = nativeConnectionProvider.getCqlSession(); + + myTimeBasedRunPolicy = TimeBasedRunPolicy.builder() + .withSession(session) + .withKeyspaceName(configuration.getRunPolicy().getTimeBasedConfig().getKeyspaceName()) + .build(); + + myRepairSchedulerImpl = RepairSchedulerImpl.builder() + .withJmxProxyFactory(myECChronosInternals.getJmxProxyFactory()) + .withScheduleManager(myECChronosInternals.getScheduleManager()) + .withTableRepairMetrics(myECChronosInternals.getTableRepairMetrics()) + .withCassandraMetrics(myECChronosInternals.getCassandraMetrics()) + .withReplicationState(replicationState) + .withRepairPolicies(Collections.singletonList(myTimeBasedRunPolicy)) + .withCassandraMetrics(myECChronosInternals.getCassandraMetrics()) + .build(); + + AbstractRepairConfigurationProvider repairConfigurationProvider = new FileBasedRepairConfiguration(applicationContext); + + defaultRepairConfigurationProvider.fromBuilder(DefaultRepairConfigurationProvider.newBuilder() + .withRepairScheduler(myRepairSchedulerImpl) + .withSession(session) + .withNodesList(nativeConnectionProvider.getNodes()) + .withReplicatedTableProvider(myECChronosInternals.getReplicatedTableProvider()) + .withRepairConfiguration(repairConfigurationProvider::get) + .withTableReferenceFactory(myECChronosInternals.getTableReferenceFactory())); + + myECChronosInternals.addRunPolicy(myTimeBasedRunPolicy); + } + + @Bean + public TableReferenceFactory tableReferenceFactory() + { + return myECChronosInternals.getTableReferenceFactory(); + } + + @Bean(destroyMethod = "") + public RepairScheduler repairScheduler() + { + return myRepairSchedulerImpl; + } + + @Bean + public ReplicatedTableProvider replicatedTableProvider() + { + 
return myECChronosInternals.getReplicatedTableProvider(); + } + + @Override + public final void close() + { + myECChronosInternals.removeRunPolicy(myTimeBasedRunPolicy); + myTimeBasedRunPolicy.close(); + myRepairSchedulerImpl.close(); + myECChronosInternals.close(); + } +} + + diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronosInternals.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronosInternals.java new file mode 100644 index 000000000..24cd990db --- /dev/null +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/ECChronosInternals.java @@ -0,0 +1,180 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.application.spring; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.application.config.Config; +import com.ericsson.bss.cassandra.ecchronos.connection.DistributedJmxConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.connection.DistributedNativeConnectionProvider; +import com.ericsson.bss.cassandra.ecchronos.core.impl.jmx.DistributedJmxProxyFactoryImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.metrics.CassandraMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler.ScheduleManagerImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.table.ReplicatedTableProviderImpl; +import com.ericsson.bss.cassandra.ecchronos.core.impl.table.TableReferenceFactoryImpl; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RunPolicy; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduleManager; +import com.ericsson.bss.cassandra.ecchronos.core.table.ReplicatedTableProvider; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReferenceFactory; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.data.sync.EccNodesSync; +import java.io.Closeable; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ECChronosInternals implements Closeable +{ + private static final Logger LOG = LoggerFactory.getLogger(ECChronosInternals.class); + private static final NoOpRepairMetrics NO_OP_REPAIR_METRICS = new NoOpRepairMetrics(); + + private final ScheduleManagerImpl 
myScheduleManagerImpl; + private final ReplicatedTableProviderImpl myReplicatedTableProvider; + private final TableReferenceFactory myTableReferenceFactory; + private final DistributedJmxProxyFactory myJmxProxyFactory; + private final CassandraMetrics myCassandraMetrics; + + public ECChronosInternals( + final Config configuration, + final DistributedNativeConnectionProvider nativeConnectionProvider, + final DistributedJmxConnectionProvider jmxConnectionProvider, + final EccNodesSync eccNodesSync + ) + { + myJmxProxyFactory = DistributedJmxProxyFactoryImpl.builder() + .withJmxConnectionProvider(jmxConnectionProvider) + .withEccNodesSync(eccNodesSync) + .withNodesMap(generateNodesMap(nativeConnectionProvider.getNodes())) + .build(); + + CqlSession session = nativeConnectionProvider.getCqlSession(); + + myTableReferenceFactory = new TableReferenceFactoryImpl(session); + + myReplicatedTableProvider = new ReplicatedTableProviderImpl( + session, + myTableReferenceFactory, + nativeConnectionProvider.getNodes()); + + myCassandraMetrics = new CassandraMetrics(myJmxProxyFactory); + myScheduleManagerImpl = ScheduleManagerImpl.builder() + .withRunInterval(configuration.getSchedulerConfig().getFrequency().getInterval(TimeUnit.MILLISECONDS), + TimeUnit.MILLISECONDS) + .withNodeIDList(jmxConnectionProvider.getJmxConnections().keySet()) + .build(); + } + + public final TableReferenceFactory getTableReferenceFactory() + { + return myTableReferenceFactory; + } + + public final ReplicatedTableProvider getReplicatedTableProvider() + { + return myReplicatedTableProvider; + } + + public final ScheduleManager getScheduleManager() + { + return myScheduleManagerImpl; + } + + public final DistributedJmxProxyFactory getJmxProxyFactory() + { + return myJmxProxyFactory; + } + + public final CassandraMetrics getCassandraMetrics() + { + return myCassandraMetrics; + } + + public final TableRepairMetrics getTableRepairMetrics() + { + return NO_OP_REPAIR_METRICS; + } + + public final boolean 
addRunPolicy(final RunPolicy runPolicy) + { + return myScheduleManagerImpl.addRunPolicy(runPolicy); + } + + public final boolean removeRunPolicy(final RunPolicy runPolicy) + { + return myScheduleManagerImpl.removeRunPolicy(runPolicy); + } + + @Override + public final void close() + { + myScheduleManagerImpl.close(); + + myCassandraMetrics.close(); + } + + // In the future we should modify the DistributedNativeConnectionProvider + // to generate this nodesMap instead of a nodesList + private Map generateNodesMap(final List nodes) + { + Map nodesMap = new HashMap<>(); + nodes.forEach(node -> nodesMap.put(node.getHostId(), node)); + return nodesMap; + } + + private static final class NoOpRepairMetrics implements TableRepairMetrics + { + + @Override + public void repairState(final TableReference tableReference, + final int repairedRanges, + final int notRepairedRanges) + { + LOG.trace("Updated repair state of {}, {}/{} repaired ranges", tableReference, repairedRanges, + notRepairedRanges); + } + + @Override + public void lastRepairedAt(final TableReference tableReference, final long lastRepairedAt) + { + LOG.debug("Table {} last repaired at {}", tableReference, lastRepairedAt); + } + + @Override + public void remainingRepairTime(final TableReference tableReference, final long remainingRepairTime) + { + LOG.debug("Table {} remaining repair time {}", tableReference, remainingRepairTime); + } + + @Override + public void repairSession(final TableReference tableReference, + final long timeTaken, + final TimeUnit timeUnit, + final boolean successful) + { + if (LOG.isTraceEnabled()) + { + LOG.trace("Repair timing for table {} {}ms, it was {}", tableReference, + timeUnit.toMillis(timeTaken), successful ? 
"successful" : "not successful"); + } + } + } +} + diff --git a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/RetrySchedulerService.java b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/RetrySchedulerService.java index e41470487..2af7f7e23 100644 --- a/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/RetrySchedulerService.java +++ b/application/src/main/java/com/ericsson/bss/cassandra/ecchronos/application/spring/RetrySchedulerService.java @@ -35,7 +35,6 @@ import org.springframework.stereotype.Service; import javax.management.remote.JMXConnector; -import java.io.IOException; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; @@ -97,7 +96,6 @@ public void startScheduler() @VisibleForTesting void retryNodes() { - LOG.warn("Retrying unavailable nodes"); List unavailableNodes = findUnavailableNodes(); if (unavailableNodes.isEmpty()) @@ -134,6 +132,7 @@ private List findUnavailableNodes() private void retryConnectionForNode(final Node node) { UUID nodeId = node.getHostId(); + LOG.warn("Node {} is unavailable. 
Will retry to connect.", nodeId); for (int attempt = 1; attempt <= retryBackoffStrategy.getMaxAttempts(); attempt++) { if (tryReconnectToNode(node, nodeId, attempt)) @@ -173,7 +172,7 @@ private boolean establishConnectionToNode(final Node node) { UUID nodeId = node.getHostId(); JMXConnector jmxConnector = myJmxConnectionProvider.getJmxConnector(nodeId); - boolean isConnected = jmxConnector != null && isConnected(jmxConnector); + boolean isConnected = jmxConnector != null && myJmxConnectionProvider.isConnected(jmxConnector); if (isConnected) { @@ -188,20 +187,6 @@ private boolean establishConnectionToNode(final Node node) return isConnected; } - private boolean isConnected(final JMXConnector jmxConnector) - { - try - { - jmxConnector.getConnectionId(); - return true; - } - catch (IOException e) - { - LOG.error("Error while checking connection for JMX connector", e); - return false; - } - } - @Override public void destroy() { diff --git a/application/src/main/resources/ecc.yml b/application/src/main/resources/ecc.yml index 384ada9cc..fe8a017e5 100644 --- a/application/src/main/resources/ecc.yml +++ b/application/src/main/resources/ecc.yml @@ -240,6 +240,13 @@ repair: ## repair_type: incremental +run_policy: + time_based: + ## + ## The keyspace used for the time based run policy tables. 
+ ## + keyspace: ecchronos + scheduler: ## ## Specifies the frequency the scheduler checks for work to be done diff --git a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java index 18704c189..8089448cc 100644 --- a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java +++ b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestConfig.java @@ -21,9 +21,7 @@ import com.ericsson.bss.cassandra.ecchronos.application.config.repair.Priority; import com.ericsson.bss.cassandra.ecchronos.application.providers.AgentJmxConnectionProvider; import com.ericsson.bss.cassandra.ecchronos.application.providers.AgentNativeConnectionProvider; -import com.ericsson.bss.cassandra.ecchronos.application.spring.AbstractRepairConfigurationProvider; import com.ericsson.bss.cassandra.ecchronos.connection.DataCenterAwarePolicy; -import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; import com.ericsson.bss.cassandra.ecchronos.utils.enums.connection.ConnectionType; import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairHistoryProvider; import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; @@ -225,7 +223,7 @@ public void testMaxDelayLessThanStartDelayThrowsException() @Test public void testConnectionDelay() { - Interval connectionDelay = config.getConnectionConfig().getConnectionDelay(); + Interval connectionDelay = nativeConnection.getConnectionDelay(); assertThat(connectionDelay.getUnit()).isEqualTo(TimeUnit.MINUTES); assertThat(connectionDelay.getTime()).isEqualTo(45L); } diff --git a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestDefaultConfig.java b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestDefaultConfig.java index 
aa8f9aa92..cb9bc520b 100644 --- a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestDefaultConfig.java +++ b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/TestDefaultConfig.java @@ -60,7 +60,7 @@ public void setup() throws IOException @Test public void testConnectionDelayDefault() { - Interval connectionDelay = config.getConnectionConfig().getConnectionDelay(); + Interval connectionDelay = config.getConnectionConfig().getCqlConnection().getConnectionDelay(); assertThat(connectionDelay.getUnit()).isEqualTo(TimeUnit.MINUTES); assertThat(connectionDelay.getTime()).isEqualTo(60l); } diff --git a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/repair/TestRepairSchedule.java b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/repair/TestRepairSchedule.java index 74acb81bf..22eb32782 100644 --- a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/repair/TestRepairSchedule.java +++ b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/config/repair/TestRepairSchedule.java @@ -19,7 +19,7 @@ import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; -import com.ericsson.bss.cassandra.ecchronos.utils.enums.converter.UnitConverter; +import com.ericsson.bss.cassandra.ecchronos.utils.converter.UnitConverter; import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; import java.io.File; import java.util.concurrent.TimeUnit; diff --git a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/spring/RetrySchedulerServiceTest.java b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/spring/RetrySchedulerServiceTest.java index e74640101..b657c5bfc 100644 --- a/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/spring/RetrySchedulerServiceTest.java +++ 
b/application/src/test/java/com/ericsson/bss/cassandra/ecchronos/application/spring/RetrySchedulerServiceTest.java @@ -130,7 +130,7 @@ void testRetryNodesWithUnavailableNodeWhenConnectionSuccessful() throws IOExcept // Mock JMX connector behavior JMXConnector mockJmxConnector = mock(JMXConnector.class); when(jmxConnectionProvider.getJmxConnector(nodeId)).thenReturn(mockJmxConnector); - when(mockJmxConnector.getConnectionId()).thenReturn("connected"); + when(jmxConnectionProvider.isConnected(eq(mockJmxConnector))).thenReturn(true); // Mock the JMX connections map ConcurrentHashMap mockJmxConnections = mock(ConcurrentHashMap.class); diff --git a/application/src/test/resources/all_set.yml b/application/src/test/resources/all_set.yml index d35560426..218c19722 100644 --- a/application/src/test/resources/all_set.yml +++ b/application/src/test/resources/all_set.yml @@ -43,9 +43,9 @@ connection: - host: 127.0.0.4 port: 9042 provider: com.ericsson.bss.cassandra.ecchronos.application.providers.AgentNativeConnectionProvider - connectionDelay: - time: 45 - unit: minutes + connectionDelay: + time: 45 + unit: minutes jmx: provider: com.ericsson.bss.cassandra.ecchronos.application.providers.AgentJmxConnectionProvider retryPolicy: diff --git a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedJmxConnectionProviderImpl.java b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedJmxConnectionProviderImpl.java index c40253a7c..816fd18d8 100644 --- a/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedJmxConnectionProviderImpl.java +++ b/connection.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/connection/impl/providers/DistributedJmxConnectionProviderImpl.java @@ -125,9 +125,9 @@ public JMXConnector getJmxConnector(final UUID nodeID) @Override public void close() throws IOException { - for (int i = 0; i <= 
myNodesList.size(); i++) + for (Node node : myNodesList) { - close(myNodesList.get(i).getHostId()); + close(node.getHostId()); } } diff --git a/core.impl/pom.xml b/core.impl/pom.xml index c200e7664..d42db13e7 100644 --- a/core.impl/pom.xml +++ b/core.impl/pom.xml @@ -104,5 +104,9 @@ assertj-core test + + nl.jqno.equalsverifier + equalsverifier + \ No newline at end of file diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/ThrottledLogMessage.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/ThrottledLogMessage.java new file mode 100644 index 000000000..5f8b9c616 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/ThrottledLogMessage.java @@ -0,0 +1,109 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.logging; + +import org.slf4j.Logger; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * A utility class for logging messages with throttling capabilities. + *

+ * This class ensures that a specific log message is only logged at specified intervals, + * preventing log flooding for high-frequency events. + *

+ */ +public class ThrottledLogMessage +{ + private final String myMessage; + private final long myIntervalNanos; + private final AtomicLong myLastLogTime; + + /** + * Constructs a ThrottledLogMessage with the specified message and interval. + * + * @param message the log message to be throttled. Must not be {@code null}. + * @param intervalNanos the minimum interval (in nanoseconds) between consecutive log messages. Must be greater than zero. + * Neither argument is validated by this constructor; the caller must uphold both requirements. + */ + public ThrottledLogMessage(final String message, final long intervalNanos) + { + myMessage = message; + myIntervalNanos = intervalNanos; + myLastLogTime = new AtomicLong(Long.MIN_VALUE); + } + + /** + * Checks whether the logging of the message is allowed based on the specified time. + * + * @param timeInNanos the current time in nanoseconds. + * @return {@code true} if the message can be logged; {@code false} otherwise. + */ + private boolean isAllowedToLog(final long timeInNanos) + { + long lastLogTime = myLastLogTime.get(); + return timeInNanos >= lastLogTime && myLastLogTime.compareAndSet(lastLogTime, timeInNanos + myIntervalNanos); + } + + /** + * Logs an informational message if the logging is allowed based on the throttling interval. + * + * @param logger the logger to log the message to. Must not be {@code null}. + * @param timeInMs the current time in nanoseconds, despite the parameter name; it is compared against the nanosecond-based throttling deadline. + * @param objects optional parameters to be included in the log message. + * @throws NullPointerException if {@code logger} is {@code null}. + */ + public final void info(final Logger logger, final long timeInMs, final Object... objects) + { + if (isAllowedToLog(timeInMs)) + { + logger.info(myMessage, objects); + } + } + + /** + * Logs a warning message if the logging is allowed based on the throttling interval. + * + * @param logger the logger to log the message to. Must not be {@code null}. 
+ * @param timeInMs the current time in nanoseconds, despite the parameter name; it is compared against the nanosecond-based throttling deadline. + * @param objects optional parameters to be included in the log message. + * @throws NullPointerException if {@code logger} is {@code null}. + */ + public final void warn(final Logger logger, final long timeInMs, final Object... objects) + { + if (isAllowedToLog(timeInMs)) + { + logger.warn(myMessage, objects); + } + } + + /** + * Logs an error message if the logging is allowed based on the throttling interval. + * + * @param logger the logger to log the message to. Must not be {@code null}. + * @param timeInMs the current time in nanoseconds, despite the parameter name; it is compared against the nanosecond-based throttling deadline. + * @param objects optional parameters to be included in the log message. + * @throws NullPointerException if {@code logger} is {@code null}. + */ + public final void error(final Logger logger, final long timeInMs, final Object... objects) + { + if (isAllowedToLog(timeInMs)) + { + logger.error(myMessage, objects); + } + } +} + + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/ThrottlingLogger.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/ThrottlingLogger.java new file mode 100644 index 000000000..3e35e4866 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/ThrottlingLogger.java @@ -0,0 +1,99 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.logging; + +import org.slf4j.Logger; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.TimeUnit; + +/** + * Logger that throttles log messages per interval. + * Messages are throttled independently, keyed by the message string (the format template, not the formatted result). + * This logger is thread safe. + */ +public class ThrottlingLogger +{ + private final Map myThrottledLogMessages = new ConcurrentHashMap<>(); + private final Logger myLogger; + private final long myIntervalInNanos; + + /** + * Constructs a ThrottlingLogger with the specified logger, interval, and time unit. + * + * @param logger the logger to which the messages will be sent. Must not be {@code null}. + * @param interval the interval duration for throttling messages. + * @param timeUnit the time unit for the interval duration. Must not be {@code null}. + * @throws NullPointerException if {@code timeUnit} is {@code null}; a {@code null} logger is not detected until a message is logged. + */ + public ThrottlingLogger(final Logger logger, final long interval, final TimeUnit timeUnit) + { + myLogger = logger; + myIntervalInNanos = timeUnit.toNanos(interval); + } + + /** + * Logs an informational message, throttled according to the specified interval. + * + * @param message the message to log. Must not be {@code null}. + * @param objects optional parameters to include in the log message. + */ + public final void info(final String message, final Object... objects) + { + ThrottledLogMessage throttledLogMessage = getThrottledLogMessage(message); + throttledLogMessage.info(myLogger, System.nanoTime(), objects); + } + + /** + * Logs a warning message, throttled according to the specified interval. + * + * @param message the message to log. Must not be {@code null}. + * @param objects optional parameters to include in the log message. + */ + public final void warn(final String message, final Object... 
objects) + { + ThrottledLogMessage throttledLogMessage = getThrottledLogMessage(message); + throttledLogMessage.warn(myLogger, System.nanoTime(), objects); + } + + /** + * Logs an error message, throttled according to the specified interval. + * + * @param message the message to log. Must not be {@code null}. + * @param objects optional parameters to include in the log message. + */ + public final void error(final String message, final Object... objects) + { + ThrottledLogMessage throttledLogMessage = getThrottledLogMessage(message); + throttledLogMessage.error(myLogger, System.nanoTime(), objects); + } + + private ThrottledLogMessage getThrottledLogMessage(final String message) + { + ThrottledLogMessage throttledLogMessage = myThrottledLogMessages.get(message); + if (throttledLogMessage == null) + { + throttledLogMessage = new ThrottledLogMessage(message, myIntervalInNanos); + ThrottledLogMessage addedMessage = myThrottledLogMessages.putIfAbsent(message, throttledLogMessage); + if (addedMessage != null) + { + throttledLogMessage = addedMessage; + } + } + return throttledLogMessage; + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/package-info.java new file mode 100644 index 000000000..ffeddc92d --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/logging/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the implementations and resources for logging. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.logging; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/metrics/CassandraMetrics.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/metrics/CassandraMetrics.java new file mode 100644 index 000000000..528d9685a --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/metrics/CassandraMetrics.java @@ -0,0 +1,170 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.metrics; + +import com.ericsson.bss.cassandra.ecchronos.core.impl.logging.ThrottlingLogger; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxy; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.LoadingCache; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.io.IOException; +import java.time.Duration; +import java.util.concurrent.CompletionException; +import java.util.concurrent.TimeUnit; + +/** + * Used to fetch metrics from Cassandra through JMX and keep them updated. + */ +public class CassandraMetrics implements Closeable +{ + private static final Logger LOG = LoggerFactory.getLogger(CassandraMetrics.class); + private static final ThrottlingLogger THROTTLED_LOGGER = new ThrottlingLogger(LOG, 5, TimeUnit.MINUTES); + private static final long DEFAULT_CACHE_EXPIRY_TIME_IN_MINUTES = 60; + private static final long DEFAULT_CACHE_REFRESH_TIME_IN_SECONDS = 30; + + private final LoadingCache myCache; + private final DistributedJmxProxyFactory myJmxProxyFactory; + + /** + * Constructs a CassandraMetrics instance with default cache refresh and expiry times. + * + * @param jmxProxyFactory the factory used to create connections to distributed JMX proxies. Must not be {@code null}. + */ + public CassandraMetrics(final DistributedJmxProxyFactory jmxProxyFactory) + { + this(jmxProxyFactory, Duration.ofSeconds(DEFAULT_CACHE_REFRESH_TIME_IN_SECONDS), + Duration.ofMinutes(DEFAULT_CACHE_EXPIRY_TIME_IN_MINUTES)); + } + + /** + * Constructs a CassandraMetrics instance. 
+ * + * @param jmxProxyFactory the factory used to create connections to distributed JMX proxies. Must not be {@code null}. + * @param refreshAfter the duration after which the cache will refresh its entries. Must not be {@code null}. + * @param expireAfter the duration after which the cache entries will expire after access. Must not be {@code null}. + */ + public CassandraMetrics(final DistributedJmxProxyFactory jmxProxyFactory, final Duration refreshAfter, + final Duration expireAfter) + { + myJmxProxyFactory = Preconditions.checkNotNull(jmxProxyFactory, "JMX proxy factory must be set"); + myCache = Caffeine.newBuilder() + .refreshAfterWrite(Preconditions.checkNotNull(refreshAfter, "Refresh after must be set")) + .expireAfterAccess(Preconditions.checkNotNull(expireAfter, "Expire after must be set")) + .executor(Runnable::run) + .build(this::getMetrics); + } + + private CassandraMetric getMetrics(final MetricsKey key) throws IOException + { + try (DistributedJmxProxy jmxProxy = myJmxProxyFactory.connect()) + { + long maxRepairedAt = jmxProxy.getMaxRepairedAt(key.nodeId(), key.tableReference()); + double percentRepaired = jmxProxy.getPercentRepaired(key.nodeId(), key.tableReference()); + LOG.trace("{}, maxRepairedAt: {}, percentRepaired: {}", key.tableReference(), maxRepairedAt, percentRepaired); + return new CassandraMetric(percentRepaired, maxRepairedAt); + } + catch (IOException e) + { + THROTTLED_LOGGER.warn("Unable to fetch metrics from Cassandra, future metrics might contain stale values", + e); + throw e; + } + } + + @VisibleForTesting + final void refreshCache(final UUID nodeID, final TableReference tableReference) + { + MetricsKey key = new MetricsKey(nodeID, tableReference); + myCache.refresh(key); + } + + /** + * Return max repaired at for a table. + * @param nodeID the node ID + * @param tableReference The table + * @return Timestamp or 0 if not available. 
+     */
+    public long getMaxRepairedAt(final UUID nodeID, final TableReference tableReference)
+    {
+        long maxRepairedAt = 0L;
+        try
+        {
+            CassandraMetric metric = myCache.get(new MetricsKey(nodeID, tableReference));
+            maxRepairedAt = metric.myMaxRepairedAt;
+        }
+        catch (CompletionException e)
+        {
+            // Loader failures surface as CompletionException; fall back to the documented default.
+            THROTTLED_LOGGER.error("Failed to fetch maxRepairedAt metric for {}", tableReference, e);
+        }
+        return maxRepairedAt;
+    }
+
+    /**
+     * Return percent repaired for a table.
+     * @param nodeID the node ID
+     * @param tableReference The table
+     * @return Percent repaired or 0 if not available.
+     */
+    public double getPercentRepaired(final UUID nodeID, final TableReference tableReference)
+    {
+        double percentRepaired = 0.0d;
+        try
+        {
+            CassandraMetric metric = myCache.get(new MetricsKey(nodeID, tableReference));
+            percentRepaired = metric.myPercentRepaired;
+        }
+        catch (CompletionException e)
+        {
+            // Loader failures surface as CompletionException; fall back to the documented default.
+            THROTTLED_LOGGER.error("Failed to fetch percentRepaired metric for {}", tableReference, e);
+        }
+        return percentRepaired;
+    }
+
+    /**
+     * Cleans the cache.
+     */
+    @Override
+    public void close()
+    {
+        myCache.invalidateAll();
+        myCache.cleanUp();
+    }
+
+    /**
+     * Immutable pair of the two repair metrics read for one node and table.
+     */
+    private static class CassandraMetric
+    {
+        private final double myPercentRepaired;
+        private final long myMaxRepairedAt;
+
+        // Primitive parameters: the values are stored as primitives, so boxed types only added
+        // needless boxing and a possible NullPointerException on unboxing.
+        CassandraMetric(final double percentRepaired, final long maxRepairedAt)
+        {
+            myPercentRepaired = percentRepaired;
+            myMaxRepairedAt = maxRepairedAt;
+        }
+    }
+
+    /**
+     * Cache key identifying the metrics of one table on one node.
+     */
+    private record MetricsKey(UUID nodeId, TableReference tableReference)
+    {
+    }
+
+}
+
diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/metrics/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/metrics/package-info.java
new file mode 100644
index 000000000..5ff4b6381
--- /dev/null
+++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/metrics/package-info.java
@@ -0,0 +1,18 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+ * Contains the implementation and resources for Cassandra Metrics.
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.metrics; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/DefaultRepairConfigurationProvider.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/DefaultRepairConfigurationProvider.java new file mode 100644 index 000000000..180784159 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/DefaultRepairConfigurationProvider.java @@ -0,0 +1,578 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.Metadata; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RepairScheduler; +import com.ericsson.bss.cassandra.ecchronos.core.table.ReplicatedTableProvider; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReferenceFactory; +import java.util.Collections; +import java.util.List; +import java.util.Set; +import java.util.function.BiConsumer; +import java.util.function.Function; +import java.util.HashSet; +import java.util.Map; + +import com.datastax.oss.driver.api.core.CqlIdentifier; +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.metadata.Node; +import com.datastax.oss.driver.api.core.metadata.NodeStateListenerBase; +import com.datastax.oss.driver.api.core.metadata.schema.AggregateMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.FunctionMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.SchemaChangeListener; +import com.datastax.oss.driver.api.core.metadata.schema.TableMetadata; +import com.datastax.oss.driver.api.core.metadata.schema.ViewMetadata; +import com.datastax.oss.driver.api.core.session.Session; +import com.datastax.oss.driver.api.core.type.UserDefinedType; + +import com.google.common.base.Preconditions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A repair configuration provider that adds configuration to {@link RepairScheduler} based on whether the table + * is replicated locally using the default repair configuration provided during construction of this object. 
+ */
+@SuppressWarnings("PMD.GodClass")
+public class DefaultRepairConfigurationProvider extends NodeStateListenerBase implements SchemaChangeListener
+{
+    private static final Logger LOG = LoggerFactory.getLogger(DefaultRepairConfigurationProvider.class);
+
+    private CqlSession mySession;
+    private List<Node> myNodes;
+    private ReplicatedTableProvider myReplicatedTableProvider;
+    private RepairScheduler myRepairScheduler;
+    // Resolves the repair configurations that apply to a given table.
+    private Function<TableReference, Set<RepairConfiguration>> myRepairConfigurationFunction;
+    private TableReferenceFactory myTableReferenceFactory;
+
+    /**
+     * Default constructor. Leaves all collaborators unset until {@link #fromBuilder(Builder)} is called.
+     */
+    public DefaultRepairConfigurationProvider()
+    {
+        //NOOP
+    }
+
+    private DefaultRepairConfigurationProvider(final Builder builder)
+    {
+        applyBuilder(builder);
+    }
+
+    /**
+     * From builder. Replaces all collaborators with the builder's values and then schedules
+     * configuration for the tables currently known to the session.
+     *
+     * @param builder A builder
+     */
+    public void fromBuilder(final Builder builder)
+    {
+        applyBuilder(builder);
+    }
+
+    /**
+     * Copies the builder state into this instance and runs the initial configuration setup.
+     * Shared by the builder constructor and {@link #fromBuilder(Builder)}.
+     *
+     * @param builder the builder to read from
+     * @throws NullPointerException if the builder has no table reference factory
+     */
+    private void applyBuilder(final Builder builder)
+    {
+        mySession = builder.mySession;
+        myNodes = builder.myNodesList;
+        myReplicatedTableProvider = builder.myReplicatedTableProvider;
+        myRepairScheduler = builder.myRepairScheduler;
+        myRepairConfigurationFunction = builder.myRepairConfigurationFunction;
+        myTableReferenceFactory = Preconditions.checkNotNull(builder.myTableReferenceFactory,
+                "Table reference factory must be set");
+
+        setupConfiguration();
+    }
+
+    /**
+     * Called when keyspace is created.
+     *
+     * @param keyspace Keyspace metadata
+     */
+    @Override
+    public void onKeyspaceCreated(final KeyspaceMetadata keyspace)
+    {
+        final String name = keyspace.getName().asInternal();
+        for (Node node : myNodes)
+        {
+            // Replicated on this node: (re)schedule every table; otherwise drop any existing schedule.
+            final BiConsumer<TableReference, TableMetadata> action = myReplicatedTableProvider.accept(node, name)
+                    ? (reference, metadata) -> updateConfiguration(node, reference, metadata)
+                    : (reference, metadata) -> myRepairScheduler.removeConfiguration(node, reference);
+            allTableOperation(name, action);
+        }
+    }
+
+    /**
+     * Called when keyspace is updated. Handled exactly like a keyspace creation.
+     *
+     * @param current Current keyspace metadata
+     * @param previous Previous keyspace metadata
+     */
+    @Override
+    public void onKeyspaceUpdated(final KeyspaceMetadata current,
+                                  final KeyspaceMetadata previous)
+    {
+        onKeyspaceCreated(current);
+    }
+
+    /**
+     * Called when keyspace is dropped. Every table of the keyspace is treated as dropped.
+     *
+     * @param keyspace Keyspace metadata
+     */
+    @Override
+    public void onKeyspaceDropped(final KeyspaceMetadata keyspace)
+    {
+        keyspace.getTables().values().forEach(this::onTableDropped);
+    }
+
+    /**
+     * Called when table is created.
+     *
+     * @param table Table metadata
+     */
+    @Override
+    public void onTableCreated(final TableMetadata table)
+    {
+        for (Node node : myNodes)
+        {
+            final String keyspaceName = table.getKeyspace().asInternal();
+            if (!myReplicatedTableProvider.accept(node, keyspaceName))
+            {
+                continue;
+            }
+            final String tableName = table.getName().asInternal();
+            updateConfiguration(node, myTableReferenceFactory.forTable(keyspaceName, tableName), table);
+        }
+
+    }
+
+    /**
+     * Called when table is dropped. Removes the table's configuration for every known node.
+     *
+     * @param table Table metadata
+     */
+    @Override
+    public void onTableDropped(final TableMetadata table)
+    {
+        final TableReference dropped = myTableReferenceFactory.forTable(table);
+        for (Node node : myNodes)
+        {
+            myRepairScheduler.removeConfiguration(node, dropped);
+        }
+    }
+
+    /**
+     * Called when table is updated.
+     *
+     * @param current Current table metadata
+     * @param previous Previous table metadata
+     */
+    @Override
+    public void onTableUpdated(final TableMetadata current, final TableMetadata previous)
+    {
+        onTableCreated(current);
+    }
+
+    /**
+     * Close. Removes the repair configuration of every table in every keyspace for all known nodes.
+     * Does nothing when no session has been set.
+     */
+    @Override
+    public void close()
+    {
+        if (mySession != null)
+        {
+            for (KeyspaceMetadata keyspaceMetadata : mySession.getMetadata().getKeyspaces().values())
+            {
+                allTableOperation(keyspaceMetadata.getName().asInternal(), (tableReference, tableMetadata) ->
+                        myNodes.forEach(node -> myRepairScheduler.removeConfiguration(node, tableReference)));
+            }
+        }
+    }
+
+    /**
+     * Applies {@code consumer} to every table of the given keyspace.
+     *
+     * A keyspace that can no longer be resolved (for example dropped between the schema event and
+     * this lookup) is skipped instead of failing with {@link java.util.NoSuchElementException}.
+     *
+     * @param keyspaceName the keyspace whose tables are visited
+     * @param consumer the operation to run for each table reference and its metadata
+     */
+    private void allTableOperation(
+            final String keyspaceName,
+            final BiConsumer<TableReference, TableMetadata> consumer)
+    {
+        Metadata.getKeyspace(mySession, keyspaceName).ifPresentOrElse(
+                keyspaceMetadata ->
+                {
+                    for (TableMetadata tableMetadata : keyspaceMetadata.getTables().values())
+                    {
+                        String tableName = tableMetadata.getName().asInternal();
+                        TableReference tableReference = myTableReferenceFactory.forTable(keyspaceName, tableName);
+
+                        consumer.accept(tableReference, tableMetadata);
+                    }
+                },
+                () -> LOG.debug("Keyspace {} not found in metadata, skipping table operation", keyspaceName));
+    }
+
+    /**
+     * Pushes the enabled repair configurations of a table to the scheduler for one node.
+     * Configurations equal to {@link RepairConfiguration#DISABLED}, and configurations that ask to
+     * ignore TWCS tables when the table uses TWCS, are filtered out.
+     *
+     * @param node the node the configuration applies to
+     * @param tableReference the table being configured
+     * @param table the table's schema metadata
+     */
+    private void updateConfiguration(
+            final Node node,
+            final TableReference tableReference,
+            final TableMetadata table)
+    {
+        Set<RepairConfiguration> repairConfigurations = myRepairConfigurationFunction.apply(tableReference);
+        Set<RepairConfiguration> enabledRepairConfigurations = new HashSet<>();
+        for (RepairConfiguration repairConfiguration: repairConfigurations)
+        {
+            if (!RepairConfiguration.DISABLED.equals(repairConfiguration)
+                    && !isTableIgnored(table, repairConfiguration.getIgnoreTWCSTables()))
+            {
+                enabledRepairConfigurations.add(repairConfiguration);
+            }
+        }
+        myRepairScheduler.putConfigurations(node, tableReference, enabledRepairConfigurations);
+    }
+
+    /**
+     * Tells whether a table should be skipped because it uses TimeWindowCompactionStrategy.
+     *
+     * @param table the table's schema metadata
+     * @param ignore whether TWCS tables are to be ignored at all
+     * @return {@code true} only if {@code ignore} is set and the table's compaction class is TWCS
+     */
+    private boolean isTableIgnored(final TableMetadata table, final boolean ignore)
+    {
+        Map<CqlIdentifier, Object> tableOptions = table.getOptions();
+        if (tableOptions == null)
+        {
+            return false;
+        }
+        Object compaction = tableOptions.get(CqlIdentifier.fromInternal("compaction"));
+        // Covers both a missing option and an unexpected value type, without an unchecked cast.
+        if (!(compaction instanceof Map<?, ?> compactionOptions))
+        {
+            return false;
+        }
+        return ignore
+                && "org.apache.cassandra.db.compaction.TimeWindowCompactionStrategy".equals(compactionOptions.get("class"));
+    }
+
+    /**
+     * Create Builder for DefaultRepairConfigurationProvider.
+     * @return Builder the Builder instance for the class.
+     */
+    public static Builder newBuilder()
+    {
+        return new Builder();
+    }
+
+    /**
+     * Called when user defined types are created.
+     *
+     * @param type User defined type
+     */
+    @Override
+    public void onUserDefinedTypeCreated(final UserDefinedType type)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when user defined types are dropped.
+     *
+     * @param type User defined type
+     */
+    @Override
+    public void onUserDefinedTypeDropped(final UserDefinedType type)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when user defined types are updated.
+     *
+     * @param current Current user defined type
+     * @param previous previous user defined type
+     */
+    @Override
+    public void onUserDefinedTypeUpdated(final UserDefinedType current, final UserDefinedType previous)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when functions are created.
+     *
+     * @param function Function metadata
+     */
+    @Override
+    public void onFunctionCreated(final FunctionMetadata function)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when functions are dropped.
+     *
+     * @param function Function metadata
+     */
+    @Override
+    public void onFunctionDropped(final FunctionMetadata function)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when functions are updated.
+     *
+     * @param current Current function metadata
+     * @param previous Previous function metadata
+     */
+    @Override
+    public void onFunctionUpdated(final FunctionMetadata current, final FunctionMetadata previous)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when aggregates are created.
+     *
+     * @param aggregate Aggregate metadata
+     */
+    @Override
+    public void onAggregateCreated(final AggregateMetadata aggregate)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when aggregates are dropped.
+     *
+     * @param aggregate Aggregate metadata
+     */
+    @Override
+    public void onAggregateDropped(final AggregateMetadata aggregate)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when aggregates are updated.
+     *
+     * @param current Current aggregate metadata
+     * @param previous Previous aggregate metadata
+     */
+    @Override
+    public void onAggregateUpdated(final AggregateMetadata current, final AggregateMetadata previous)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when views are created.
+     *
+     * @param view View metadata
+     */
+    @Override
+    public void onViewCreated(final ViewMetadata view)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when views are dropped.
+     *
+     * @param view View metadata
+     */
+    @Override
+    public void onViewDropped(final ViewMetadata view)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when views are updated.
+     *
+     * @param current Current view metadata
+     * @param previous Previous view metadata
+     */
+    @Override
+    public void onViewUpdated(final ViewMetadata current, final ViewMetadata previous)
+    {
+        // NOOP
+    }
+
+    /**
+     * Called when the session is up and ready. Delegates to the default
+     * {@link SchemaChangeListener#onSessionReady(Session)} implementation; the explicit override
+     * names which inherited {@code onSessionReady} is used.
+     *
+     * @param session The session
+     */
+    @Override
+    public void onSessionReady(final Session session)
+    {
+        SchemaChangeListener.super.onSessionReady(session);
+    }
+
+    /**
+     * Callback for when a node switches state to UP. Re-runs the configuration setup for all
+     * known nodes and keyspaces, not only for the node that changed state.
+     *
+     * @param node The node switching state to UP
+     */
+    @Override
+    public void onUp(final Node node)
+    {
+        LOG.debug("{} switched state to UP.", node);
+        setupConfiguration();
+    }
+
+    /**
+     * Callback for when a node switches state to DOWN. Re-runs the configuration setup for all
+     * known nodes and keyspaces, not only for the node that changed state.
+     *
+     * @param node The node switching state to DOWN
+     */
+    @Override
+    public void onDown(final Node node)
+    {
+        LOG.debug("{} switched state to DOWN.", node);
+        setupConfiguration();
+    }
+
+    /**
+     * This will go through all the configuration, given mySession is set, otherwise it will just silently
+     * return.
+     */
+    private void setupConfiguration()
+    {
+        if (mySession == null)
+        {
+            LOG.debug("Session during setupConfiguration call was null.");
+            return;
+        }
+
+        for (KeyspaceMetadata keyspaceMetadata : mySession.getMetadata().getKeyspaces().values())
+        {
+            String keyspaceName = keyspaceMetadata.getName().asInternal();
+            for (Node node : myNodes)
+            {
+                // Only keyspaces accepted for the node are scheduled; nothing is removed here.
+                if (myReplicatedTableProvider.accept(node, keyspaceName))
+                {
+                    allTableOperation(keyspaceName, (tableReference, tableMetadata) -> updateConfiguration(node, tableReference, tableMetadata));
+                }
+            }
+
+        }
+    }
+
+    /**
+     * Builder for DefaultRepairConfigurationProvider.
+     */
+    public static class Builder
+    {
+        private CqlSession mySession;
+        private List<Node> myNodesList;
+        private ReplicatedTableProvider myReplicatedTableProvider;
+        private RepairScheduler myRepairScheduler;
+        private Function<TableReference, Set<RepairConfiguration>> myRepairConfigurationFunction;
+        private TableReferenceFactory myTableReferenceFactory;
+
+        /**
+         * Build with session.
+         *
+         * @param session The CQL session
+         * @return Builder
+         */
+        public Builder withSession(final CqlSession session)
+        {
+            mySession = session;
+            return this;
+        }
+
+        /**
+         * Build with default repair configuration. Every table gets a singleton set holding this
+         * configuration.
+         *
+         * @param defaultRepairConfiguration The default repair configuration
+         * @return Builder
+         */
+        public Builder withDefaultRepairConfiguration(final RepairConfiguration defaultRepairConfiguration)
+        {
+            myRepairConfigurationFunction = (tableReference) -> Collections.singleton(defaultRepairConfiguration);
+            return this;
+        }
+
+        /**
+         * Build with repair configuration.
+         *
+         * @param defaultRepairConfiguration Function that resolves the set of repair configurations
+         *                                   for a given table
+         * @return Builder
+         */
+        public Builder withRepairConfiguration(final Function<TableReference, Set<RepairConfiguration>>
+                                                       defaultRepairConfiguration)
+        {
+            myRepairConfigurationFunction = defaultRepairConfiguration;
+            return this;
+        }
+
+        /**
+         * Build with replicated table provider.
+         *
+         * @param replicatedTableProvider The replicated table provider
+         * @return Builder
+         */
+        public Builder withReplicatedTableProvider(final ReplicatedTableProvider replicatedTableProvider)
+        {
+            myReplicatedTableProvider = replicatedTableProvider;
+            return this;
+        }
+
+        /**
+         * Build with table repair scheduler.
+         *
+         * @param repairScheduler The repair scheduler
+         * @return Builder
+         */
+        public Builder withRepairScheduler(final RepairScheduler repairScheduler)
+        {
+            myRepairScheduler = repairScheduler;
+            return this;
+        }
+
+        /**
+         * Build with table reference factory.
+         *
+         * @param tableReferenceFactory The table reference factory
+         * @return Builder
+         */
+        public Builder withTableReferenceFactory(final TableReferenceFactory tableReferenceFactory)
+        {
+            myTableReferenceFactory = tableReferenceFactory;
+            return this;
+        }
+
+        /**
+         * Build with nodes list.
+         *
+         * @param nodesList The nodes for which repair configuration is managed
+         * @return Builder with nodes list
+         */
+        public Builder withNodesList(final List<Node> nodesList)
+        {
+            myNodesList = nodesList;
+            return this;
+        }
+
+        /**
+         * Build.
+         *
+         * @return DefaultRepairConfigurationProvider
+         */
+        public DefaultRepairConfigurationProvider build()
+        {
+            return new DefaultRepairConfigurationProvider(this);
+        }
+    }
+}
+
diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairGroup.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairGroup.java
new file mode 100644
index 000000000..520d86f2b
--- /dev/null
+++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairGroup.java
@@ -0,0 +1,259 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair; + +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental.IncrementalRepairTask; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairPolicy; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ScheduledJobException; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class used to construct repair groups. 
+ */
+public class RepairGroup extends ScheduledTask
+{
+    private static final Logger LOG = LoggerFactory.getLogger(RepairGroup.class);
+
+    private final TableReference myTableReference;
+    private final RepairConfiguration myRepairConfiguration;
+    private final ReplicaRepairGroup myReplicaRepairGroup;
+    private final DistributedJmxProxyFactory myJmxProxyFactory;
+    private final TableRepairMetrics myTableRepairMetrics;
+    private final List<TableRepairPolicy> myRepairPolicies;
+
+    /**
+     * Constructs a RepairGroup from the values collected by the builder.
+     *
+     * @param priority the priority for job creation.
+     * @param builder the Builder to construct RepairGroup.
+     * @throws NullPointerException if any of the builder's values is {@code null}.
+     */
+    public RepairGroup(final int priority, final Builder builder)
+    {
+        super(priority);
+        myTableReference = Preconditions
+                .checkNotNull(builder.myTableReference, "Table reference must be set");
+        myRepairConfiguration = Preconditions
+                .checkNotNull(builder.myRepairConfiguration, "Repair configuration must be set");
+        myReplicaRepairGroup = Preconditions
+                .checkNotNull(builder.myReplicaRepairGroup, "Replica repair group must be set");
+        myJmxProxyFactory = Preconditions
+                .checkNotNull(builder.myJmxProxyFactory, "Jmx proxy factory must be set");
+        myTableRepairMetrics = Preconditions
+                .checkNotNull(builder.myTableRepairMetrics, "Table repair metrics must be set");
+        // Defensive copy so later changes to the builder's list do not affect this group.
+        myRepairPolicies = new ArrayList<>(Preconditions
+                .checkNotNull(builder.myRepairPolicies, "Repair policies must be set"));
+    }
+
+    /**
+     * Executes the repair tasks this repair group is responsible for. Repair tasks can succeed or fail. Repair
+     * tasks blocked by run policy are counted as failed.
+     *
+     * @param nodeID the ID of the node the repair tasks are run for
+     * @return {@code true} if every task ran and succeeded, {@code false} if any task failed or was
+     *         stopped by a repair policy
+     */
+    @Override
+    public boolean execute(final UUID nodeID)
+    {
+        LOG.debug("Table {} running repair job {}", myTableReference, myReplicaRepairGroup);
+        boolean successful = true;
+
+        for (RepairTask repairTask : getRepairTasks(nodeID))
+        {
+            if (!shouldContinue())
+            {
+                LOG.info("Repair of {} was stopped by policy, will continue later", this);
+                successful = false;
+                break;
+            }
+            try
+            {
+                repairTask.execute();
+            }
+            catch (ScheduledJobException e)
+            {
+                LOG.warn("Encountered issue when running repair task {}, {}", repairTask, e.getMessage());
+                LOG.debug("", e);
+                successful = false;
+                // RepairTask.execute() wraps failures in a new ScheduledJobException, so the
+                // InterruptedException may sit deeper than the direct cause.
+                if (isCausedByInterrupt(e))
+                {
+                    LOG.info("{} thread was interrupted", this);
+                    break;
+                }
+            }
+            finally
+            {
+                repairTask.cleanup();
+            }
+        }
+
+        return successful;
+    }
+
+    /**
+     * Checks whether an {@link InterruptedException} appears anywhere in the cause chain.
+     *
+     * @param throwable the exception whose causes are inspected
+     * @return {@code true} if any cause is an InterruptedException
+     */
+    private static boolean isCausedByInterrupt(final Throwable throwable)
+    {
+        for (Throwable cause = throwable.getCause(); cause != null; cause = cause.getCause())
+        {
+            if (cause instanceof InterruptedException)
+            {
+                return true;
+            }
+        }
+        return false;
+    }
+
+    /**
+     * Asks every repair policy whether the table may be repaired right now.
+     *
+     * @return {@code true} only if all policies allow it
+     */
+    private boolean shouldContinue()
+    {
+        return myRepairPolicies.stream().allMatch(repairPolicy -> repairPolicy.shouldRun(myTableReference));
+    }
+
+    /**
+     * String representation.
+     *
+     * @return String
+     */
+    @Override
+    public String toString()
+    {
+        return String.format("%s repair group of %s", myRepairConfiguration.getRepairType(), myTableReference);
+    }
+
+    /**
+     * Get repair tasks. Currently a single {@link IncrementalRepairTask} for the table.
+     *
+     * @param nodeID the Node id.
+     * @return a Collection of RepairTask
+     */
+    @VisibleForTesting
+    public Collection<RepairTask> getRepairTasks(final UUID nodeID)
+    {
+        Collection<RepairTask> tasks = new ArrayList<>();
+        tasks.add(new IncrementalRepairTask(
+                nodeID,
+                myJmxProxyFactory,
+                myTableReference,
+                myRepairConfiguration,
+                myTableRepairMetrics));
+
+        return tasks;
+    }
+
+    /**
+     * Create instance of Builder to construct RepairGroup.
+     *
+     * @return Builder
+     */
+    public static Builder newBuilder()
+    {
+        return new Builder();
+    }
+
+    /**
+     * Builder used to construct RepairGroup.
+     */
+    public static class Builder
+    {
+        private TableReference myTableReference;
+        private RepairConfiguration myRepairConfiguration;
+        private ReplicaRepairGroup myReplicaRepairGroup;
+        private DistributedJmxProxyFactory myJmxProxyFactory;
+        private TableRepairMetrics myTableRepairMetrics;
+        // Defaults to no policies, i.e. nothing blocks the repair.
+        private List<TableRepairPolicy> myRepairPolicies = new ArrayList<>();
+
+
+        /**
+         * Build with table reference.
+         *
+         * @param tableReference Table reference.
+         * @return Builder
+         */
+        public Builder withTableReference(final TableReference tableReference)
+        {
+            myTableReference = tableReference;
+            return this;
+        }
+
+        /**
+         * Build with repair configuration.
+         *
+         * @param repairConfiguration Repair configuration.
+         * @return Builder
+         */
+        public Builder withRepairConfiguration(final RepairConfiguration repairConfiguration)
+        {
+            myRepairConfiguration = repairConfiguration;
+            return this;
+        }
+
+        /**
+         * Build with replica repair group.
+         *
+         * @param replicaRepairGroup Replica repair group.
+         * @return Builder
+         */
+        public Builder withReplicaRepairGroup(final ReplicaRepairGroup replicaRepairGroup)
+        {
+            myReplicaRepairGroup = replicaRepairGroup;
+            return this;
+        }
+
+        /**
+         * Build with JMX proxy factory.
+         *
+         * @param jmxProxyFactory JMX proxy factory.
+         * @return Builder
+         */
+        public Builder withJmxProxyFactory(final DistributedJmxProxyFactory jmxProxyFactory)
+        {
+            myJmxProxyFactory = jmxProxyFactory;
+            return this;
+        }
+
+        /**
+         * Build with table repair metrics.
+         *
+         * @param tableRepairMetrics Table repair metrics.
+         * @return Builder
+         */
+        public Builder withTableRepairMetrics(final TableRepairMetrics tableRepairMetrics)
+        {
+            myTableRepairMetrics = tableRepairMetrics;
+            return this;
+        }
+
+        /**
+         * Build with repair policies. The list is copied when the RepairGroup is constructed.
+         *
+         * @param repairPolicies Repair policies.
+         * @return Builder
+         */
+        public Builder withRepairPolicies(final List<TableRepairPolicy> repairPolicies)
+        {
+            myRepairPolicies = repairPolicies;
+            return this;
+        }
+
+        /**
+         * Build repair group.
+         *
+         * @param priority The priority passed on to the RepairGroup constructor.
+         * @return A new RepairGroup created from this builder's current values.
+         */
+        public RepairGroup build(final int priority)
+        {
+            return new RepairGroup(priority, this);
+        }
+    }
+}
+
diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairTask.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairTask.java
new file mode 100644
index 000000000..d7ea302fc
--- /dev/null
+++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/RepairTask.java
@@ -0,0 +1,471 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair; + +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxy; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ScheduledJobException; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.management.Notification; +import javax.management.NotificationListener; +import javax.management.remote.JMXConnectionNotification; +import java.io.IOException; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Abstract Class used to represent repair tasks. 
+ */
+public abstract class RepairTask implements NotificationListener
+{
+    private static final Logger LOG = LoggerFactory.getLogger(RepairTask.class);
+    // Matches a token range written as "(start,end]" with optionally negative integer bounds.
+    private static final Pattern RANGE_PATTERN = Pattern.compile("\\((-?[0-9]+),(-?[0-9]+)\\]");
+    private static final int HEALTH_CHECK_INTERVAL_IN_MINUTES = 10;
+
+    private final UUID nodeID;
+    private final ScheduledExecutorService myExecutor = Executors.newSingleThreadScheduledExecutor(
+            new ThreadFactoryBuilder().setNameFormat("HangPreventingTask-%d").build());
+    // Released by the notification handler; repair() blocks on it until then.
+    private final CountDownLatch myLatch = new CountDownLatch(1);
+    private final DistributedJmxProxyFactory myJmxProxyFactory;
+    private final TableReference myTableReference;
+    private final TableRepairMetrics myTableRepairMetrics;
+    private final RepairConfiguration myRepairConfiguration;
+    // The fields below are volatile because they are written from the JMX notification callback
+    // and read by the thread running execute().
+    private volatile ScheduledFuture<?> myHangPreventFuture;
+    private volatile ScheduledJobException myLastError;
+    private volatile boolean hasLostNotification = false;
+    private volatile int myCommand;
+    // NOTE(review): element type restored as LongTokenRange (imported by this file) - confirm
+    // against the code that populates these sets.
+    private volatile Set<LongTokenRange> myFailedRanges = new HashSet<>();
+    private volatile Set<LongTokenRange> mySuccessfulRanges = new HashSet<>();
+
+    /**
+     * Constructs a RepairTask for the specified node and table with the given repair configuration and metrics.
+     *
+     * @param currentNodeID the UUID of the current node where the repair task is running. Must not be {@code null}.
+     * @param jmxProxyFactory the factory to create connections to distributed JMX proxies. Must not be {@code null}.
+     * @param tableReference the reference to the table that is being repaired. Must not be {@code null}.
+     * @param repairConfiguration the configuration specifying how the repair task should be executed. Must not be {@code null}.
+     * @param tableRepairMetrics the metrics associated with table repairs for monitoring and tracking purposes. May be {@code null}.
+     */
+    protected RepairTask(
+            final UUID currentNodeID,
+            final DistributedJmxProxyFactory jmxProxyFactory,
+            final TableReference tableReference,
+            final RepairConfiguration repairConfiguration,
+            final TableRepairMetrics tableRepairMetrics
+    )
+    {
+        nodeID = currentNodeID;
+        myJmxProxyFactory = Preconditions.checkNotNull(jmxProxyFactory, "Jmx proxy factory must be set");
+        myTableReference = Preconditions.checkNotNull(tableReference, "Table reference must be set");
+        myRepairConfiguration = Preconditions.checkNotNull(repairConfiguration, "Repair configuration must be set");
+        myTableRepairMetrics = tableRepairMetrics;
+    }
+
+    /**
+     * Execute the repair task.
+     *
+     * Connects to JMX, runs the repair, reports the outcome through {@code onFinish}, records the
+     * elapsed time in the table repair metrics (when metrics are set) and finally sleeps according
+     * to the configured unwind ratio.
+     *
+     * @throws ScheduledJobException
+     *         Scheduled job exception if the repair fails.
+     */
+    public void execute() throws ScheduledJobException
+    {
+        long start = System.nanoTime();
+        long total;
+        boolean successful = true;
+        onExecute();
+        try (DistributedJmxProxy proxy = myJmxProxyFactory.connect())
+        {
+            rescheduleHangPrevention();
+            repair(proxy);
+            onFinish(RepairStatus.SUCCESS);
+        }
+        catch (Exception e)
+        {
+            onFinish(RepairStatus.FAILED);
+            successful = false;
+            throw new ScheduledJobException("Unable to repair '" + this + "'", e);
+        }
+        finally
+        {
+            if (myHangPreventFuture != null)
+            {
+                myHangPreventFuture.cancel(false);
+            }
+            total = System.nanoTime() - start;
+            // The constructor accepts null metrics, so do not let reporting mask the repair
+            // outcome with a NullPointerException.
+            if (myTableRepairMetrics != null)
+            {
+                myTableRepairMetrics.repairSession(myTableReference, total, TimeUnit.NANOSECONDS, successful);
+            }
+        }
+        lazySleep(total);
+    }
+
+    /**
+     * Method called before the task is executed, default implementation is NOOP.
+ */ + protected void onExecute() + { + // NOOP + } + + private void repair(final DistributedJmxProxy proxy) throws ScheduledJobException + { + proxy.addStorageServiceListener(nodeID, this); + myCommand = proxy.repairAsync(nodeID, myTableReference.getKeyspace(), getOptions()); + if (myCommand > 0) + { + try + { + myLatch.await(); + proxy.removeStorageServiceListener(nodeID, this); + verifyRepair(proxy); + if (myLastError != null) + { + throw myLastError; + } + if (hasLostNotification) + { + String msg = String.format("Repair-%d of %s had lost notifications", myCommand, myTableReference); + LOG.warn(msg); + throw new ScheduledJobException(msg); + } + LOG.debug("{} completed successfully", this); + } + catch (InterruptedException e) + { + String msg = this + " was interrupted"; + LOG.warn(msg, e); + Thread.currentThread().interrupt(); + throw new ScheduledJobException(msg, e); + } + } + } + + /** + * Method used to construct options for the repair. + * + * @return Options + */ + protected abstract Map getOptions(); + + /** + * Method is called once a repair is completed. + * + * @param proxy + * The jmx proxy + * @throws ScheduledJobException + * In case when repair is deemed as failed. + */ + protected void verifyRepair(final DistributedJmxProxy proxy) throws ScheduledJobException + { + if (!myFailedRanges.isEmpty()) + { + proxy.forceTerminateAllRepairSessions(); + throw new ScheduledJobException("Repair has failed ranges '" + myFailedRanges + "'"); + } + } + + /** + * Method called when the task is finished. + * + * @param repairStatus + * The status of the finished task. 
+ */ + protected abstract void onFinish(RepairStatus repairStatus); + + private void lazySleep(final long executionInNanos) throws ScheduledJobException + { + if (myRepairConfiguration.getRepairUnwindRatio() != RepairConfiguration.NO_UNWIND) + { + double sleepDurationInNanos = executionInNanos * myRepairConfiguration.getRepairUnwindRatio(); + long sleepDurationInMs = TimeUnit.NANOSECONDS.toMillis((long) sleepDurationInNanos); + sleepDurationInMs = Math.max(sleepDurationInMs, 1); + try + { + Thread.sleep(sleepDurationInMs); + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + throw new ScheduledJobException(e); + } + } + } + + /** + * Clean up the repair task. + */ + public void cleanup() + { + myExecutor.shutdown(); + } + + /** + * Notification handler. + * + * @param notification + * The notification. + * @param handback + * The handback. + */ + @SuppressWarnings("unchecked") + @Override + public void handleNotification(final Notification notification, final Object handback) + { + LOG.debug("Notification {}", notification.toString()); + switch (notification.getType()) + { + case "progress": + rescheduleHangPrevention(); + String tag = (String) notification.getSource(); + if (tag.equals("repair:" + myCommand)) + { + Map progress = (Map) notification.getUserData(); + + String message = notification.getMessage(); + ProgressEventType type = ProgressEventType.values()[progress.get("type")]; + + this.progress(type, message); + } + break; + + case JMXConnectionNotification.NOTIFS_LOST: + hasLostNotification = true; + break; + + case JMXConnectionNotification.FAILED: + case JMXConnectionNotification.CLOSED: + String errorMessage = String.format("Unable to repair %s, error: %s", myTableReference, notification.getType()); + LOG.error(errorMessage); + myLastError = new ScheduledJobException(errorMessage); + myLatch.countDown(); + break; + default: + LOG.debug("Unknown JMXConnectionNotification type: {}", notification.getType()); + break; + } + } + 
+ private void rescheduleHangPrevention() + { + if (myHangPreventFuture != null) + { + myHangPreventFuture.cancel(false); + } + // Schedule the first check to happen after 10 minutes + myHangPreventFuture = myExecutor.schedule(new HangPreventingTask(), HEALTH_CHECK_INTERVAL_IN_MINUTES, + TimeUnit.MINUTES); + } + + /** + * Update progress. + * + * @param type + * Progress event type. + * @param message + * The message. + */ + @VisibleForTesting + void progress(final ProgressEventType type, final String message) + { + if (type == ProgressEventType.PROGRESS || type == ProgressEventType.ERROR) + { + if (message.contains("finished") || message.contains("failed")) + { + RepairStatus repairStatus = RepairStatus.SUCCESS; + if (message.contains("failed")) + { + repairStatus = RepairStatus.FAILED; + } + Matcher rangeMatcher = RANGE_PATTERN.matcher(message); + while (rangeMatcher.find()) + { + long start = Long.parseLong(rangeMatcher.group(1)); + long end = Long.parseLong(rangeMatcher.group(2)); + + LongTokenRange completedRange = new LongTokenRange(start, end); + onRangeFinished(completedRange, repairStatus); + } + } + else + { + LOG.warn("{} - Unknown progress message received: {}", this, message); + } + } + if (type == ProgressEventType.COMPLETE) + { + myLatch.countDown(); + } + } + + /** + * Method called once a range is finished successfully. In case of multiple ranges being repaired this will be + * called once per range. If this method is overriden make sure to call the super method. + * + * @param range The range + * @param repairStatus The status of the range + */ + protected void onRangeFinished(final LongTokenRange range, final RepairStatus repairStatus) + { + if (repairStatus.equals(RepairStatus.FAILED)) + { + myFailedRanges.add(range); + } + else + { + mySuccessfulRanges.add(range); + } + } + + /** + * Enum used to provide Event Progress for RepairTask. + */ + public enum ProgressEventType + { + /** + * Fired first when progress starts. Happens only once. 
+ */ + START, + + /** + * Fire when progress happens. This can be zero or more time after START. + */ + PROGRESS, + + /** + * When observing process completes with error, this is sent once before COMPLETE. + */ + ERROR, + + /** + * When observing process is aborted by user, this is sent once before COMPLETE. + */ + ABORT, + + /** + * When observing process completes successfully, this is sent once before COMPLETE. + */ + SUCCESS, + + /** + * Fire when progress complete. This is fired once, after ERROR/ABORT/SUCCESS is fired. After this, no more + * ProgressEvent should be fired for the same event. + */ + COMPLETE, + + /** + * Used when sending message without progress. + */ + NOTIFICATION + } + + private final class HangPreventingTask implements Runnable + { + private static final int MAX_CHECKS = 3; + private static final String NORMAL_STATUS = "NORMAL"; + private int checkCount = 0; + + @Override + public void run() + { + try (DistributedJmxProxy proxy = myJmxProxyFactory.connect()) + { + if (checkCount < MAX_CHECKS) + { + String nodeStatus = proxy.getNodeStatus(nodeID); + if (!NORMAL_STATUS.equals(nodeStatus)) + { + LOG.error("Cassandra node {} is down, aborting repair task.", nodeID); + myLastError = new ScheduledJobException("Cassandra node " + nodeID + " is down"); + proxy.forceTerminateAllRepairSessionsInSpecificNode(nodeID); + myLatch.countDown(); // Signal to abort the repair task + } + else + { + checkCount++; + myHangPreventFuture = myExecutor.schedule(this, HEALTH_CHECK_INTERVAL_IN_MINUTES, TimeUnit.MINUTES); + } + } + else + { + // After 3 successful checks or 30 minutes if still task is running terminate all repair sessions + proxy.forceTerminateAllRepairSessionsInSpecificNode(nodeID); + myLatch.countDown(); + } + } + catch (IOException e) + { + LOG.error("Unable to check node status or prevent hanging repair task: {}", this, e); + } + } + } + + /** + * Returns the set of token ranges that have failed during the repair task. + * + *

This method is primarily intended for testing purposes.

+ * + * @return a set of {@link LongTokenRange} representing the failed token ranges. + */ + @VisibleForTesting + protected final Set getFailedRanges() + { + return myFailedRanges; + } + + @VisibleForTesting + final Set getSuccessfulRanges() + { + return mySuccessfulRanges; + } + + /** + * Get table reference. + * + * @return TableReference + */ + public TableReference getTableReference() + { + return myTableReference; + } + + /** + * Get the repair configuration. + * + * @return RepairConfiguration + */ + public RepairConfiguration getRepairConfiguration() + { + return myRepairConfiguration; + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairJob.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairJob.java new file mode 100644 index 000000000..8006f7a11 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairJob.java @@ -0,0 +1,338 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.impl.metrics.CassandraMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.RepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJobView; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairPolicy; +import com.google.common.base.Preconditions; +import com.google.common.collect.ImmutableList; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Collection; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.TimeUnit; + +/** + * Class used to run Incremental Repairs in Cassandra. 
+ */ +public class IncrementalRepairJob extends ScheduledRepairJob +{ + private static final Logger LOG = LoggerFactory.getLogger(IncrementalRepairJob.class); + private static final int DAYS_IN_A_WEEK = 7; + private final Node myNode; + private final ReplicationState myReplicationState; + private final CassandraMetrics myCassandraMetrics; + + @SuppressWarnings("PMD.ConstructorCallsOverridableMethod") + IncrementalRepairJob(final Builder builder) + { + super(builder.myConfiguration, builder.myTableReference, builder.myJmxProxyFactory, + builder.myRepairConfiguration, builder.myRepairPolicies, builder.myTableRepairMetrics); + myNode = Preconditions.checkNotNull(builder.myNode, "Node must be set"); + myReplicationState = Preconditions.checkNotNull(builder.myReplicationState, "Replication state must be set"); + myCassandraMetrics = Preconditions.checkNotNull(builder.myCassandraMetrics, "Cassandra metrics must be set"); + setLastSuccessfulRun(); + } + + private void setLastSuccessfulRun() + { + myLastSuccessfulRun = myCassandraMetrics.getMaxRepairedAt(myNode.getHostId(), getTableReference()); + LOG.debug("{} - last successful run: {}", this, myLastSuccessfulRun); + } + + /** + * Get scheduled repair job view. 
+ * + * @return ScheduledRepairJobView + */ + @Override + public ScheduledRepairJobView getView() + { + long now = System.currentTimeMillis(); + return new ScheduledRepairJobView(getId(), getTableReference(), getRepairConfiguration(), getStatus(now), + getProgress(), getNextRunInMs(), getLastSuccessfulRun(), getRepairConfiguration().getRepairType()); + } + + private ScheduledRepairJobView.Status getStatus(final long timestamp) + { + if (getRealPriority() != -1 && !super.runnable()) + { + return ScheduledRepairJobView.Status.BLOCKED; + } + long msSinceLastRepair = timestamp - myLastSuccessfulRun; + if (msSinceLastRepair >= getRepairConfiguration().getRepairErrorTimeInMs()) + { + return ScheduledRepairJobView.Status.OVERDUE; + } + if (msSinceLastRepair >= getRepairConfiguration().getRepairWarningTimeInMs()) + { + return ScheduledRepairJobView.Status.LATE; + } + if (msSinceLastRepair >= (getRepairConfiguration().getRepairIntervalInMs() - getRunOffset())) + { + return ScheduledRepairJobView.Status.ON_TIME; + } + return ScheduledRepairJobView.Status.COMPLETED; + } + + private long getNextRunInMs() + { + return getLastSuccessfulRun() + getRepairConfiguration().getRepairIntervalInMs(); + } + + @SuppressWarnings("checkstyle:MagicNumber") + private double getProgress() + { + return myCassandraMetrics.getPercentRepaired(myNode.getHostId(), getTableReference()) / 100d; + } + + /** + * Iterator for scheduled tasks built up by repair groups. 
+ * + * @return Scheduled task iterator + */ + @Override + public Iterator iterator() + { + ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup( + myReplicationState.getReplicas(getTableReference(), myNode), + ImmutableList.of(), myLastSuccessfulRun); + RepairGroup.Builder builder = RepairGroup.newBuilder() + .withTableReference(getTableReference()) + .withRepairConfiguration(getRepairConfiguration()) + .withJmxProxyFactory(getJmxProxyFactory()) + .withTableRepairMetrics(getTableRepairMetrics()) + .withReplicaRepairGroup(replicaRepairGroup) + .withRepairPolicies(getRepairPolicies()); + List taskList = new ArrayList<>(); + taskList.add(builder.build(getRealPriority())); + return taskList.iterator(); + } + + /** + * Check if there's anything to repair, if not then just move the last run. + */ + @Override + public void refreshState() + { + boolean nothingToRepair = getProgress() >= 1.0; + if (nothingToRepair) + { + myLastSuccessfulRun = System.currentTimeMillis(); + } + } + + /** + * String representation. + * + * @return String + */ + @Override + public String toString() + { + return String.format("Incremental repair job of %s in node %s", getTableReference(), myNode.getHostId()); + } + + @Override + public final boolean equals(final Object o) + { + if (this == o) + { + return true; + } + else if (o == null || getClass() != o.getClass()) + { + return false; + } + if (!super.equals(o)) + { + return false; + } + IncrementalRepairJob that = (IncrementalRepairJob) o; + return Objects.equals(myReplicationState, that.myReplicationState) && Objects.equals( + myCassandraMetrics, that.myCassandraMetrics) && Objects.equals(myNode, that.myNode); + } + + @Override + public final int hashCode() + { + return Objects.hash(super.hashCode(), myReplicationState, myCassandraMetrics, myNode); + } + + /** + * Builder class to construct IncrementalRepairJob. 
+ */ + @SuppressWarnings("VisibilityModifier") + public static class Builder + { + ScheduledJob.Configuration myConfiguration = new ScheduledJob.ConfigurationBuilder().withPriority( + ScheduledJob.Priority.LOW) + .withRunInterval(DAYS_IN_A_WEEK, TimeUnit.DAYS).build(); + private TableReference myTableReference; + private DistributedJmxProxyFactory myJmxProxyFactory; + private Node myNode; + private TableRepairMetrics myTableRepairMetrics = null; + private ReplicationState myReplicationState; + private RepairConfiguration myRepairConfiguration = RepairConfiguration.DEFAULT; + private final List myRepairPolicies = new ArrayList<>(); + private CassandraMetrics myCassandraMetrics; + + /** + * Build with configuration. + * + * @param configuration + * Configuration. + * @return Builder + */ + public Builder withConfiguration(final ScheduledJob.Configuration configuration) + { + myConfiguration = configuration; + return this; + } + + /** + * Build with configuration. + * + * @param node + * Node. + * @return Builder + */ + public Builder withNode(final Node node) + { + myNode = node; + return this; + } + + /** + * Build with table reference. + * + * @param tableReference + * Table reference. + * @return Builder + */ + public Builder withTableReference(final TableReference tableReference) + { + myTableReference = tableReference; + return this; + } + + /** + * Build with JMX proxy factory. + * + * @param jmxProxyFactory + * JMX proxy factory. + * @return Builder + */ + public Builder withJmxProxyFactory(final DistributedJmxProxyFactory jmxProxyFactory) + { + myJmxProxyFactory = jmxProxyFactory; + return this; + } + + /** + * Build with table repair metrics. + * + * @param tableRepairMetrics + * Table repair metrics. + * @return Builder + */ + public Builder withTableRepairMetrics(final TableRepairMetrics tableRepairMetrics) + { + myTableRepairMetrics = tableRepairMetrics; + return this; + } + + /** + * Build with repair configuration. 
+ * + * @param repairConfiguration + * The repair configuration. + * @return Builder + */ + public Builder withRepairConfiguration(final RepairConfiguration repairConfiguration) + { + myRepairConfiguration = repairConfiguration; + return this; + } + + /** + * Build with replication state. + * + * @param replicationState + * Replication state. + * @return Builder + */ + public Builder withReplicationState(final ReplicationState replicationState) + { + myReplicationState = replicationState; + return this; + } + + /** + * Build table repair job with repair policies. + * + * @param repairPolicies + * The table repair policies. + * @return Builder + */ + public Builder withRepairPolices(final Collection repairPolicies) + { + myRepairPolicies.addAll(repairPolicies); + return this; + } + + /** + * Build with cassandra metrics. + * + * @param cassandraMetrics The Cassandra metrics. + * @return Builder + */ + public Builder withCassandraMetrics(final CassandraMetrics cassandraMetrics) + { + myCassandraMetrics = cassandraMetrics; + return this; + } + + /** + * Build table repair job. + * + * @return TableRepairJob + */ + public IncrementalRepairJob build() + { + Preconditions.checkNotNull(myTableReference, "Table reference must be set"); + + return new IncrementalRepairJob(this); + } + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairTask.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairTask.java new file mode 100644 index 000000000..2e209fdcc --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/IncrementalRepairTask.java @@ -0,0 +1,96 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental; + +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.RepairTask; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairOptions; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairStatus; +import java.util.UUID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.Map; + +/** + * Class used to run Incremental Repairs in Cassandra. + */ +public class IncrementalRepairTask extends RepairTask +{ + private static final Logger LOG = LoggerFactory.getLogger(IncrementalRepairTask.class); + + /** + * Constructs an IncrementalRepairTask for a specific node and table. + * + * @param currentNode the UUID of the current node where the repair task is running. Must not be {@code null}. + * @param jmxProxyFactory the factory to create connections to distributed JMX proxies. Must not be {@code null}. + * @param tableReference the reference to the table that is being repaired. Must not be {@code null}. + * @param repairConfiguration the configuration specifying how the repair task should be executed. 
Must not be {@code null}. + * @param tableRepairMetrics the metrics associated with table repairs for monitoring and tracking purposes. Must not be {@code null}. + */ + public IncrementalRepairTask( + final UUID currentNode, + final DistributedJmxProxyFactory jmxProxyFactory, + final TableReference tableReference, + final RepairConfiguration repairConfiguration, + final TableRepairMetrics tableRepairMetrics) + { + super(currentNode, jmxProxyFactory, tableReference, repairConfiguration, tableRepairMetrics); + } + + @Override + protected final Map getOptions() + { + Map options = new HashMap<>(); + options.put(RepairOptions.PARALLELISM_KEY, getRepairConfiguration().getRepairParallelism().getName()); + options.put(RepairOptions.PRIMARY_RANGE_KEY, Boolean.toString(false)); + options.put(RepairOptions.COLUMNFAMILIES_KEY, getTableReference().getTable()); + options.put(RepairOptions.INCREMENTAL_KEY, Boolean.toString(true)); + return options; + } + + @Override + protected final void onFinish(final RepairStatus repairStatus) + { + if (repairStatus.equals(RepairStatus.FAILED)) + { + LOG.warn("Unable to repair '{}', affected ranges: '{}'", this, getFailedRanges()); + } + } + + @Override + protected final void onRangeFinished(final LongTokenRange range, final RepairStatus repairStatus) + { + super.onRangeFinished(range, repairStatus); + LOG.debug("{} for range {}", repairStatus, range); + } + + /** + * String representation. 
+ * + * @return String + */ + @Override + public String toString() + { + return String.format("Incremental repairTask of %s", getTableReference()); + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/package-info.java new file mode 100644 index 000000000..83fb4aa6a --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the implementations and resources for incremental repairs. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/package-info.java new file mode 100644 index 000000000..4f255e0cc --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the implementations and resources for repair operations. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/DefaultJobComparator.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/DefaultJobComparator.java new file mode 100644 index 000000000..16bb5b8ea --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/DefaultJobComparator.java @@ -0,0 +1,43 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import java.io.Serializable; +import java.util.Comparator; + +/** + * The default job comparator used for the {@link ScheduledJobQueue}. + *

+ * This comparator compares jobs based on their {@link ScheduledJob#getRealPriority() current priority} if equal, their + * {@link ScheduledJob#getPriority() configured priority}. + */ +public class DefaultJobComparator implements Comparator, Serializable +{ + private static final long serialVersionUID = 9107238791889095329L; + + @Override + public final int compare(final ScheduledJob j1, final ScheduledJob j2) + { + int result = Integer.compare(j2.getRealPriority(), j1.getRealPriority()); + + if (result == 0) + { + result = Integer.compare(j2.getPriority().getValue(), j1.getPriority().getValue()); + } + + return result; + } +} diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/RepairSchedulerImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/RepairSchedulerImpl.java new file mode 100644 index 000000000..ed7642859 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/RepairSchedulerImpl.java @@ -0,0 +1,401 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.impl.metrics.CassandraMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental.IncrementalRepairJob; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RepairScheduler; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduleManager; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJobView; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairPolicy; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import java.io.Closeable; + +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; +import java.util.AbstractMap; +import java.util.Collection; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Class used to construct repair scheduler. 
+ */ +public final class RepairSchedulerImpl implements RepairScheduler, Closeable +{ + private static final int DEFAULT_TERMINATION_WAIT_IN_SECONDS = 10; + + private static final Logger LOG = LoggerFactory.getLogger(RepairSchedulerImpl.class); + + private final Map>> myScheduledJobs = new ConcurrentHashMap<>(); + private final Object myLock = new Object(); + + private final ExecutorService myExecutor; + private final TableRepairMetrics myTableRepairMetrics; + + private final DistributedJmxProxyFactory myJmxProxyFactory; + private final ScheduleManager myScheduleManager; + private final ReplicationState myReplicationState; + private final CassandraMetrics myCassandraMetrics; + private final List myRepairPolicies; + + private Set validateScheduleMap(final UUID nodeID, final TableReference tableReference) + { + if (!myScheduledJobs.containsKey(nodeID)) + { + Map> scheduledJobs = new HashMap<>(); + scheduledJobs.put(tableReference, new HashSet<>()); + myScheduledJobs.put(nodeID, scheduledJobs); + return myScheduledJobs.get(nodeID).get(tableReference); + } + return myScheduledJobs.get(nodeID).get(tableReference); + } + + private RepairSchedulerImpl(final Builder builder) + { + myExecutor = Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder().setNameFormat("RepairScheduler-%d").build()); + myTableRepairMetrics = builder.myTableRepairMetrics; + myJmxProxyFactory = builder.myJmxProxyFactory; + myScheduleManager = builder.myScheduleManager; + myReplicationState = builder.myReplicationState; + myRepairPolicies = new ArrayList<>(builder.myRepairPolicies); + myCassandraMetrics = builder.myCassandraMetrics; + } + + @Override + public String getCurrentJobStatus() + { + return myScheduleManager.getCurrentJobStatus(); + } + + @Override + public void close() + { + myExecutor.shutdown(); + try + { + if (!myExecutor.awaitTermination(DEFAULT_TERMINATION_WAIT_IN_SECONDS, TimeUnit.SECONDS)) + { + LOG.warn("Waited 10 seconds for executor to shutdown, still not shut 
down"); + } + } + catch (InterruptedException e) + { + LOG.error("Interrupted while waiting for executor to shutdown", e); + Thread.currentThread().interrupt(); + } + + synchronized (myLock) + { + myScheduledJobs.entrySet().stream() + .flatMap(nodeEntry -> nodeEntry.getValue().entrySet().stream() + .flatMap(tableEntry -> tableEntry.getValue().stream() + .map(job -> new AbstractMap.SimpleEntry<>(nodeEntry.getKey(), job)) + ) + ) + .forEach(entry -> descheduleTableJob(entry.getKey(), entry.getValue())); + + myScheduledJobs.clear(); + } + } + + @Override + public void putConfigurations( + final Node node, + final TableReference tableReference, + final Set repairConfiguration) + { + myExecutor.execute(() -> handleTableConfigurationChange(node, tableReference, repairConfiguration)); + } + + @Override + public void removeConfiguration(final Node node, final TableReference tableReference) + { + myExecutor.execute(() -> tableConfigurationRemoved(node, tableReference)); + } + + @Override + public List getCurrentRepairJobs() + { + synchronized (myLock) + { + return myScheduledJobs.values().stream() + .flatMap(tableJobs -> tableJobs.values().stream()) + .flatMap(Set::stream) + .map(ScheduledRepairJob::getView) + .collect(Collectors.toList()); + } + } + + private void handleTableConfigurationChange( + final Node node, + final TableReference tableReference, + final Set repairConfigurations) + { + synchronized (myLock) + { + try + { + if (configurationHasChanged(node, tableReference, repairConfigurations)) + { + LOG.info("Creating schedule for table {} in node {}", tableReference, node.getHostId()); + createTableSchedule(node, tableReference, repairConfigurations); + } + LOG.info("No configuration changes for table {} in node {}", tableReference, node.getHostId()); + } + catch (Exception e) + { + LOG.error("Unexpected error during schedule change of {}:", tableReference, e); + } + } + } + + private boolean configurationHasChanged( + final Node node, + final TableReference 
tableReference, + final Set repairConfigurations) + { + Set jobs = validateScheduleMap(node.getHostId(), tableReference); + if (repairConfigurations == null || repairConfigurations.isEmpty()) + { + return false; + } + + if (jobs == null || jobs.isEmpty()) + { + return true; + } + + int matching = 0; + + for (ScheduledRepairJob job : jobs) + { + for (RepairConfiguration repairConfiguration : repairConfigurations) + { + if (job.getRepairConfiguration().equals(repairConfiguration)) + { + matching++; + } + } + } + return matching != repairConfigurations.size(); + } + + private void createTableSchedule( + final Node node, + final TableReference tableReference, + final Set repairConfigurations) + { + Set currentJobs = myScheduledJobs.get(node.getHostId()).get(tableReference); + Map> tableJob = new HashMap<>(); + if (currentJobs != null) + { + for (ScheduledRepairJob job : currentJobs) + { + descheduleTableJob(node.getHostId(), job); + } + } + Set newJobs = new HashSet<>(); + for (RepairConfiguration repairConfiguration : repairConfigurations) + { + ScheduledRepairJob job = createScheduledRepairJob(node, tableReference, repairConfiguration); + newJobs.add(job); + myScheduleManager.schedule(node.getHostId(), job); + } + tableJob.put(tableReference, newJobs); + myScheduledJobs.put(node.getHostId(), tableJob); + } + + private void tableConfigurationRemoved(final Node node, final TableReference tableReference) + { + synchronized (myLock) + { + try + { + Set jobs = myScheduledJobs.get(node.getHostId()).remove(tableReference); + for (ScheduledRepairJob job : jobs) + { + descheduleTableJob(node.getHostId(), job); + } + } + catch (Exception e) + { + LOG.error("Unexpected error during schedule removal of {}:", tableReference, e); + } + } + } + + private void descheduleTableJob(final UUID nodeID, final ScheduledJob job) + { + if (job != null) + { + myScheduleManager.deschedule(nodeID, job); + } + } + + private ScheduledRepairJob createScheduledRepairJob( + final Node node, + final 
TableReference tableReference, + final RepairConfiguration repairConfiguration) + { + ScheduledJob.Configuration configuration = new ScheduledJob.ConfigurationBuilder() + .withPriority(ScheduledJob.Priority.LOW) + .withRunInterval(repairConfiguration.getRepairIntervalInMs(), TimeUnit.MILLISECONDS) + .withBackoff(repairConfiguration.getBackoffInMs(), TimeUnit.MILLISECONDS) + .withPriorityGranularity(repairConfiguration.getPriorityGranularityUnit()) + .build(); + ScheduledRepairJob job; + job = new IncrementalRepairJob.Builder() + .withConfiguration(configuration) + .withNode(node) + .withJmxProxyFactory(myJmxProxyFactory) + .withTableReference(tableReference) + .withRepairConfiguration(repairConfiguration) + .withTableRepairMetrics(myTableRepairMetrics) + .withCassandraMetrics(myCassandraMetrics) + .withReplicationState(myReplicationState) + .withRepairPolices(myRepairPolicies) + .build(); + job.refreshState(); + return job; + } + + /** + * Create instance of Builder to construct RepairSchedulerImpl. + * + * @return Builder + */ + public static Builder builder() + { + return new Builder(); + } + + /** + * Builder used to construct RepairSchedulerImpl. + */ + public static class Builder + { + private DistributedJmxProxyFactory myJmxProxyFactory; + private ScheduleManager myScheduleManager; + private ReplicationState myReplicationState; + private CassandraMetrics myCassandraMetrics; + private final List myRepairPolicies = new ArrayList<>(); + private TableRepairMetrics myTableRepairMetrics; + + + /** + * RepairSchedulerImpl build with JMX proxy factory. + * + * @param jmxProxyFactory JMX proxy factory. + * @return Builder + */ + public Builder withJmxProxyFactory(final DistributedJmxProxyFactory jmxProxyFactory) + { + myJmxProxyFactory = jmxProxyFactory; + return this; + } + + /** + * RepairSchedulerImpl build with schedule manager. + * + * @param scheduleManager Schedule manager. 
+ * @return Builder + */ + public Builder withScheduleManager(final ScheduleManager scheduleManager) + { + myScheduleManager = scheduleManager; + return this; + } + + /** + * RepairSchedulerImpl build with replication state. + * + * @param theReplicationState Replication state. + * @return Builder + */ + public Builder withReplicationState(final ReplicationState theReplicationState) + { + myReplicationState = theReplicationState; + return this; + } + + /** + * RepairSchedulerImpl build with repair policies. + * + * @param tableRepairPolicies Table repair policies. + * @return Builder + */ + public Builder withRepairPolicies(final Collection tableRepairPolicies) + { + myRepairPolicies.addAll(tableRepairPolicies); + return this; + } + + /** + * Build with cassandra metrics. + * + * @param cassandraMetrics Cassandra metrics. + * @return Builder + */ + public Builder withCassandraMetrics(final CassandraMetrics cassandraMetrics) + { + myCassandraMetrics = cassandraMetrics; + return this; + } + + /** + * RepairSchedulerImpl build with table repair metrics. + * + * @param tableRepairMetrics Table repair metrics. + * @return Builder + */ + public Builder withTableRepairMetrics(final TableRepairMetrics tableRepairMetrics) + { + myTableRepairMetrics = tableRepairMetrics; + return this; + } + + /** + * RepairSchedulerImpl build. 
+ * + * @return RepairSchedulerImpl + */ + public RepairSchedulerImpl build() + { + return new RepairSchedulerImpl(this); + } + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/ScheduleManagerImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/ScheduleManagerImpl.java new file mode 100644 index 000000000..8d88807ee --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/ScheduleManagerImpl.java @@ -0,0 +1,358 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RunPolicy; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduleManager; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import java.io.Closeable; +import java.util.Collection; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledFuture; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.Executors; + +import java.util.concurrent.atomic.AtomicReference; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Sets; +import com.google.common.util.concurrent.ThreadFactoryBuilder; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * ScheduleManager handles the run scheduler and update scheduler. 
+ */ +public final class ScheduleManagerImpl implements ScheduleManager, Closeable +{ + private static final Logger LOG = LoggerFactory.getLogger(ScheduleManagerImpl.class); + + static final long DEFAULT_RUN_DELAY_IN_MS = TimeUnit.SECONDS.toMillis(30); + + private static final String NO_RUNNING_JOB = "No job is currently running"; + + private final Map myQueue = new ConcurrentHashMap<>(); + private final Collection myNodeIDList; + private final AtomicReference currentExecutingJob = new AtomicReference<>(); + private final Set myRunPolicies = Sets.newConcurrentHashSet(); + private final Map> myRunFuture = new ConcurrentHashMap<>(); + private final Map myRunTasks = new ConcurrentHashMap<>(); + + private final ScheduledExecutorService myExecutor = Executors.newSingleThreadScheduledExecutor( + new ThreadFactoryBuilder().setNameFormat("TaskExecutor-%d").build()); + + private ScheduleManagerImpl(final Builder builder) + { + myNodeIDList = builder.myNodeIDList; + createScheduleFutureForNodeIDList(builder); + } + + private void createScheduleFutureForNodeIDList(final Builder builder) + { + for (UUID nodeID : myNodeIDList) + { + JobRunTask myRunTask = new JobRunTask(nodeID); + ScheduledFuture scheduledFuture = myExecutor.scheduleWithFixedDelay(myRunTask, + builder.myRunIntervalInMs, + builder.myRunIntervalInMs, + TimeUnit.MILLISECONDS); + myRunTasks.put(nodeID, myRunTask); + myRunFuture.put(nodeID, scheduledFuture); + } + } + + @Override + public String getCurrentJobStatus() + { + ScheduledJob job = currentExecutingJob.get(); + if (job != null) + { + String jobId = job.getId().toString(); + return "Job ID: " + jobId + ", Status: Running"; + } + else + { + return ScheduleManagerImpl.NO_RUNNING_JOB; + } + } + + /** + * Adds a run policy to the collection of run policies. + * + * @param runPolicy the {@link RunPolicy} to be added. Must not be {@code null}. + * @return {@code true} if the run policy was added successfully; {@code false} if it was already present. 
+ */ + public boolean addRunPolicy(final RunPolicy runPolicy) + { + LOG.debug("Run policy {} added", runPolicy); + return myRunPolicies.add(runPolicy); + } + + /** + * Removes a run policy from the collection of run policies. + * + * @param runPolicy the {@link RunPolicy} to be removed. Must not be {@code null}. + * @return {@code true} if the run policy was successfully removed; {@code false} if it was not present. + */ + + public boolean removeRunPolicy(final RunPolicy runPolicy) + { + LOG.debug("Run policy {} removed", runPolicy); + return myRunPolicies.remove(runPolicy); + } + + @Override + public void schedule( + final UUID nodeID, + final ScheduledJob job) + { + ScheduledJobQueue queue = myQueue.get(nodeID); + if (queue == null) + { + myQueue.put(nodeID, new ScheduledJobQueue(new DefaultJobComparator())); + } + myQueue.get(nodeID).add(job); + } + + @Override + public void deschedule(final UUID nodeID, final ScheduledJob job) + { + myQueue.get(nodeID).remove(job); + } + + @Override + public void close() + { + for (ScheduledFuture future : myRunFuture.values()) + { + future.cancel(false); + } + myExecutor.shutdown(); + myRunPolicies.clear(); + } + + /** + * Made available for testing. + * + * @param nodeID the node id to run jobs. + */ + @VisibleForTesting + public void run(final UUID nodeID) + { + myRunTasks.get(nodeID).run(); + } + + /** + * Made available for testing. + * + * @param nodeID the node id to get queue size. + * @return int Queue size. + */ + @VisibleForTesting + public int getQueueSize(final UUID nodeID) + { + return myQueue.get(nodeID).size(); + } + + private Long validateJob(final ScheduledJob job) + { + for (RunPolicy runPolicy : myRunPolicies) + { + long nextRun = runPolicy.validate(job); + if (nextRun != -1L) + { + LOG.debug("Job {} rejected for {} ms by {}", job, nextRun, runPolicy); + return nextRun; + } + } + + return -1L; + } + + + /** + * Internal run task that is scheduled by the {@link ScheduleManagerImpl}. + *

+ * Retrieves a job from the queue and tries to run it provided that it's possible to get the required locks. + */ + private final class JobRunTask implements Runnable + { + private final UUID nodeID; + + private JobRunTask(final UUID currentNodeID) + { + nodeID = currentNodeID; + } + + @Override + public void run() + { + try + { + tryRunNext(); + } + catch (Exception e) + { + LOG.error("Exception while running job in node {}", nodeID, e); + } + } + + private void tryRunNext() + { + for (ScheduledJob next : myQueue.get(nodeID)) + { + if (validate(next)) + { + currentExecutingJob.set(next); + if (tryRunTasks(next)) + { + break; + } + } + } + currentExecutingJob.set(null); + } + + private boolean validate(final ScheduledJob job) + { + LOG.trace("Validating job {}", job); + long nextRun = validateJob(job); + + if (nextRun != -1L) + { + job.setRunnableIn(nextRun); + return false; + } + + return true; + } + + private boolean tryRunTasks( + final ScheduledJob next) + { + boolean hasRun = false; + + for (ScheduledTask task : next) + { + if (!validate(next)) + { + LOG.debug("Job {} was stopped, will continue later", next); + break; + } + hasRun |= tryRunTask(next, task); + } + + return hasRun; + } + + private boolean tryRunTask( + final ScheduledJob job, + final ScheduledTask task) + { + LOG.debug("Trying to run task {} in node {}", task, nodeID); + // TODO need to implement lock mechanism + try + { + boolean successful = runTask(task); + job.postExecute(successful); + return true; + } + catch (Exception e) + { + if (e.getCause() != null) + { + LOG.warn("Unable to get schedule lock on task {} in node {}", task, nodeID, e); + } + return false; + } + } + + private boolean runTask( + final ScheduledTask task) + { + try + { + LOG.info("Running task: {}, for node {}", task, nodeID); + return task.execute(nodeID); + } + catch (Exception e) + { + LOG.warn("Unable to run task: {} in node: {}", task, nodeID, e); + } + + return false; + } + } + + /** + * Create an instance of 
Builder to construct ScheduleManagerImpl. + * + * @return Builder + */ + public static Builder builder() + { + return new Builder(); + } + + /** + * Builder class to construct ScheduleManagerImpl. + */ + public static class Builder + { + private Collection myNodeIDList; + private long myRunIntervalInMs = DEFAULT_RUN_DELAY_IN_MS; + + /** + * Build SchedulerManager with run interval. + * + * @param runInterval the interval to run a repair task + * @param timeUnit the TimeUnit to specify the interval + * @return Builder with run interval + */ + public final Builder withRunInterval(final long runInterval, final TimeUnit timeUnit) + { + myRunIntervalInMs = timeUnit.toMillis(runInterval); + return this; + } + + /** + * Build SchedulerManager with run interval. + * + * @param nodeIDList the interval to run a repair task + * @return Builder with nodes list + */ + public Builder withNodeIDList(final Collection nodeIDList) + { + myNodeIDList = nodeIDList; + return this; + } + + /** + * Build SchedulerManager with the provided configuration. + * + * @return ScheduleManagerImpl with provided configuration. + */ + public final ScheduleManagerImpl build() + { + return new ScheduleManagerImpl(this); + } + } +} + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/ScheduledJobQueue.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/ScheduledJobQueue.java new file mode 100644 index 000000000..35f2fdf46 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/ScheduledJobQueue.java @@ -0,0 +1,163 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.utils.converter.ManyToOneIterator; +import java.util.Collection; +import java.util.Comparator; +import java.util.EnumMap; +import java.util.Iterator; +import java.util.PriorityQueue; + +import com.google.common.collect.AbstractIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.annotations.VisibleForTesting; + +/** + * Dynamic priority queue for scheduled jobs. + *

+ * This queue is divided in several smaller queues, one for each {@link ScheduledJob.Priority priority type} and are + * then retrieved using a + * {@link ManyToOneIterator}. + */ +public class ScheduledJobQueue implements Iterable +{ + private static final Logger LOG = LoggerFactory.getLogger(ScheduledJobQueue.class); + + private final Comparator myComparator; + + private final EnumMap> myJobQueues + = new EnumMap<>(ScheduledJob.Priority.class); + + /** + * Construct a new job queue that prioritizes the jobs based on the provided comparator. + * + * @param comparator + * The comparator used to determine the job with the highest priority. + */ + public ScheduledJobQueue(final Comparator comparator) + { + this.myComparator = comparator; + + for (ScheduledJob.Priority priority : ScheduledJob.Priority.values()) + { + myJobQueues.put(priority, new PriorityQueue<>(1, comparator)); + } + } + + /** + * Add a job to the queue. + * + * @param job + * The job to add. + */ + public synchronized void add(final ScheduledJob job) + { + addJobInternal(job); + } + + /** + * Add a collection of jobs to the queue at once. + * + * @param jobs + * The collection of jobs. + */ + public synchronized void addAll(final Collection jobs) + { + for (ScheduledJob job : jobs) + { + addJobInternal(job); + } + } + + /** + * Remove the provided job from the queue. + * + * @param job + * The job to remove. 
+ */ + public synchronized void remove(final ScheduledJob job) + { + LOG.debug("Removing job: {}", job); + myJobQueues.get(job.getPriority()).remove(job); + } + + private void addJobInternal(final ScheduledJob job) + { + LOG.debug("Adding job: {}, Priority: {}", job, job.getPriority()); + myJobQueues.get(job.getPriority()).add(job); + } + + @VisibleForTesting + final int size() + { + int size = 0; + + for (PriorityQueue queue : myJobQueues.values()) + { + size += queue.size(); + } + + return size; + } + + @Override + public final synchronized Iterator iterator() + { + myJobQueues.values().forEach(q -> q.forEach(ScheduledJob::refreshState)); + Iterator baseIterator = new ManyToOneIterator<>(myJobQueues.values(), myComparator); + + return new RunnableJobIterator(baseIterator); + } + + private class RunnableJobIterator extends AbstractIterator + { + private final Iterator myBaseIterator; + + RunnableJobIterator(final Iterator baseIterator) + { + myBaseIterator = baseIterator; + } + + @Override + protected ScheduledJob computeNext() + { + while (myBaseIterator.hasNext()) + { + ScheduledJob job = myBaseIterator.next(); + + ScheduledJob.State state = job.getState(); + if (state == ScheduledJob.State.FAILED || state == ScheduledJob.State.FINISHED) + { + LOG.info("{}: {}, descheduling", job, state); + job.finishJob(); + ScheduledJobQueue.this.remove(job); + } + else if (state != ScheduledJob.State.PARKED) + { + LOG.debug("Retrieving job: {}, Priority: {}", job, job.getPriority()); + return job; + } + } + + return endOfData(); + } + } +} + + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/package-info.java new file mode 100644 index 000000000..96866bd5d --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 
Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the implementations and resources for repair scheduler. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/ReplicationStateImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/ReplicationStateImpl.java similarity index 99% rename from core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/ReplicationStateImpl.java rename to core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/ReplicationStateImpl.java index adc108ade..4706526ff 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/ReplicationStateImpl.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/ReplicationStateImpl.java @@ -12,7 +12,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package com.ericsson.bss.cassandra.ecchronos.core.impl.state; +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state; import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; import com.ericsson.bss.cassandra.ecchronos.core.metadata.NodeResolver; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/package-info.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/package-info.java similarity index 90% rename from core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/package-info.java rename to core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/package-info.java index 860498539..ab899949e 100644 --- a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/package-info.java +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/state/package-info.java @@ -15,4 +15,4 @@ /** * Contains the implementation and resources for stateful declarations. */ -package com.ericsson.bss.cassandra.ecchronos.core.impl.state; +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state; diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/ReplicatedTableProviderImpl.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/ReplicatedTableProviderImpl.java new file mode 100644 index 000000000..3122c3cb8 --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/ReplicatedTableProviderImpl.java @@ -0,0 +1,169 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.table; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.metadata.Node; +import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.Metadata; +import com.ericsson.bss.cassandra.ecchronos.core.table.ReplicatedTableProvider; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReferenceFactory; + +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.stream.Collectors; + +/** + * Implementation of ReplicatedTableProvider for retrieving tables replicated by the local node. + * The purpose of this is to abstract away java-driver related mocking from other components + * trying to retrieve the tables that should be repaired. 
+ */ +public class ReplicatedTableProviderImpl implements ReplicatedTableProvider +{ + private static final Logger LOG = LoggerFactory.getLogger(ReplicatedTableProviderImpl.class); + + private static final String STRATEGY_CLASS = "class"; + private static final String SIMPLE_STRATEGY = "org.apache.cassandra.locator.SimpleStrategy"; + private static final String NETWORK_TOPOLOGY_STRATEGY = "org.apache.cassandra.locator.NetworkTopologyStrategy"; + + private final List myNodes; + + private static final String SIMPLE_STRATEGY_REPLICATION_FACTOR = "replication_factor"; + + private static final String SYSTEM_AUTH_KEYSPACE = "system_auth"; + + private final CqlSession mySession; + private final TableReferenceFactory myTableReferenceFactory; + + /** + * Constructs a ReplicatedTableProviderImpl to manage table references in a replicated environment. + * + * @param session the {@link CqlSession} used to connect to the Cassandra cluster. Must not be {@code null}. + * @param tableReferenceFactory the factory to create {@link TableReference} instances. Must not be {@code null}. + * @param nodes the list of {@link Node} objects representing the nodes in the Cassandra cluster. Must not be {@code null}. 
+ */ + public ReplicatedTableProviderImpl( + final CqlSession session, + final TableReferenceFactory tableReferenceFactory, + final List nodes) + { + mySession = session; + myTableReferenceFactory = tableReferenceFactory; + myNodes = nodes; + } + + /** + * {@inheritDoc} + */ + @Override + public final Set getAll() + { + return myNodes.stream() + .flatMap(node -> mySession.getMetadata().getKeyspaces().values().stream() + .filter(k -> accept(node, k.getName().asInternal())) // Chama o accept com o Node e o nome da keyspace + .flatMap(k -> k.getTables().values().stream()) + .map(tb -> myTableReferenceFactory.forTable(tb.getKeyspace().asInternal(), tb.getName().asInternal())) + ) + .collect(Collectors.toSet()); + } + + /** + * {@inheritDoc} + */ + @Override + public boolean accept( + final Node node, + final String keyspace + ) + { + if (keyspace.startsWith("system") && !SYSTEM_AUTH_KEYSPACE.equals(keyspace)) + { + return false; + } + + Optional keyspaceMetadata = Metadata.getKeyspace(mySession, keyspace); + + if (keyspaceMetadata.isPresent()) + { + Map replication = keyspaceMetadata.get().getReplication(); + String replicationClass = replication.get(STRATEGY_CLASS); + + switch (replicationClass) + { + case SIMPLE_STRATEGY: + return validateSimpleStrategy(replication); + case NETWORK_TOPOLOGY_STRATEGY: + return validateNetworkTopologyStrategy(node, keyspace, replication); + default: + LOG.warn("Replication strategy of type {} is not supported", replicationClass); + break; + } + } + + return false; + } + + private boolean validateSimpleStrategy(final Map replication) + { + int replicationFactor = Integer.parseInt(replication.get(SIMPLE_STRATEGY_REPLICATION_FACTOR)); + + return replicationFactor > 1; + } + + private boolean validateNetworkTopologyStrategy( + final Node currentNode, + final String keyspace, final Map replication) + { + String localDc = currentNode.getDatacenter(); + + if (localDc == null) + { + LOG.error("Local data center is not defined, ignoring 
keyspace {}", keyspace); + return false; + } + + if (!replication.containsKey(localDc)) + { + LOG.warn("Keyspace {} not replicated by node, ignoring.", keyspace); + return false; + } + + return definedReplicationInNetworkTopologyStrategy(replication) > 1; + } + + private int definedReplicationInNetworkTopologyStrategy(final Map replication) + { + int replicationFactor = 0; + + for (Map.Entry replicationEntry : replication.entrySet()) + { + if (!STRATEGY_CLASS.equals(replicationEntry.getKey())) + { + replicationFactor += Integer.parseInt(replicationEntry.getValue()); + } + } + + return replicationFactor; + } +} + + diff --git a/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TimeBasedRunPolicy.java b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TimeBasedRunPolicy.java new file mode 100644 index 000000000..b31555b9e --- /dev/null +++ b/core.impl/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/impl/table/TimeBasedRunPolicy.java @@ -0,0 +1,397 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.table; + +import com.datastax.oss.driver.api.core.CqlSession; +import com.datastax.oss.driver.api.core.cql.PreparedStatement; +import com.datastax.oss.driver.api.core.cql.ResultSet; +import com.datastax.oss.driver.api.core.cql.Row; +import com.datastax.oss.driver.api.core.cql.Statement; +import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; +import com.datastax.oss.driver.api.querybuilder.QueryBuilder; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RunPolicy; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairPolicy; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.LoadingCache; +import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.Closeable; +import java.time.Clock; +import java.time.Duration; +import java.time.LocalDateTime; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import static com.datastax.oss.driver.api.querybuilder.QueryBuilder.bindMarker; + +/** + * Time based run policy. 
+ * + * Expected keyspace/table: + * CREATE KEYSPACE IF NOT EXISTS ecchronos WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1': 1}; + * + * CREATE TABLE IF NOT EXISTS ecchronos.reject_configuration( + * keyspace_name text, + * table_name text, + * start_hour int, + * start_minute int, + * end_hour int, + * end_minute int, + * PRIMARY KEY(keyspace_name, table_name, start_hour, start_minute)); + */ +public class TimeBasedRunPolicy implements TableRepairPolicy, RunPolicy, Closeable +{ + private static final Logger LOG = LoggerFactory.getLogger(TimeBasedRunPolicy.class); + + private static final String TABLE_REJECT_CONFIGURATION = "reject_configuration"; + + private static final long DEFAULT_REJECT_TIME_IN_MS = TimeUnit.MINUTES.toMillis(1); + + static final long DEFAULT_CACHE_EXPIRE_TIME_IN_MS = TimeUnit.SECONDS.toMillis(10); + + private final PreparedStatement myGetRejectionsStatement; + private final CqlSession mySession; + private final Clock myClock; + private final LoadingCache myTimeRejectionCache; + + /** + * Constructs a new instance of {@link TimeBasedRunPolicy} using the specified {@link Builder}. + * + * @param builder the {@link Builder} containing the configuration settings for the {@link TimeBasedRunPolicy}. + * Must not be {@code null}. 
+ */ + public TimeBasedRunPolicy(final Builder builder) + { + mySession = builder.mySession; + myClock = builder.myClock; + + myGetRejectionsStatement = mySession.prepare( + QueryBuilder.selectFrom(builder.myKeyspaceName, TABLE_REJECT_CONFIGURATION) + .all() + .whereColumn("keyspace_name") + .isEqualTo(bindMarker()) + .whereColumn("table_name").isEqualTo(bindMarker()) + .build()); + + myTimeRejectionCache = createConfigCache(builder.myCacheExpireTime); + } + + private LoadingCache createConfigCache(final long expireAfterInMs) + { + return Caffeine.newBuilder() + .expireAfterWrite(expireAfterInMs, TimeUnit.MILLISECONDS) + .executor(Runnable::run) + .build(key -> load(key)); + } + + private TimeRejectionCollection load(final TableKey key) + { + Statement statement = myGetRejectionsStatement.bind(key.getKeyspace(), key.getTable()); + + ResultSet resultSet = mySession.execute(statement); + Iterator iterator = resultSet.iterator(); + return new TimeRejectionCollection(iterator); + } + + @Override + public final long validate(final ScheduledJob job) + { + return -1L; + } + + @Override + public final boolean shouldRun(final TableReference tableReference) + { + return getRejectionsForTable(tableReference) == -1L; + } + + @Override + public final void close() + { + myTimeRejectionCache.invalidateAll(); + myTimeRejectionCache.cleanUp(); + } + + /** + * Create an instance of Builder class to construct TimeBasedRunPolicy. + */ + public static Builder builder() + { + return new Builder(); + } + + /** + * Builder class to construct TimeBasedRunPolicy. + */ + public static class Builder + { + private static final String DEFAULT_KEYSPACE_NAME = "ecchronos"; + + private CqlSession mySession; + private String myKeyspaceName = DEFAULT_KEYSPACE_NAME; + private static long myCacheExpireTime = DEFAULT_CACHE_EXPIRE_TIME_IN_MS; + private final Clock myClock = Clock.systemDefaultZone(); + + /** + * Sets the {@link CqlSession} to be used by the {@link TimeBasedRunPolicy}. 
+ * + * @param session the {@link CqlSession} to set. Must not be {@code null}. + * @return the current {@link Builder} instance for method chaining. + */ + public final Builder withSession(final CqlSession session) + { + mySession = session; + return this; + } + + /** + * Sets the keyspace name to be used by the {@link TimeBasedRunPolicy}. + * + * @param keyspaceName the name of the keyspace. Must not be {@code null}. + * @return the current {@link Builder} instance for method chaining. + */ + public final Builder withKeyspaceName(final String keyspaceName) + { + myKeyspaceName = keyspaceName; + return this; + } + + /** + * Builds a new instance of {@link TimeBasedRunPolicy} using the configured parameters. + * + * @return a new {@link TimeBasedRunPolicy} instance. + */ + public final TimeBasedRunPolicy build() + { + verifySchemasExists(); + return new TimeBasedRunPolicy(this); + } + + private void verifySchemasExists() + { + Optional keyspaceMetadata = mySession.getMetadata().getKeyspace(myKeyspaceName); + + if (keyspaceMetadata.isEmpty()) + { + String msg = String.format("Keyspace %s does not exist, it needs to be created", myKeyspaceName); + LOG.error(msg); + throw new IllegalStateException(msg); + } + + if (keyspaceMetadata.get().getTable(TABLE_REJECT_CONFIGURATION).isEmpty()) + { + String msg = String.format("Table %s.%s does not exist, it needs to be created", + myKeyspaceName, TABLE_REJECT_CONFIGURATION); + LOG.error(msg); + throw new IllegalStateException(msg); + } + } + } + + /** + * Also visible for testing. 
+ */ + @VisibleForTesting + void clearCache() + { + myTimeRejectionCache.invalidateAll(); + } + + class TimeRejectionCollection + { + private final List myRejections = new ArrayList<>(); + + TimeRejectionCollection(final Iterator iterator) + { + while (iterator.hasNext()) + { + Row row = iterator.next(); + myRejections.add(new TimeRejection(row)); + } + } + + public long rejectionTime() + { + for (TimeRejection rejection : myRejections) + { + long rejectionTime = rejection.rejectionTime(); + + if (rejectionTime != -1L) + { + return rejectionTime; + } + } + + return -1L; + } + } + + class TimeRejection + { + private final LocalDateTime myStart; + private final LocalDateTime myEnd; + + TimeRejection(final Row row) + { + myStart = toDateTime(row.getInt("start_hour"), row.getInt("start_minute")); + myEnd = toDateTime(row.getInt("end_hour"), row.getInt("end_minute")); + } + + public long rejectionTime() + { + // 00:00->00:00 means that we pause the repair scheduling, + // so wait DEFAULT_REJECT_TIME instead of until 00:00 + if (myStart.getHour() == 0 + && myStart.getMinute() == 0 + && myEnd.getHour() == 0 + && myEnd.getMinute() == 0) + { + return DEFAULT_REJECT_TIME_IN_MS; + } + + return calculateRejectTime(); + } + + private long calculateRejectTime() + { + LocalDateTime now = LocalDateTime.now(myClock); + + if (isWraparound()) + { + if (now.isBefore(myEnd)) + { + return Duration.between(now, myEnd).toMillis(); + } + else if (now.isAfter(myStart)) + { + return Duration.between(now, myEnd.plusDays(1)).toMillis(); + } + } + else if (now.isAfter(myStart) && now.isBefore(myEnd)) + { + return Duration.between(now, myEnd).toMillis(); + } + + return -1L; + } + + private boolean isWraparound() + { + return myEnd.isBefore(myStart); + } + + private LocalDateTime toDateTime(final int h, final int m) + { + return LocalDateTime.now(myClock) + .withHour(h) + .withMinute(m) + .withSecond(0); + } + } + + private long getRejectionsForTable(final TableReference tableReference) + { + 
long rejectTime = -1L; + try + { + TableKey[] tableKeys = new TableKey[] + { + allKeyspaces(), + allKeyspaces(tableReference.getTable()), + forTable(tableReference) + }; + + for (int i = 0; i < tableKeys.length && rejectTime == -1L; i++) + { + rejectTime = myTimeRejectionCache.get(tableKeys[i]).rejectionTime(); + } + } + catch (Exception e) + { + LOG.error("Unable to parse/fetch rejection time for {}", tableReference, e); + rejectTime = DEFAULT_REJECT_TIME_IN_MS; + } + + return rejectTime; + } + + private TableKey allKeyspaces() + { + return new TableKey("*", "*"); + } + + private TableKey allKeyspaces(final String table) + { + return new TableKey("*", table); + } + + private TableKey forTable(final TableReference tableReference) + { + return new TableKey(tableReference.getKeyspace(), tableReference.getTable()); + } + + static class TableKey + { + private final String myKeyspace; + private final String myTable; + + TableKey(final String keyspace, final String table) + { + myKeyspace = keyspace; + myTable = table; + } + + String getKeyspace() + { + return myKeyspace; + } + + String getTable() + { + return myTable; + } + + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + TableKey tableKey = (TableKey) o; + return myTable.equals(tableKey.myTable) && myKeyspace.equals(tableKey.myKeyspace); + } + + @Override + public int hashCode() + { + return Objects.hash(myKeyspace, myTable); + } + } +} + + diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestRepairGroup.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestRepairGroup.java new file mode 100644 index 000000000..a8812d5f7 --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/TestRepairGroup.java @@ -0,0 +1,213 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed 
under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.doNothing; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; +import static org.mockito.Mockito.ignoreStubs; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.spy; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + + +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental.IncrementalRepairTask; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicaRepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairParallelism; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ScheduledJobException; +import java.util.ArrayList; +import java.util.Collection; +import 
java.util.UUID; +import java.util.concurrent.TimeUnit; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; +import static com.ericsson.bss.cassandra.ecchronos.core.impl.table.MockTableReferenceFactory.tableReference; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; + +@RunWith(MockitoJUnitRunner.Silent.class) +public class TestRepairGroup +{ + private static final String KEYSPACE_NAME = "keyspace"; + private static final String TABLE_NAME = "table"; + private static final TableReference TABLE_REFERENCE = tableReference(KEYSPACE_NAME, TABLE_NAME); + private static final int PRIORITY = 1; + + private static final long RUN_INTERVAL_IN_DAYS = 1; + private static final long GC_GRACE_DAYS_IN_DAYS = 10; + + @Mock + private DistributedJmxProxyFactory myJmxProxyFactory; + + @Mock + private TableRepairMetrics myTableRepairMetrics; + + private final UUID myNodeID = UUID.randomUUID(); + + private RepairConfiguration myRepairConfiguration; + + @Before + public void init() + { + myRepairConfiguration = RepairConfiguration.newBuilder() + .withParallelism(RepairParallelism.PARALLEL) + .withRepairWarningTime(RUN_INTERVAL_IN_DAYS * 2, TimeUnit.DAYS) + .withRepairErrorTime(GC_GRACE_DAYS_IN_DAYS, TimeUnit.DAYS) + .build(); + } + + @After + public void finalVerification() + { + verifyNoMoreInteractions(ignoreStubs(myJmxProxyFactory)); + verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + } + + @Test + public void testGetIncrementalRepairTask() + { + DriverNode node = mockNode("DC1"); + when(node.getId()).thenReturn(myNodeID); + ImmutableSet nodes = ImmutableSet.of(node); + ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(nodes, ImmutableList.of(), System.currentTimeMillis()); + RepairConfiguration repairConfiguration = RepairConfiguration.newBuilder() + 
.withParallelism(RepairParallelism.PARALLEL) + .withRepairWarningTime(RUN_INTERVAL_IN_DAYS * 2, TimeUnit.DAYS) + .withRepairErrorTime(GC_GRACE_DAYS_IN_DAYS, TimeUnit.DAYS) + .withRepairType(RepairType.INCREMENTAL) + .build(); + + RepairGroup repairGroup = builderFor(replicaRepairGroup).withRepairConfiguration(repairConfiguration).build( + PRIORITY); + + Collection repairTasks = repairGroup.getRepairTasks(myNodeID); + + assertThat(repairTasks).hasSize(1); + IncrementalRepairTask repairTask = (IncrementalRepairTask) repairTasks.iterator().next(); + + assertThat(repairTask.getTableReference()).isEqualTo(TABLE_REFERENCE); + assertThat(repairTask.getRepairConfiguration().getRepairParallelism()).isEqualTo(RepairParallelism.PARALLEL); + assertThat(repairTask.getRepairConfiguration().getRepairType()).isEqualTo(RepairType.INCREMENTAL); + } + + @Test + public void testExecuteAllTasksSuccessful() throws ScheduledJobException + { + DriverNode node = mockNode("DC1"); + when(node.getId()).thenReturn(myNodeID); + LongTokenRange range = new LongTokenRange(1, 2); + ImmutableSet nodes = ImmutableSet.of(node); + ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(nodes, ImmutableList.of(range), System.currentTimeMillis()); + + RepairGroup repairGroup = spy(builderFor(replicaRepairGroup).build(PRIORITY)); + RepairTask repairTask1 = mock(RepairTask.class); + RepairTask repairTask2 = mock(RepairTask.class); + RepairTask repairTask3 = mock(RepairTask.class); + Collection tasks = new ArrayList<>(); + tasks.add(repairTask1); + tasks.add(repairTask2); + tasks.add(repairTask3); + doReturn(tasks).when(repairGroup).getRepairTasks(myNodeID); + doNothing().when(repairTask1).execute(); + doNothing().when(repairTask2).execute(); + doNothing().when(repairTask3).execute(); + + boolean success = repairGroup.execute(myNodeID); + assertThat(success).isTrue(); + } + + @Test + public void testExecuteAllTasksFailed() throws ScheduledJobException + { + DriverNode node = mockNode("DC1"); + 
when(node.getId()).thenReturn(myNodeID); + LongTokenRange range = new LongTokenRange(1, 2); + ImmutableSet nodes = ImmutableSet.of(node); + ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(nodes, ImmutableList.of(range), System.currentTimeMillis()); + + RepairGroup repairGroup = spy(builderFor(replicaRepairGroup).build(PRIORITY)); + RepairTask repairTask1 = mock(RepairTask.class); + RepairTask repairTask2 = mock(RepairTask.class); + RepairTask repairTask3 = mock(RepairTask.class); + Collection tasks = new ArrayList<>(); + tasks.add(repairTask1); + tasks.add(repairTask2); + tasks.add(repairTask3); + doReturn(tasks).when(repairGroup).getRepairTasks(myNodeID); + doThrow(new ScheduledJobException("foo")).when(repairTask1).execute(); + doThrow(new ScheduledJobException("foo")).when(repairTask2).execute(); + doThrow(new ScheduledJobException("foo")).when(repairTask3).execute(); + + boolean success = repairGroup.execute(myNodeID); + assertThat(success).isFalse(); + } + + @Test + public void testExecuteSomeTasksFailed() throws ScheduledJobException + { + DriverNode node = mockNode("DC1"); + when(node.getId()).thenReturn(myNodeID); + LongTokenRange range = new LongTokenRange(1, 2); + ImmutableSet nodes = ImmutableSet.of(node); + ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(nodes, ImmutableList.of(range), System.currentTimeMillis()); + + RepairGroup repairGroup = spy(builderFor(replicaRepairGroup).build(PRIORITY)); + RepairTask repairTask1 = mock(RepairTask.class); + RepairTask repairTask2 = mock(RepairTask.class); + RepairTask repairTask3 = mock(RepairTask.class); + Collection tasks = new ArrayList<>(); + tasks.add(repairTask1); + tasks.add(repairTask2); + tasks.add(repairTask3); + doReturn(tasks).when(repairGroup).getRepairTasks(myNodeID); + doThrow(new ScheduledJobException("foo")).when(repairTask1).execute(); + doNothing().when(repairTask2).execute(); + doThrow(new ScheduledJobException("foo")).when(repairTask3).execute(); + + boolean 
success = repairGroup.execute(myNodeID); + assertThat(success).isFalse(); + } + + private RepairGroup.Builder builderFor(ReplicaRepairGroup replicaRepairGroup) + { + return RepairGroup.newBuilder() + .withTableReference(TABLE_REFERENCE) + .withRepairConfiguration(myRepairConfiguration) + .withReplicaRepairGroup(replicaRepairGroup) + .withJmxProxyFactory(myJmxProxyFactory) + .withTableRepairMetrics(myTableRepairMetrics); + } + + private DriverNode mockNode(String dataCenter) + { + DriverNode node = mock(DriverNode.class); + when(node.getDatacenter()).thenReturn(dataCenter); + return node; + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/TestIncrementalRepairJob.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/TestIncrementalRepairJob.java new file mode 100644 index 000000000..18e687eba --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/incremental/TestIncrementalRepairJob.java @@ -0,0 +1,305 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.datastax.oss.driver.api.core.metadata.schema.KeyspaceMetadata; +import com.ericsson.bss.cassandra.ecchronos.core.impl.metrics.CassandraMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.RepairGroup; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.RepairTask; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJobView; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairParallelism; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; +import com.google.common.collect.ImmutableSet; +import java.util.UUID; +import nl.jqno.equalsverifier.EqualsVerifier; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.Collection; +import java.util.Iterator; +import java.util.concurrent.TimeUnit; + +import static com.ericsson.bss.cassandra.ecchronos.core.impl.table.MockTableReferenceFactory.tableReference; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.ignoreStubs; +import static 
org.mockito.Mockito.mock; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; + +@RunWith(MockitoJUnitRunner.Silent.class) +public class TestIncrementalRepairJob +{ + private static final String keyspaceName = "keyspace"; + private static final String tableName = "table"; + private static final long RUN_INTERVAL_IN_DAYS = 1; + private static final long WARNING_IN_DAYS = 7; + private static final long ERROR_IN_DAYS = 10; + + @Mock + private DistributedJmxProxyFactory myJmxProxyFactory; + + @Mock + private KeyspaceMetadata myKeyspaceMetadata; + + @Mock + private TableRepairMetrics myTableRepairMetrics; + + @Mock + private ReplicationState myReplicationState; + + @Mock + private CassandraMetrics myCassandraMetrics; + + @Mock + private Node mockNode; + + private final TableReference myTableReference = tableReference(keyspaceName, tableName); + private RepairConfiguration myRepairConfiguration; + private final UUID mockNodeID = UUID.randomUUID(); + + @Before + public void startup() + { + doReturn(0L).when(myCassandraMetrics).getMaxRepairedAt(mockNodeID, myTableReference); + doReturn(mockNodeID).when(mockNode).getHostId(); + myRepairConfiguration = RepairConfiguration.newBuilder() + .withParallelism(RepairParallelism.PARALLEL) + .withRepairWarningTime(WARNING_IN_DAYS, TimeUnit.DAYS).withRepairErrorTime(ERROR_IN_DAYS, TimeUnit.DAYS) + .withRepairInterval(RUN_INTERVAL_IN_DAYS, TimeUnit.DAYS) + .withRepairType(RepairType.INCREMENTAL).build(); + } + + @After + public void finalVerification() + { + verifyNoMoreInteractions(ignoreStubs(myJmxProxyFactory)); + verifyNoMoreInteractions(ignoreStubs(myKeyspaceMetadata)); + verifyNoMoreInteractions(ignoreStubs(myReplicationState)); + verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + } + + @Test + public void testGetViewNothingRepaired() + { + doReturn(0.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + long lastRepairedAt = 
System.currentTimeMillis(); + doReturn(lastRepairedAt).when(myCassandraMetrics).getMaxRepairedAt(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + + assertThat(job).isNotNull(); + ScheduledRepairJobView view = job.getView(); + assertThat(view).isNotNull(); + assertThat(view.getRepairConfiguration()).isEqualTo(job.getRepairConfiguration()); + assertThat(view.getTableReference()).isEqualTo(myTableReference); + assertThat(view.getProgress()).isEqualTo(0.0d); + assertThat(view.getNextRepair()).isEqualTo(lastRepairedAt + TimeUnit.DAYS.toMillis(RUN_INTERVAL_IN_DAYS)); + assertThat(view.getCompletionTime()).isEqualTo(lastRepairedAt); + assertThat(view.getStatus()).isEqualTo(ScheduledRepairJobView.Status.COMPLETED); + } + + @Test + public void testGetViewEverythingRepaired() + { + doReturn(100.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + long lastRepairedAt = System.currentTimeMillis(); + doReturn(lastRepairedAt).when(myCassandraMetrics).getMaxRepairedAt(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + + assertThat(job).isNotNull(); + ScheduledRepairJobView view = job.getView(); + assertThat(view).isNotNull(); + assertThat(view.getRepairConfiguration()).isEqualTo(job.getRepairConfiguration()); + assertThat(view.getTableReference()).isEqualTo(myTableReference); + assertThat(view.getProgress()).isEqualTo(1.0d); + assertThat(view.getNextRepair()).isEqualTo(lastRepairedAt + TimeUnit.DAYS.toMillis(RUN_INTERVAL_IN_DAYS)); + assertThat(view.getCompletionTime()).isEqualTo(lastRepairedAt); + assertThat(view.getStatus()).isEqualTo(ScheduledRepairJobView.Status.COMPLETED); + } + + @Test + public void testGetViewBlocked() + { + doReturn(0.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + long lastRepairedAt = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(RUN_INTERVAL_IN_DAYS); + 
doReturn(lastRepairedAt).when(myCassandraMetrics).getMaxRepairedAt(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + job.setRunnableIn(TimeUnit.HOURS.toMillis(1)); + + assertThat(job).isNotNull(); + ScheduledRepairJobView view = job.getView(); + assertThat(view).isNotNull(); + assertThat(view.getRepairConfiguration()).isEqualTo(job.getRepairConfiguration()); + assertThat(view.getTableReference()).isEqualTo(myTableReference); + assertThat(view.getProgress()).isEqualTo(0.0d); + assertThat(view.getNextRepair()).isEqualTo(lastRepairedAt + TimeUnit.DAYS.toMillis(RUN_INTERVAL_IN_DAYS)); + assertThat(view.getCompletionTime()).isEqualTo(lastRepairedAt); + assertThat(view.getStatus()).isEqualTo(ScheduledRepairJobView.Status.BLOCKED); + } + + @Test + public void testGetViewOnTime() + { + doReturn(0.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + long lastRepairedAt = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(RUN_INTERVAL_IN_DAYS); + doReturn(lastRepairedAt).when(myCassandraMetrics).getMaxRepairedAt(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + + assertThat(job).isNotNull(); + ScheduledRepairJobView view = job.getView(); + assertThat(view).isNotNull(); + assertThat(view.getRepairConfiguration()).isEqualTo(job.getRepairConfiguration()); + assertThat(view.getTableReference()).isEqualTo(myTableReference); + assertThat(view.getProgress()).isEqualTo(0.0d); + assertThat(view.getNextRepair()).isEqualTo(lastRepairedAt + TimeUnit.DAYS.toMillis(RUN_INTERVAL_IN_DAYS)); + assertThat(view.getCompletionTime()).isEqualTo(lastRepairedAt); + assertThat(view.getStatus()).isEqualTo(ScheduledRepairJobView.Status.ON_TIME); + } + + @Test + public void testGetViewLate() + { + doReturn(0.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + long lastRepairedAt = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(WARNING_IN_DAYS); + 
doReturn(lastRepairedAt).when(myCassandraMetrics).getMaxRepairedAt(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + + assertThat(job).isNotNull(); + ScheduledRepairJobView view = job.getView(); + assertThat(view).isNotNull(); + assertThat(view.getRepairConfiguration()).isEqualTo(job.getRepairConfiguration()); + assertThat(view.getTableReference()).isEqualTo(myTableReference); + assertThat(view.getProgress()).isEqualTo(0.0d); + assertThat(view.getNextRepair()).isEqualTo(lastRepairedAt + TimeUnit.DAYS.toMillis(RUN_INTERVAL_IN_DAYS)); + assertThat(view.getCompletionTime()).isEqualTo(lastRepairedAt); + assertThat(view.getStatus()).isEqualTo(ScheduledRepairJobView.Status.LATE); + } + + @Test + public void testGetViewOverdue() + { + doReturn(0.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + long lastRepairedAt = System.currentTimeMillis() - TimeUnit.DAYS.toMillis(ERROR_IN_DAYS); + doReturn(lastRepairedAt).when(myCassandraMetrics).getMaxRepairedAt(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + + assertThat(job).isNotNull(); + ScheduledRepairJobView view = job.getView(); + assertThat(view).isNotNull(); + assertThat(view.getRepairConfiguration()).isEqualTo(job.getRepairConfiguration()); + assertThat(view.getTableReference()).isEqualTo(myTableReference); + assertThat(view.getProgress()).isEqualTo(0.0d); + assertThat(view.getNextRepair()).isEqualTo(lastRepairedAt + TimeUnit.DAYS.toMillis(RUN_INTERVAL_IN_DAYS)); + assertThat(view.getCompletionTime()).isEqualTo(lastRepairedAt); + assertThat(view.getStatus()).isEqualTo(ScheduledRepairJobView.Status.OVERDUE); + } + + @Test + public void testRunnableNothingRepaired() + { + doReturn(0.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + job.refreshState(); + + assertThat(job).isNotNull(); + assertThat(job.runnable()).isTrue(); + } + + 
@Test + public void testRunnableHalfRepaired() + { + doReturn(50.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + job.refreshState(); + + assertThat(job).isNotNull(); + assertThat(job.runnable()).isTrue(); + } + + @Test + public void testRunnableEverythingRepaired() + { + doReturn(100.0d).when(myCassandraMetrics).getPercentRepaired(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + job.refreshState(); + + assertThat(job).isNotNull(); + assertThat(job.runnable()).isFalse(); + } + + @Test + public void testRunnableIntervalNotYetPassed() + { + long lastRepairedAt = System.currentTimeMillis(); + doReturn(lastRepairedAt).when(myCassandraMetrics).getMaxRepairedAt(mockNodeID, myTableReference); + IncrementalRepairJob job = getIncrementalRepairJob(); + job.refreshState(); + + assertThat(job).isNotNull(); + assertThat(job.runnable()).isFalse(); + } + + @Test + public void testIterator() + { + DriverNode node1 = mock(DriverNode.class); + DriverNode node2 = mock(DriverNode.class); + ImmutableSet replicas = ImmutableSet.of(node1, node2); + doReturn(replicas).when(myReplicationState).getReplicas(myTableReference, mockNode); + IncrementalRepairJob job = getIncrementalRepairJob(); + + assertThat(job).isNotNull(); + Iterator iterator = job.iterator(); + ScheduledTask task = iterator.next(); + assertThat(task).isInstanceOf(RepairGroup.class); + Collection repairTasks = ((RepairGroup) task).getRepairTasks(mockNodeID); + assertThat(repairTasks).hasSize(1); + IncrementalRepairTask repairTask = (IncrementalRepairTask) repairTasks.iterator().next(); + assertThat(repairTask.getRepairConfiguration()).isEqualTo(myRepairConfiguration); + assertThat(repairTask.getTableReference()).isEqualTo(myTableReference); + verify(myReplicationState).getReplicas(myTableReference, mockNode); + } + + @Test + public void testEqualsAndHashcode() + { + 
EqualsVerifier.simple().forClass(IncrementalRepairJob.class).withRedefinedSuperclass().verify(); + } + + private IncrementalRepairJob getIncrementalRepairJob() + { + ScheduledJob.Configuration configuration = new ScheduledJob.ConfigurationBuilder().withPriority( + ScheduledJob.Priority.LOW).withRunInterval(RUN_INTERVAL_IN_DAYS, TimeUnit.DAYS).build(); + + return new IncrementalRepairJob.Builder().withConfiguration(configuration).withTableReference(myTableReference) + .withJmxProxyFactory(myJmxProxyFactory).withReplicationState(myReplicationState) + .withTableRepairMetrics(myTableRepairMetrics).withRepairConfiguration(myRepairConfiguration) + .withCassandraMetrics(myCassandraMetrics) + .withNode(mockNode).build(); + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/DummyJob.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/DummyJob.java new file mode 100644 index 000000000..6524a8ca6 --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/DummyJob.java @@ -0,0 +1,72 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import java.util.Arrays; +import java.util.Iterator; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + + +public class DummyJob extends ScheduledJob +{ + volatile boolean hasRun = false; + + public DummyJob(Priority priority) + { + super(new ConfigurationBuilder().withPriority(priority).withRunInterval(1, TimeUnit.SECONDS).build()); + } + + public DummyJob(Priority priority, UUID jobId) + { + super(new ConfigurationBuilder().withPriority(priority).build(), jobId); + } + + public boolean hasRun() + { + return hasRun; + } + + @Override + public Iterator iterator() + { + return Arrays. asList(new DummyTask()).iterator(); + } + + @Override + public String toString() + { + return "DummyJob " + getPriority(); + } + + public class DummyTask extends ScheduledTask + { + @Override + public boolean execute(UUID nodeID) + { + hasRun = true; + return true; + } + + @Override + public void cleanup() + { + // NOOP + } + } + +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestRepairSchedulerImpl.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestRepairSchedulerImpl.java new file mode 100644 index 000000000..a708c5e79 --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestRepairSchedulerImpl.java @@ -0,0 +1,283 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.impl.metrics.CassandraMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.incremental.IncrementalRepairJob; +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RepairScheduler; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduleManager; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledRepairJobView; +import com.ericsson.bss.cassandra.ecchronos.core.state.ReplicationState; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; +import java.util.*; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.concurrent.TimeUnit; + +import static com.ericsson.bss.cassandra.ecchronos.core.impl.table.MockTableReferenceFactory.tableReference; +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.*; + +@RunWith 
(MockitoJUnitRunner.class) +public class TestRepairSchedulerImpl +{ + private static final TableReference TABLE_REFERENCE1 = tableReference("keyspace", "table1"); + private static final TableReference TABLE_REFERENCE2 = tableReference("keyspace", "table2"); + + @Mock + private DistributedJmxProxyFactory jmxProxyFactory; + + @Mock + private ScheduleManager scheduleManager; + + @Mock + private TableRepairMetrics myTableRepairMetrics; + + @Mock + private ReplicationState myReplicationState; + + @Mock + private CassandraMetrics myCassandraMetrics; + + @Mock + private Node mockNode; + + private final UUID mockNodeID = UUID.randomUUID(); + + @Before + public void setup() + { + when(mockNode.getHostId()).thenReturn(mockNodeID); + } + + @Test + public void testConfigureNewTable() + { + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() + .withReplicationState(myReplicationState).build(); + + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, + Collections.singleton(RepairConfiguration.DEFAULT)); + + verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); + + repairSchedulerImpl.close(); + verify(scheduleManager).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + + verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(scheduleManager); + } + + @Test + public void testConfigureTwoTables() + { + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() + .withReplicationState(myReplicationState).build(); + + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE2, Collections.singleton(RepairConfiguration.DEFAULT)); + + 
verify(scheduleManager, timeout(1000).times(2)).schedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + + repairSchedulerImpl.close(); + verify(scheduleManager, times(1)).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + + verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(scheduleManager); + } + + @Test + public void testRemoveTableConfiguration() + { + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() + .withReplicationState(myReplicationState).build(); + + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); + + verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); + + repairSchedulerImpl.removeConfiguration(mockNode, TABLE_REFERENCE1); + verify(scheduleManager, timeout(1000)).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); + + repairSchedulerImpl.close(); + verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(scheduleManager); + } + + @Test + public void testUpdateTableConfiguration() + { + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() + .withReplicationState(myReplicationState).build(); + + long expectedUpdatedRepairInterval = TimeUnit.DAYS.toMillis(1); + + RepairConfiguration updatedRepairConfiguration = RepairConfiguration.newBuilder() + .withRepairInterval(expectedUpdatedRepairInterval, TimeUnit.MILLISECONDS) + .build(); + + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); + + verify(scheduleManager, 
timeout(1000)).schedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); + + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(updatedRepairConfiguration)); + + verify(scheduleManager, timeout(1000).times(2)).schedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(scheduleManager, timeout(1000)).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, updatedRepairConfiguration); + + repairSchedulerImpl.close(); + verify(scheduleManager, times(2)).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); + + verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(scheduleManager); + } + + @Test + public void testUpdateTableConfigurationToSame() + { + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder() + .withReplicationState(myReplicationState).build(); + + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); + + verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(ScheduledJob.class)); + verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); + + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); + + assertOneTableViewExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); + + repairSchedulerImpl.close(); + verify(scheduleManager).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); + + 
verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(scheduleManager); + } + + @Test + public void testConfigureTwoSchedulesForOneTable() + { + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder().withReplicationState(myReplicationState).build(); + + RepairConfiguration incrementalRepairConfiguration = RepairConfiguration.newBuilder().withRepairType( + RepairType.INCREMENTAL).build(); + Set repairConfigurations = new HashSet<>(); + repairConfigurations.add(RepairConfiguration.DEFAULT); + repairConfigurations.add(incrementalRepairConfiguration); + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, repairConfigurations); + + verify(scheduleManager, timeout(1000).times(2)).schedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + + assertTableViewsExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT, incrementalRepairConfiguration); + + repairSchedulerImpl.close(); + verify(scheduleManager, times(2)).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); + + verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(scheduleManager); + } + + @Test + public void testScheduleChangesToIncremental() + { + RepairSchedulerImpl repairSchedulerImpl = defaultRepairSchedulerImplBuilder().withReplicationState(myReplicationState).build(); + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(RepairConfiguration.DEFAULT)); + + // Should change to TableRepairJob.class when implemented + verify(scheduleManager, timeout(1000)).schedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + verify(scheduleManager, never()).deschedule(eq(mockNodeID), any(ScheduledJob.class)); + + assertTableViewsExist(repairSchedulerImpl, TABLE_REFERENCE1, RepairConfiguration.DEFAULT); + + 
RepairConfiguration incrementalRepairConfiguration = RepairConfiguration.newBuilder().withRepairType( + RepairType.INCREMENTAL).build(); + repairSchedulerImpl.putConfigurations(mockNode, TABLE_REFERENCE1, Collections.singleton(incrementalRepairConfiguration)); + + verify(scheduleManager, timeout(1000).times(2)).schedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + + assertTableViewsExist(repairSchedulerImpl, TABLE_REFERENCE1, incrementalRepairConfiguration); + + repairSchedulerImpl.close(); + verify(scheduleManager, times(2)).deschedule(eq(mockNodeID), any(IncrementalRepairJob.class)); + assertThat(repairSchedulerImpl.getCurrentRepairJobs()).isEmpty(); + + verifyNoMoreInteractions(ignoreStubs(myTableRepairMetrics)); + verifyNoMoreInteractions(scheduleManager); + } + + private void assertOneTableViewExist(RepairScheduler repairScheduler, TableReference tableReference, RepairConfiguration repairConfiguration) + { + List repairJobViews = repairScheduler.getCurrentRepairJobs(); + assertThat(repairJobViews).hasSize(1); + + ScheduledRepairJobView repairJobView = repairJobViews.get(0); + assertThat(repairJobView.getTableReference()).isEqualTo(tableReference); + assertThat(repairJobView.getRepairConfiguration()).isEqualTo(repairConfiguration); + } + + private void assertTableViewsExist(RepairScheduler repairScheduler, TableReference tableReference, RepairConfiguration ...repairConfigurations) + { + List repairJobViews = repairScheduler.getCurrentRepairJobs(); + + assertThat(repairJobViews).hasSize(repairConfigurations.length); + + int matches = 0; + for (RepairConfiguration repairConfiguration : repairConfigurations) + { + for (ScheduledRepairJobView repairJobView: repairJobViews) + { + assertThat(repairJobView.getTableReference()).isEqualTo(tableReference); + if (repairJobView.getRepairConfiguration().equals(repairConfiguration)) + { + matches++; + } + } + } + assertThat(matches).isEqualTo(repairJobViews.size()); + } + + private RepairSchedulerImpl.Builder 
defaultRepairSchedulerImplBuilder() + { + return RepairSchedulerImpl.builder() + .withJmxProxyFactory(jmxProxyFactory) + .withTableRepairMetrics(myTableRepairMetrics) + .withScheduleManager(scheduleManager) + .withCassandraMetrics(myCassandraMetrics); + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestScheduleManager.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestScheduleManager.java new file mode 100644 index 000000000..6fd5eb1cd --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestScheduleManager.java @@ -0,0 +1,342 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.when; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.RunPolicy; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import java.util.*; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +@RunWith (MockitoJUnitRunner.Silent.class) +public class TestScheduleManager +{ + @Mock + private RunPolicy myRunPolicy; + + private ScheduleManagerImpl myScheduler; + + private final UUID nodeID1 = UUID.randomUUID(); + + private final UUID nodeID2 = UUID.randomUUID(); + + private final Collection myNodes = Arrays.asList(nodeID1, nodeID2); + + @Before + public void startup() + { + myScheduler = ScheduleManagerImpl.builder() + .withNodeIDList(myNodes) + .build(); + myScheduler.addRunPolicy(job -> myRunPolicy.validate(job)); + + when(myRunPolicy.validate(any(ScheduledJob.class))).thenReturn(-1L); + } + + @After + public void cleanup() + { + myScheduler.close(); + } + + @Test + public void testRunningOneJob() + { + DummyJob job1 = new DummyJob(ScheduledJob.Priority.LOW); + myScheduler.schedule(nodeID1, job1); + + myScheduler.run(nodeID1); + + assertThat(job1.hasRun()).isTrue(); + assertThat(myScheduler.getQueueSize(nodeID1)).isEqualTo(1); + } + + @Test + public void testRunningJobWithFailingRunPolicy() + { + DummyJob job1 = new DummyJob(ScheduledJob.Priority.LOW); + 
myScheduler.schedule(nodeID1, job1); + + when(myRunPolicy.validate(any(ScheduledJob.class))).thenReturn(1L); + + myScheduler.run(nodeID1); + + assertThat(job1.hasRun()).isFalse(); + assertThat(myScheduler.getQueueSize(nodeID1)).isEqualTo(1); + } + + @Test + public void testRunningTwoTasksStoppedAfterFirstByPolicy() + { + TestJob job1 = new TestJob(ScheduledJob.Priority.LOW, 2, () -> { + when(myRunPolicy.validate(any(ScheduledJob.class))).thenReturn(1L); + }); + myScheduler.schedule(nodeID1, job1); + + myScheduler.run(nodeID1); + + assertThat(job1.getTaskRuns()).isEqualTo(1); + assertThat(myScheduler.getQueueSize(nodeID1)).isEqualTo(1); + } + + @Test + public void testRunningJobWithThrowingRunPolicy() + { + DummyJob job1 = new DummyJob(ScheduledJob.Priority.LOW); + myScheduler.schedule(nodeID1, job1); + + when(myRunPolicy.validate(any(ScheduledJob.class))).thenThrow(new IllegalStateException()); + + myScheduler.run(nodeID1); + + assertThat(job1.hasRun()).isFalse(); + assertThat(myScheduler.getQueueSize(nodeID1)).isEqualTo(1); + } + + @Test + public void testTwoJobsRejected() + { + DummyJob job1 = new DummyJob(ScheduledJob.Priority.LOW); + DummyJob job2 = new DummyJob(ScheduledJob.Priority.LOW); + myScheduler.schedule(nodeID1, job1); + myScheduler.schedule(nodeID1, job2); + + when(myRunPolicy.validate(any(ScheduledJob.class))).thenReturn(1L); + + myScheduler.run(nodeID1); + + assertThat(job1.hasRun()).isFalse(); + assertThat(job2.hasRun()).isFalse(); + assertThat(myScheduler.getQueueSize(nodeID1)).isEqualTo(2); + verify(myRunPolicy, times(2)).validate(any(ScheduledJob.class)); + } + + @Test (timeout = 2000L) + public void testDescheduleRunningJob() throws InterruptedException + { + CountDownLatch jobCdl = new CountDownLatch(1); + TestJob job1 = new TestJob(ScheduledJob.Priority.HIGH, jobCdl); + myScheduler.schedule(nodeID1, job1); + + new Thread(() -> myScheduler.run(nodeID1)).start(); + + waitForJobStarted(job1); + myScheduler.deschedule(nodeID1, job1); + 
jobCdl.countDown(); + waitForJobFinished(job1); + + assertThat(job1.hasRun()).isTrue(); + assertThat(myScheduler.getQueueSize(nodeID1)).isEqualTo(0); + } + + @Test + public void testGetCurrentJobStatus() throws InterruptedException + { + CountDownLatch latch = new CountDownLatch(1); + UUID jobId = UUID.randomUUID(); + ScheduledJob job1 = new TestScheduledJob( + new ScheduledJob.ConfigurationBuilder() + .withPriority(ScheduledJob.Priority.LOW) + .withRunInterval(1, TimeUnit.SECONDS) + .build(), + jobId, + latch); + myScheduler.schedule(nodeID1, job1); + new Thread(() -> myScheduler.run(nodeID1)).start(); + Thread.sleep(50); + assertThat(myScheduler.getCurrentJobStatus()).isEqualTo("Job ID: " + jobId.toString() + ", Status: Running"); + latch.countDown(); + } + + @Test + public void testGetCurrentJobStatusNoRunning() throws InterruptedException + { + CountDownLatch latch = new CountDownLatch(1); + UUID jobId = UUID.randomUUID(); + ScheduledJob job1 = new TestScheduledJob( + new ScheduledJob.ConfigurationBuilder() + .withPriority(ScheduledJob.Priority.LOW) + .withRunInterval(1, TimeUnit.SECONDS) + .build(), + jobId, + latch); + myScheduler.schedule(nodeID1, job1); + new Thread(() -> myScheduler.run(nodeID1)).start(); + assertThat(myScheduler.getCurrentJobStatus()).isNotEqualTo("Job ID: " + jobId.toString() + ", Status: Running"); + latch.countDown(); + } + private void waitForJobStarted(TestJob job) throws InterruptedException + { + while(!job.hasStarted()) + { + Thread.sleep(10); + } + } + + private void waitForJobFinished(TestJob job) throws InterruptedException + { + while(!job.hasRun()) + { + Thread.sleep(10); + } + } + + private class TestJob extends ScheduledJob + { + private volatile CountDownLatch countDownLatch; + private volatile boolean hasRun = false; + private volatile boolean hasStarted = false; + private final AtomicInteger taskRuns = new AtomicInteger(); + private final int numTasks; + private final Runnable onCompletion; + + + public TestJob(Priority 
priority, CountDownLatch cdl) + { + this(priority, cdl, 1, () -> {}); + } + + public TestJob(Priority priority, int numTasks) + { + this(priority, numTasks, () -> {}); + } + + public TestJob(Priority priority, int numTasks, Runnable onCompletion) + { + super(new ConfigurationBuilder().withPriority(priority).withRunInterval(1, TimeUnit.SECONDS).build()); + this.numTasks = numTasks; + this.onCompletion = onCompletion; + } + + public TestJob(Priority priority, CountDownLatch cdl, int numTasks, Runnable onCompletion) + { + super(new ConfigurationBuilder().withPriority(priority).withRunInterval(1, TimeUnit.SECONDS).build()); + this.numTasks = numTasks; + this.onCompletion = onCompletion; + countDownLatch = cdl; + } + + public int getTaskRuns() + { + return taskRuns.get(); + } + + public boolean hasStarted() + { + return hasStarted; + } + + public boolean hasRun() + { + return hasRun; + } + + @Override + public Iterator iterator() + { + List tasks = new ArrayList<>(); + + for (int i = 0; i < numTasks; i++) + { + tasks.add(new ShortRunningTask(onCompletion)); + } + + return tasks.iterator(); + } + + private class ShortRunningTask extends ScheduledTask + { + private final Runnable onCompletion; + + public ShortRunningTask(Runnable onCompletion) + { + this.onCompletion = onCompletion; + } + + @Override + public boolean execute(UUID nodeID) + { + hasStarted = true; + try + { + if (countDownLatch != null) + { + countDownLatch.await(); + } + } + catch (InterruptedException e) + { + // Intentionally left empty + } + onCompletion.run(); + taskRuns.incrementAndGet(); + hasRun = true; + return true; + } + } + } + + public class TestScheduledJob extends ScheduledJob + { + private final CountDownLatch taskCompletionLatch; + public TestScheduledJob(Configuration configuration, UUID id, CountDownLatch taskCompletionLatch) + { + super(configuration, id); + this.taskCompletionLatch = taskCompletionLatch; + } + @Override + public Iterator iterator() + { + return Collections. 
singleton(new ControllableTask(taskCompletionLatch)).iterator(); + } + class ControllableTask extends ScheduledTask + { + private final CountDownLatch latch; + public ControllableTask(CountDownLatch latch) + { + this.latch = latch; + } + @Override + public boolean execute(UUID nodeID) + { + try + { + latch.await(); + return true; + } + catch (InterruptedException e) + { + Thread.currentThread().interrupt(); + return false; + } + } + } + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestScheduledJobQueue.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestScheduledJobQueue.java new file mode 100644 index 000000000..f41ab2c02 --- /dev/null +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/repair/scheduler/TestScheduledJobQueue.java @@ -0,0 +1,192 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.impl.repair.scheduler; + +import static org.assertj.core.api.Assertions.assertThat; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledJob; +import com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler.ScheduledTask; +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ScheduledJobException; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.Iterator; +import java.util.concurrent.TimeUnit; + +import org.junit.Before; +import org.junit.Test; + +public class TestScheduledJobQueue +{ + private ScheduledJobQueue queue; + + @Before + public void setup() + { + queue = new ScheduledJobQueue(new Comp()); + } + + @Test + public void testInsertRemoveOne() + { + DummyJob job = new DummyJob(ScheduledJob.Priority.LOW); + + queue.add(job); + + assertThat(queue.iterator()).toIterable().containsExactly(job); + } + + @Test + public void testInsertDifferentPrio() + { + DummyJob job = new DummyJob(ScheduledJob.Priority.LOW); + DummyJob job2 = new DummyJob(ScheduledJob.Priority.HIGH); + + queue.add(job); + queue.add(job2); + + assertThat(queue.iterator()).toIterable().containsExactly(job2, job); + } + + @Test + public void testEmptyQueue() + { + assertThat(queue.iterator()).toIterable().isEmpty(); + } + + @Test + public void testNonRunnableQueueIsEmpty() throws ScheduledJobException + { + final int nJobs = 10; + + for (int i = 0; i < nJobs; i++) + { + queue.add(new RunnableOnce(ScheduledJob.Priority.LOW)); + } + + for (ScheduledJob job : queue) + { + job.postExecute(true); + } + + assertThat(queue.iterator()).toIterable().isEmpty(); + } + + @Test + public void testRemoveJobInQueueIsPossible() + { + DummyJob job = new DummyJob(ScheduledJob.Priority.HIGH); + DummyJob job2 = new DummyJob(ScheduledJob.Priority.LOW); + + queue.add(job); + queue.add(job2); + + Iterator iterator = queue.iterator(); + + queue.remove(job2); + + 
assertThat(iterator).toIterable().containsExactly(job, job2); + assertThat(queue.iterator()).toIterable().containsExactly(job); + } + + @Test + public void testRunOnceJobRemovedOnFinish() + { + StateJob job = new StateJob(ScheduledJob.Priority.LOW, ScheduledJob.State.FINISHED); + StateJob job2 = new StateJob(ScheduledJob.Priority.LOW, ScheduledJob.State.RUNNABLE); + + queue.add(job); + queue.add(job2); + + for (ScheduledJob next : queue) + { + assertThat(next.getState()).isEqualTo(ScheduledJob.State.RUNNABLE); + } + + assertThat(queue.size()).isEqualTo(1); + assertThat(queue.iterator()).toIterable().containsExactly(job2); + } + + @Test + public void testRunOnceJobRemovedOnFailure() + { + StateJob job = new StateJob(ScheduledJob.Priority.LOW, ScheduledJob.State.FAILED); + StateJob job2 = new StateJob(ScheduledJob.Priority.LOW, ScheduledJob.State.RUNNABLE); + + queue.add(job); + queue.add(job2); + + for (ScheduledJob next : queue) + { + assertThat(next.getState()).isEqualTo(ScheduledJob.State.RUNNABLE); + } + + assertThat(queue.size()).isEqualTo(1); + assertThat(queue.iterator()).toIterable().containsExactly(job2); + } + + private class Comp implements Comparator + { + + @Override + public int compare(ScheduledJob j1, ScheduledJob j2) + { + int ret = Integer.compare(j2.getRealPriority(), j1.getRealPriority()); + + if (ret == 0) + { + ret = Integer.compare(j2.getPriority().getValue(), j1.getPriority().getValue()); + } + + return ret; + } + + } + + private class RunnableOnce extends ScheduledJob + { + public RunnableOnce(Priority prio) + { + super(new ConfigurationBuilder().withPriority(prio).withRunInterval(1, TimeUnit.DAYS).build()); + } + + @Override + public Iterator iterator() + { + return new ArrayList().iterator(); + } + + @Override + public String toString() + { + return "RunnableOnce " + getPriority(); + } + } + + private class StateJob extends DummyJob + { + private State state; + StateJob(Priority priority, State state) + { + super(priority); + this.state = 
state; + } + + @Override + public State getState() + { + return state; + } + } +} diff --git a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/TestReplicationStateImpl.java b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/TestReplicationStateImpl.java index 4fbe4cd1c..8d227f329 100644 --- a/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/TestReplicationStateImpl.java +++ b/core.impl/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/impl/state/TestReplicationStateImpl.java @@ -19,6 +19,7 @@ import com.datastax.oss.driver.api.core.metadata.Node; import com.datastax.oss.driver.api.core.metadata.TokenMap; import com.datastax.oss.driver.api.core.metadata.token.TokenRange; +import com.ericsson.bss.cassandra.ecchronos.core.impl.repair.state.ReplicationStateImpl; import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; import com.ericsson.bss.cassandra.ecchronos.core.metadata.NodeResolver; import com.ericsson.bss.cassandra.ecchronos.core.state.LongTokenRange; diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/config/RepairConfiguration.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/config/RepairConfiguration.java index 37ad9d171..ab638974a 100644 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/config/RepairConfiguration.java +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/config/RepairConfiguration.java @@ -316,6 +316,40 @@ public Builder withTargetRepairSizeInBytes(final long targetRepairSizeInBytes) return this; } + /** + * Set the time used to send a warning alarm that repair has not been running correctly. + * + * Normally this warning would be sent before gc_grace_seconds has passed to notify the + * user that some action might need to be taken to continue. 
+ * + * @param repairWarningTime The time to use + * @param timeUnit The time unit + * @return The builder + * @see #withRepairErrorTime(long, TimeUnit) + */ + public Builder withRepairWarningTime(final long repairWarningTime, final TimeUnit timeUnit) + { + myRepairWarningTimeInMs = timeUnit.toMillis(repairWarningTime); + return this; + } + + /** + * Set the time used to send an error alarm that repair has not been running correctly. + * + * Normally this error would be sent after gc_grace_seconds has passed to notify the + * user. + * + * @param repairErrorTime The time to use + * @param timeUnit The time unit + * @return The builder + * @see #withRepairWarningTime(long, TimeUnit) + */ + public Builder withRepairErrorTime(final long repairErrorTime, final TimeUnit timeUnit) + { + myRepairErrorTimeInMs = timeUnit.toMillis(repairErrorTime); + return this; + } + /** * Build with ignore TWCS tables. * diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/package-info.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/package-info.java index 043ff631e..d3e5770af 100644 --- a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/package-info.java +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/package-info.java @@ -13,6 +13,6 @@ * limitations under the License. */ /** - * Contains the implementations and resources for repair operations. + * Contains the interfaces and resources for repair operations. 
*/ package com.ericsson.bss.cassandra.ecchronos.core.repair; diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/RepairScheduler.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/RepairScheduler.java new file mode 100644 index 000000000..aa4eeec18 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/RepairScheduler.java @@ -0,0 +1,63 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler; + +import com.datastax.oss.driver.api.core.metadata.Node; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import java.util.List; +import java.util.Set; + +/** + * A factory which takes repair configuration and schedules tables for repair based on the provided configuration. + * + * It is the responsibility of the configuration provider to remove the configuration. + */ +public interface RepairScheduler +{ + /** + * Create or update repair configurations for the specified table. + * + * @param node The node to put configurations + * @param tableReference The table to put configurations for. + * @param repairConfigurations The new or updated repair configurations. 
+ */ + void putConfigurations(Node node, TableReference tableReference, Set repairConfigurations); + + /** + * Remove repair configuration for the specified table which effectively should remove the schedule. + * + * @param node The node to remove configurations + * @param tableReference The table to remove configuration for. + */ + void removeConfiguration(Node node, TableReference tableReference); + + /** + * @return the list of the currently scheduled repair jobs. + */ + List getCurrentRepairJobs(); + + /** + * Retrieves the current status of the job being managed by this scheduler. + *

+ * It's intended for monitoring and logging purposes, allowing users to query the job's current state + * without affecting its execution. + * + * @return A {@code String} representing the current status of the job. + */ + String getCurrentJobStatus(); +} + + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/RunPolicy.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/RunPolicy.java new file mode 100644 index 000000000..295fc6481 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/RunPolicy.java @@ -0,0 +1,27 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler; + +public interface RunPolicy +{ + /** + * Validate if the job is runnable or how long it should wait until it is tried again. + * + * @param job + * The job that wants to execute. + * @return The time until the job should be tried again in milliseconds or -1 if the job can run now. 
+ */ + long validate(ScheduledJob job); +} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduleManager.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduleManager.java new file mode 100644 index 000000000..7a6068c9c --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduleManager.java @@ -0,0 +1,46 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler; + +import java.util.UUID; + +public interface ScheduleManager +{ + /** + * Schedule the provided job for running. + * + * @param job + * The job to schedule. + */ + void schedule(UUID nodeID, ScheduledJob job); + + /** + * Remove the provided job from the scheduling. + * + * @param job + * The job to deschedule. + */ + void deschedule(UUID nodeID, ScheduledJob job); + + /** + * Retrieves the current status of the job being managed by this scheduler. + *

+ * It's intended for monitoring and logging purposes, allowing users to query the job's current state + * without affecting its execution. + * + * @return A {@code String} representing the current status of the job. + */ + String getCurrentJobStatus(); +} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledJob.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledJob.java new file mode 100644 index 000000000..d0d1abafb --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledJob.java @@ -0,0 +1,378 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler; + +import java.util.Objects; +import java.util.UUID; +import java.util.concurrent.TimeUnit; + +/** + * A scheduled job that should be managed by the {@link ScheduleManager}. 
/**
 * A scheduled job that should be managed by the {@link ScheduleManager}.
 * <p>
 * A job remembers when it last ran successfully, the earliest time it may run again and how urgent it
 * currently is. The urgency ({@link #getRealPriority()}) grows with the time that has passed since the
 * job became due.
 */
@SuppressWarnings("VisibilityModifier")
// NOTE(review): Iterable is declared without a type argument in this view - confirm the element type
// before adding one.
public abstract class ScheduledJob implements Iterable
{
    private static final long DEFAULT_BACKOFF_IN_MINUTES = 30;

    private final Priority myPriority;
    private final long myBackoffInMs;
    protected final long myRunIntervalInMs;

    protected volatile long myLastSuccessfulRun = -1L;
    private volatile long myNextRunTimeInMs = -1L;
    // Never reassigned in this class; subclasses change the offset by overriding getRunOffset().
    private volatile long myRunOffset = 0;
    private final UUID myId;
    private final TimeUnit myPriorityGranularity;

    /**
     * Create a job with a randomly generated id.
     *
     * @param configuration
     *            The configuration of the job.
     */
    public ScheduledJob(final Configuration configuration)
    {
        this(configuration, UUID.randomUUID());
    }

    /**
     * Create a job with the provided id.
     * <p>
     * The last successful run is initialised to "one run interval ago" so that a new job is due immediately.
     *
     * @param configuration
     *            The configuration of the job.
     * @param id
     *            The unique identifier of the job.
     */
    public ScheduledJob(final Configuration configuration, final UUID id)
    {
        myId = id;
        myPriority = configuration.priority;
        myRunIntervalInMs = configuration.runIntervalInMs;
        myBackoffInMs = configuration.backoffInMs;
        myLastSuccessfulRun = System.currentTimeMillis() - myRunIntervalInMs;
        myPriorityGranularity = configuration.priorityGranularity;
    }

    /**
     * This method gets run after the execution of one task has completed.
     * <p>
     * When overriding this method make sure to call {@code super.postExecute(successful)} in the end.
     * <p>
     * A successful run records the current time as the last successful run and clears any pending delay,
     * a failed run delays the next attempt by the configured backoff.
     *
     * @param successful
     *            If the job ran successfully.
     */
    public void postExecute(final boolean successful)
    {
        if (successful)
        {
            myLastSuccessfulRun = System.currentTimeMillis();
            myNextRunTimeInMs = -1L;
        }
        else
        {
            myNextRunTimeInMs = System.currentTimeMillis() + myBackoffInMs;
        }
    }

    /**
     * This method gets run after the job is removed from the Queue. It will run whether the job fails or succeeds.
     */
    public void finishJob()
    {
        // Do nothing
    }

    /**
     * This method is called every time the scheduler creates a list of jobs to run.
     * Use this if you need to do some updates before priority is calculated.
     * Default is noop.
     */
    public void refreshState()
    {
        // NOOP by default
    }

    /**
     * Set the job to be runnable again after the given delay has elapsed.
     *
     * @param delay
     *            The delay in milliseconds to wait until the job is runnable again.
     */
    public final void setRunnableIn(final long delay)
    {
        myNextRunTimeInMs = System.currentTimeMillis() + delay;
    }

    /**
     * Check if this job is runnable now.
     * <p>
     * A job is runnable when any delay set by a failed run or {@link #setRunnableIn(long)} has elapsed and
     * {@link #getRealPriority()} is not -1.
     *
     * @return True if able to run now.
     */
    public boolean runnable()
    {
        return myNextRunTimeInMs <= System.currentTimeMillis() && getRealPriority() > -1;
    }

    /**
     * Get current State of the job.
     * <p>
     * This base implementation only reports {@link State#RUNNABLE} or {@link State#PARKED}.
     *
     * @return current State
     */
    public State getState()
    {
        if (runnable())
        {
            return State.RUNNABLE;
        }
        return State.PARKED;
    }

    /**
     * Get the unix timestamp of the last time this job was run.
     *
     * @return The last time the job ran successfully.
     */
    public long getLastSuccessfulRun()
    {
        return myLastSuccessfulRun;
    }

    /**
     * Get the configured priority of this job.
     *
     * @return The priority of this job.
     * @see #getRealPriority()
     */
    public Priority getPriority()
    {
        return myPriority;
    }

    /**
     * Get the current priority of the job.
     * <p>
     * The current priority is calculated as the {@link #getPriority() configured priority} times the number of
     * priority granularity units (hours by default) that have started since it *could* start running.
     *
     * @return The current priority or -1 if the job shouldn't run now.
     * @see #getPriority()
     */
    public int getRealPriority()
    {
        return getRealPriority(getLastSuccessfulRun());
    }

    /**
     * Get the priority the job would have now if its last successful run was at the provided time.
     *
     * @param lastSuccessfulRun
     *            The unix timestamp in milliseconds to treat as the last successful run.
     * @return The calculated priority, capped at {@link Integer#MAX_VALUE}, or -1 if the job is not yet due.
     */
    public final int getRealPriority(final long lastSuccessfulRun)
    {
        long now = System.currentTimeMillis();

        long diff = now - (lastSuccessfulRun + myRunIntervalInMs - getRunOffset());

        if (diff < 0)
        {
            return -1;
        }

        long granularityInMs = myPriorityGranularity.toMillis(1);
        // A job that just became due is already in its first unit, hence the "+ 1".
        long unitsPassed = diff / granularityInMs + 1;
        int priorityValue = myPriority.getValue();

        // Overflow protection
        if (unitsPassed > Integer.MAX_VALUE / priorityValue)
        {
            return Integer.MAX_VALUE;
        }

        // The guard above guarantees that the product fits in an int.
        return (int) (unitsPassed * priorityValue);
    }

    /**
     * @return The offset for the job.
     */
    public long getRunOffset()
    {
        return myRunOffset;
    }

    /**
     * @return unique identifier for Job
     */
    public final UUID getId()
    {
        return myId;
    }

    /**
     * {@inheritDoc}
     * <p>
     * NOTE(review): the comparison includes the last successful run and the next run time, both of which
     * change in {@link #postExecute(boolean)}, so the result can change over the lifetime of a job.
     */
    @Override
    public boolean equals(final Object o)
    {
        if (this == o)
        {
            return true;
        }
        if (o == null || getClass() != o.getClass())
        {
            return false;
        }
        ScheduledJob that = (ScheduledJob) o;
        return myBackoffInMs == that.myBackoffInMs
                && myRunIntervalInMs == that.myRunIntervalInMs
                && myLastSuccessfulRun == that.myLastSuccessfulRun
                && myNextRunTimeInMs == that.myNextRunTimeInMs
                && myRunOffset == that.myRunOffset
                && myPriority == that.myPriority
                && Objects.equals(myId, that.myId)
                && Objects.equals(myPriorityGranularity, that.myPriorityGranularity);
    }

    /**
     * {@inheritDoc}
     * <p>
     * NOTE(review): the hash includes fields that change in {@link #postExecute(boolean)}; avoid relying on
     * it staying stable while a job is stored in a hash based collection.
     */
    @Override
    public int hashCode()
    {
        return Objects.hash(myPriority, myBackoffInMs, myRunIntervalInMs, myLastSuccessfulRun,
                myNextRunTimeInMs, myRunOffset, myId, myPriorityGranularity);
    }

    /**
     * The different priorities a job can have.
     * <p>
     * The higher the value a job has the more the {@link ScheduledJob#getRealPriority() current priority} is
     * increased for each priority granularity unit (one hour by default) that passes.
     */
    public enum Priority
    {
        /**
         * Low priority, steps the current priority by 1 each granularity unit.
         */
        LOW(1),

        /**
         * Medium priority, steps the current priority by 2 each granularity unit.
         */
        MEDIUM(2),

        /**
         * High priority, steps the current priority by 3 each granularity unit.
         */
        HIGH(3),

        /**
         * Highest priority, steps the current priority by 100 each granularity unit.
         * <p>
         * Should be used later on for user defined operations.
         */
        HIGHEST(100);

        private final int value;

        Priority(final int aValue)
        {
            this.value = aValue;
        }

        public int getValue()
        {
            return value;
        }
    }

    public enum State
    {
        /**
         * Job is pending to be run.
         */
        RUNNABLE,

        /**
         * Job is finished and can be discarded.
         */
        FINISHED,

        /**
         * The Job cannot be run currently.
         */
        PARKED,

        /**
         * The Job has failed and can be discarded.
         */
        FAILED
    }

    /**
     * The configuration of a job.
     */
    @SuppressWarnings("PMD.DataClass")
    public static class Configuration
    {
        /**
         * The priority of the job.
         */
        public final Priority priority;

        /**
         * The minimum amount of time to wait between each successful run.
         */
        public final long runIntervalInMs;

        /**
         * The amount of time to wait before marking job as runnable after failing.
         */
        public final long backoffInMs;

        /**
         * The unit of time granularity used for priority calculation in scheduling jobs.
         */
        public final TimeUnit priorityGranularity;

        Configuration(final ConfigurationBuilder builder)
        {
            // Fail at build time instead of later inside the priority calculation.
            priority = Objects.requireNonNull(builder.priority, "Priority must be set");
            runIntervalInMs = builder.runIntervalInMs;
            backoffInMs = builder.backoffInMs;
            priorityGranularity = Objects.requireNonNull(builder.granularityUnit,
                    "Priority granularity must be set");
        }
    }

    /**
     * Builder class for the {@link Configuration}.
     * <p>
     * Defaults: {@link Priority#LOW}, a run interval of one day, a backoff of 30 minutes and a priority
     * granularity of {@link TimeUnit#HOURS}.
     */
    public static class ConfigurationBuilder
    {
        private Priority priority = Priority.LOW;
        private long runIntervalInMs = TimeUnit.DAYS.toMillis(1);
        private long backoffInMs = TimeUnit.MINUTES.toMillis(DEFAULT_BACKOFF_IN_MINUTES);
        private TimeUnit granularityUnit = TimeUnit.HOURS;

        /**
         * Set the time unit in which the current priority of the job is stepped.
         *
         * @param granularityTimeUnit
         *            The time unit to use, must not be null.
         * @return The builder
         */
        public final ConfigurationBuilder withPriorityGranularity(final TimeUnit granularityTimeUnit)
        {
            this.granularityUnit = granularityTimeUnit;
            return this;
        }

        /**
         * Set the configured priority of the job.
         *
         * @param aPriority
         *            The priority to use, must not be null.
         * @return The builder
         */
        public final ConfigurationBuilder withPriority(final Priority aPriority)
        {
            this.priority = aPriority;
            return this;
        }

        /**
         * Set the minimum amount of time to wait between each successful run.
         *
         * @param runInterval
         *            The interval.
         * @param unit
         *            The time unit of the interval.
         * @return The builder
         */
        public final ConfigurationBuilder withRunInterval(final long runInterval, final TimeUnit unit)
        {
            this.runIntervalInMs = unit.toMillis(runInterval);
            return this;
        }

        /**
         * Set the amount of time to wait before the job is runnable again after failing.
         *
         * @param backoff
         *            The backoff.
         * @param unit
         *            The time unit of the backoff.
         * @return The builder
         */
        public final ConfigurationBuilder withBackoff(final long backoff, final TimeUnit unit)
        {
            this.backoffInMs = unit.toMillis(backoff);
            return this;
        }

        /**
         * Build the configuration.
         *
         * @return The configuration
         * @throws NullPointerException
         *             if the priority or the priority granularity has been set to null.
         */
        public final Configuration build()
        {
            return new Configuration(this);
        }
    }
}
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler; + +import com.ericsson.bss.cassandra.ecchronos.core.jmx.DistributedJmxProxyFactory; +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairMetrics; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableRepairPolicy; +import com.google.common.base.Preconditions; +import java.util.List; +import java.util.Objects; +import java.util.UUID; + +public abstract class ScheduledRepairJob extends ScheduledJob +{ + private final TableReference myTableReference; + private final DistributedJmxProxyFactory myJmxProxyFactory; + private final RepairConfiguration myRepairConfiguration; + private final List myRepairPolicies; + private final TableRepairMetrics myTableRepairMetrics; + + public ScheduledRepairJob( + final Configuration configuration, + final TableReference tableReference, + final DistributedJmxProxyFactory jmxProxyFactory, + final RepairConfiguration repairConfiguration, + final List repairPolicies, + final TableRepairMetrics tableRepairMetrics) + { + super(configuration); + myTableReference = Preconditions.checkNotNull(tableReference, "Table reference must be set"); + myJmxProxyFactory = Preconditions.checkNotNull(jmxProxyFactory, "JMX proxy factory must be set"); + myRepairConfiguration = Preconditions.checkNotNull(repairConfiguration, "Repair configuration must be set"); + myRepairPolicies = Preconditions.checkNotNull(repairPolicies, "Repair policies must be set"); + myTableRepairMetrics = Preconditions.checkNotNull(tableRepairMetrics, "Table repair metrics must be set"); + } + + public ScheduledRepairJob( + final Configuration configuration, + final UUID id, + final TableReference tableReference, + final 
DistributedJmxProxyFactory jmxProxyFactory, + final RepairConfiguration repairConfiguration, + final List repairPolicies, + final TableRepairMetrics tableRepairMetrics) + { + super(configuration, id); + myTableReference = Preconditions.checkNotNull(tableReference, "Table reference must be set"); + myJmxProxyFactory = Preconditions.checkNotNull(jmxProxyFactory, "JMX proxy factory must be set"); + myRepairConfiguration = Preconditions.checkNotNull(repairConfiguration, "Repair configuration must be set"); + myRepairPolicies = Preconditions.checkNotNull(repairPolicies, "Repair policies must be set"); + myTableRepairMetrics = Preconditions.checkNotNull(tableRepairMetrics, "Table repair metrics must be set"); + } + + /** + * Get the table reference for this job. + * @return Table reference + */ + public TableReference getTableReference() + { + return myTableReference; + } + + protected final DistributedJmxProxyFactory getJmxProxyFactory() + { + return myJmxProxyFactory; + } + + public abstract ScheduledRepairJobView getView(); + + /** + * Get the repair configuration for this job. 
+ * @return Repair configuration + */ + public RepairConfiguration getRepairConfiguration() + { + return myRepairConfiguration; + } + + protected final List getRepairPolicies() + { + return myRepairPolicies; + } + + protected final TableRepairMetrics getTableRepairMetrics() + { + return myTableRepairMetrics; + } + + /** + * {@inheritDoc} + */ + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + if (!super.equals(o)) + { + return false; + } + ScheduledRepairJob that = (ScheduledRepairJob) o; + return Objects.equals(myTableReference, that.myTableReference) && Objects.equals( + myJmxProxyFactory, that.myJmxProxyFactory) && Objects.equals(myRepairConfiguration, + that.myRepairConfiguration) && Objects.equals( + myRepairPolicies, that.myRepairPolicies) && Objects.equals(myTableRepairMetrics, + that.myTableRepairMetrics); + } + + /** + * {@inheritDoc} + */ + @Override + public int hashCode() + { + return Objects.hash(super.hashCode(), myTableReference, myJmxProxyFactory, myRepairConfiguration, + myRepairPolicies, myTableRepairMetrics); + } +} + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledRepairJobView.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledRepairJobView.java new file mode 100644 index 000000000..9e0a2512f --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledRepairJobView.java @@ -0,0 +1,200 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler; + +import com.ericsson.bss.cassandra.ecchronos.core.repair.config.RepairConfiguration; +import com.ericsson.bss.cassandra.ecchronos.core.state.RepairStateSnapshot; +import com.ericsson.bss.cassandra.ecchronos.core.table.TableReference; +import com.ericsson.bss.cassandra.ecchronos.utils.enums.repair.RepairType; +import java.util.Objects; +import java.util.UUID; + +public class ScheduledRepairJobView +{ + public enum Status + { + COMPLETED, ON_TIME, LATE, OVERDUE, BLOCKED + } + + private final UUID myId; + private final TableReference myTableReference; + private final RepairConfiguration myRepairConfiguration; + private RepairStateSnapshot myRepairStateSnapshot; + private final Status myStatus; + private final double myProgress; + private final long myNextRepair; + private final long myCompletionTime; + private final RepairType myRepairType; + + public ScheduledRepairJobView(final UUID id, final TableReference tableReference, + final RepairConfiguration repairConfiguration, final Status status, final double progress, + final long nextRepair, final long completionTime, final RepairType repairType) + { + myId = id; + myTableReference = tableReference; + myRepairConfiguration = repairConfiguration; + myStatus = status; + myProgress = progress; + myNextRepair = nextRepair; + myCompletionTime = completionTime; + myRepairType = repairType; + } + + public ScheduledRepairJobView(final UUID id, final TableReference tableReference, + final RepairConfiguration repairConfiguration, final 
RepairStateSnapshot repairStateSnapshot, + final Status status, final double progress, final long nextRepair, + final RepairType repairType) + { + myId = id; + myTableReference = tableReference; + myRepairConfiguration = repairConfiguration; + myRepairStateSnapshot = repairStateSnapshot; + myStatus = status; + myProgress = progress; + myNextRepair = nextRepair; + myCompletionTime = repairStateSnapshot.lastCompletedAt(); + myRepairType = repairType; + } + + /** + * Get id. + * + * @return UUID + */ + public UUID getId() + { + return myId; + } + + /** + * Get table reference. + * + * @return TableReference + */ + public TableReference getTableReference() + { + return myTableReference; + } + + /** + * Get repair configuration. + * + * @return RepairConfiguration + */ + public RepairConfiguration getRepairConfiguration() + { + return myRepairConfiguration; + } + + /** + * Get repair snapshot. + * + * @return RepairStateSnapshot + */ + public RepairStateSnapshot getRepairStateSnapshot() + { + return myRepairStateSnapshot; + } + + /** + * Get status. + * + * @return Status + */ + public Status getStatus() + { + return myStatus; + } + + /** + * Get progress. + * + * @return double + */ + public double getProgress() + { + return myProgress; + } + + /** + * Get next repair. + * + * @return long + */ + public long getNextRepair() + { + return myNextRepair; + } + + /** + * Get completion time. + * + * @return long + */ + public long getCompletionTime() + { + return myCompletionTime; + } + + /** + * Get repair type. + * + * @return RepairType + */ + public RepairType getRepairType() + { + return myRepairType; + } + + /** + * Equality (completion time is not considered). + * + * @param o The object to compare to. 
+ * @return boolean + */ + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + else if (o == null || getClass() != o.getClass()) + { + return false; + } + ScheduledRepairJobView that = (ScheduledRepairJobView) o; + return Double.compare(that.myProgress, myProgress) == 0 + && myNextRepair == that.myNextRepair + && Objects.equals(myId, that.myId) + && Objects.equals(myTableReference, that.myTableReference) + && Objects.equals(myRepairConfiguration, that.myRepairConfiguration) + && Objects.equals(myRepairStateSnapshot, that.myRepairStateSnapshot) + && Objects.equals(myStatus, that.myStatus) + && Objects.equals(myRepairType, that.myRepairType); + } + + /** + * Hash representation. + * + * @return int + */ + @Override + public int hashCode() + { + return Objects.hash(myId, myTableReference, myRepairConfiguration, myRepairStateSnapshot, myStatus, myProgress, + myNextRepair, myRepairType); + } +} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledTask.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledTask.java new file mode 100644 index 000000000..1070cb03c --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/ScheduledTask.java @@ -0,0 +1,57 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler; + +import com.ericsson.bss.cassandra.ecchronos.utils.exceptions.ScheduledJobException; +import java.util.UUID; + +@SuppressWarnings("VisibilityModifier") +public abstract class ScheduledTask +{ + protected final int myPriority; + + protected ScheduledTask() + { + this(1); + } + + protected ScheduledTask(final int priority) + { + myPriority = priority; + } + + public final boolean preValidate() + { + return true; + } + + /** + * Run the task. + * + * @return True if the task was executed successfully. + * @throws ScheduledJobException + * if anything went wrong during running. + */ + public abstract boolean execute(UUID nodeID) throws ScheduledJobException; + + /** + * Cleanup of the task that should be run after the task has been executed. + */ + public void cleanup() + { + // Let subclasses override + } +} + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/package-info.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/package-info.java new file mode 100644 index 000000000..f0317466c --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/repair/scheduler/package-info.java @@ -0,0 +1,18 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/** + * Contains the interfaces and resources for repair scheduler. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.repair.scheduler; diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairStateSnapshot.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairStateSnapshot.java new file mode 100644 index 000000000..05fa25c67 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/RepairStateSnapshot.java @@ -0,0 +1,187 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * An immutable copy of the repair state. + * The repair state describes the current state of repairs for a table. + *

+ * <ul>
+ * <li>When the table was last repaired - {@link #lastCompletedAt()}</li>
+ * <li>The next repair(s) to run - {@link #getRepairGroups()}</li>
+ * <li>The vnodes for the table and when they were last repaired - {@link #getVnodeRepairStates()}</li>
+ * <li>If there is a repair available - {@link #canRepair()}</li>
+ * </ul>
+ */
+public final class RepairStateSnapshot
+{
+    private final boolean canRepair;
+    private final long myLastCompletedAt;
+    private final long myCreatedAt;
+    private final ImmutableList<ReplicaRepairGroup> myReplicaRepairGroup;
+    private final VnodeRepairStates myVnodeRepairStates;
+    private final long myEstimatedRepairTime;
+
+    private RepairStateSnapshot(final Builder builder)
+    {
+        myLastCompletedAt = builder.myLastCompletedAt;
+        myCreatedAt = builder.myCreatedAt;
+        myReplicaRepairGroup = builder.myReplicaRepairGroup;
+        myVnodeRepairStates = builder.myVnodeRepairStates;
+        // Computed once here so the getters and getRemainingRepairTime can reuse the value.
+        myEstimatedRepairTime = VnodeRepairStateUtils.getRepairTime(myVnodeRepairStates.getVnodeRepairStates());
+        canRepair = !myReplicaRepairGroup.isEmpty();
+    }
+
+    /**
+     * Get the remaining repair time for the vnodes of this snapshot.
+     * Delegates to {@link VnodeRepairStateUtils#getRemainingRepairTime} with this snapshot's vnode states
+     * and estimated repair time.
+     *
+     * @param now The current time.
+     * @param repairIntervalMs The repair interval in milliseconds.
+     * @return The remaining repair time.
+     */
+    public long getRemainingRepairTime(final long now, final long repairIntervalMs)
+    {
+        return VnodeRepairStateUtils.getRemainingRepairTime(myVnodeRepairStates.getVnodeRepairStates(),
+                repairIntervalMs, now, myEstimatedRepairTime);
+    }
+
+    /**
+     * Get the time this snapshot was created.
+     * @return The time this snapshot was created.
+     */
+    public long getCreatedAt()
+    {
+        return myCreatedAt;
+    }
+
+    /**
+     * Check if a repair can be performed based on the current state.
+     *
+     * @return True if repair can run.
+     */
+    public boolean canRepair()
+    {
+        return canRepair;
+    }
+
+    /**
+     * Get the time of the last successful repair of the table.
+     *
+     * @return The time the table was last repaired or -1 if no information is available.
+     */
+    public long lastCompletedAt()
+    {
+        return myLastCompletedAt;
+    }
+
+    /**
+     * Get the estimated repair time, as computed from the vnode repair states when the snapshot was built.
+     *
+     * @return The estimated repair time.
+     */
+    public long getEstimatedRepairTime()
+    {
+        return myEstimatedRepairTime;
+    }
+
+    /**
+     * Information needed to run the next repair(s).
+     *
+     * @return The next repair(s) or an empty list if none can be run.
+     */
+    public List<ReplicaRepairGroup> getRepairGroups()
+    {
+        return myReplicaRepairGroup;
+    }
+
+    /**
+     * Get the vnode repair states this snapshot was built with.
+     *
+     * @return The vnode repair states.
+     */
+    public VnodeRepairStates getVnodeRepairStates()
+    {
+        return myVnodeRepairStates;
+    }
+
+    @Override
+    public String toString()
+    {
+        return "RepairStateSnapshot{"
+                + "canRepair=" + canRepair
+                + ", myLastCompletedAt=" + myLastCompletedAt
+                + ", myReplicaRepairGroup=" + myReplicaRepairGroup
+                + ", myEstimatedRepairTime=" + myEstimatedRepairTime
+                + '}';
+    }
+
+    public static Builder newBuilder()
+    {
+        return new Builder();
+    }
+
+    /**
+     * Builder for {@link RepairStateSnapshot}.
+     * Replica repair groups and vnode repair states must be provided before calling {@link #build()}.
+     */
+    public static class Builder
+    {
+        // Primitive defaulting to the documented "no information" value (-1). The previous boxed Long
+        // was unboxed in the constructor and threw a NullPointerException when never set.
+        private long myLastCompletedAt = VnodeRepairState.UNREPAIRED;
+        private long myCreatedAt = System.currentTimeMillis();
+        private ImmutableList<ReplicaRepairGroup> myReplicaRepairGroup;
+        private VnodeRepairStates myVnodeRepairStates;
+
+        /**
+         * Build repair state snapshot with last completed at.
+         *
+         * @param lastCompletedAt Time stamp of last completion.
+         * @return Builder
+         */
+        public Builder withLastCompletedAt(final long lastCompletedAt)
+        {
+            myLastCompletedAt = lastCompletedAt;
+            return this;
+        }
+
+        /**
+         * Build repair state snapshot with replica repair groups.
+         *
+         * @param replicaRepairGroup The repair replica group.
+         * @return Builder
+         */
+        public Builder withReplicaRepairGroups(final List<ReplicaRepairGroup> replicaRepairGroup)
+        {
+            myReplicaRepairGroup = ImmutableList.copyOf(replicaRepairGroup);
+            return this;
+        }
+
+        /**
+         * Build repair state snapshot with vNode repair state.
+         *
+         * @param vnodeRepairStates The vnode repair states.
+         * @return Builder
+         */
+        public Builder withVnodeRepairStates(final VnodeRepairStates vnodeRepairStates)
+        {
+            myVnodeRepairStates = vnodeRepairStates;
+            return this;
+        }
+
+        /**
+         * Build repair state snapshot with created at timestamp.
+         *
+         * @param createdAt The created at timestamp.
+         * @return Builder
+         */
+        public Builder withCreatedAt(final long createdAt)
+        {
+            myCreatedAt = createdAt;
+            return this;
+        }
+
+        /**
+         * Build repair state snapshot.
+         *
+         * @return RepairStateSnapshot
+         * @throws NullPointerException if the replica repair groups or the vnode repair states were not set.
+         */
+        public RepairStateSnapshot build()
+        {
+            return new RepairStateSnapshot(this);
+        }
+    }
+}
+
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/ReplicaRepairGroup.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/ReplicaRepairGroup.java
new file mode 100644
index 000000000..14e2806cc
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/ReplicaRepairGroup.java
@@ -0,0 +1,102 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableSet;
+
+import java.util.Iterator;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+/**
+ * A group of replicas and ranges that should be repaired together.
+ */
+public class ReplicaRepairGroup implements Iterable<LongTokenRange>
+{
+    private final ImmutableSet<DriverNode> myReplicas;
+    private final ImmutableList<LongTokenRange> myVnodes;
+    private final long myLastCompletedAt;
+
+    /**
+     * Constructor.
+     *
+     * @param replicas The nodes.
+     * @param vnodes The token ranges.
+     * @param lastCompletedAt last repair completed
+     */
+    public ReplicaRepairGroup(final ImmutableSet<DriverNode> replicas,
+                              final ImmutableList<LongTokenRange> vnodes,
+                              final long lastCompletedAt)
+    {
+        myReplicas = replicas;
+        myVnodes = vnodes;
+        myLastCompletedAt = lastCompletedAt;
+    }
+
+    /**
+     * Get replicas.
+     *
+     * @return Replicas
+     */
+    public Set<DriverNode> getReplicas()
+    {
+        return myReplicas;
+    }
+
+    /**
+     * Get datacenters.
+     *
+     * @return The distinct datacenters of the replicas in this group.
+     */
+    public Set<String> getDataCenters()
+    {
+        return myReplicas.stream().map(DriverNode::getDatacenter).collect(Collectors.toSet());
+    }
+
+    /**
+     * Get last completed at.
+     *
+     * @return Last completed at for this repair group.
+     */
+    public long getLastCompletedAt()
+    {
+        return myLastCompletedAt;
+    }
+
+    /**
+     * Iterate.
+     *
+     * @return Token range iterator
+     */
+    @Override
+    public Iterator<LongTokenRange> iterator()
+    {
+        return myVnodes.iterator();
+    }
+
+    /**
+     * String representation.
+     *
+     * @return String
+     */
+    @Override
+    public String toString()
+    {
+        return String.format("(replicas=%s,vnodes=%s)", myReplicas, myVnodes);
+    }
+}
+
+
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairState.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairState.java
new file mode 100644
index 000000000..f86baac85
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairState.java
@@ -0,0 +1,224 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.google.common.collect.ImmutableSet; + +import java.util.Objects; + +/** + * A class representing the repair state of a single vnode. + */ +public class VnodeRepairState +{ + public static final long UNREPAIRED = -1L; + + private final LongTokenRange myTokenRange; + private final ImmutableSet myReplicas; + private final long myStartedAt; + private final long myFinishedAt; + private final long myRepairTime; + + /** + * Constructor. + * + * @param tokenRange The token range. + * @param replicas The nodes. + * @param startedAt Started at timestamp. + */ + public VnodeRepairState(final LongTokenRange tokenRange, + final ImmutableSet replicas, + final long startedAt) + { + this(tokenRange, replicas, startedAt, UNREPAIRED); + } + + /** + * Constructor. + * + * @param tokenRange The token range. + * @param replicas The nodes. + * @param startedAt Started at timestamp. + * @param finishedAt Finished at timestamp. + * @param repairTime Repair time. + */ + public VnodeRepairState(final LongTokenRange tokenRange, + final ImmutableSet replicas, + final long startedAt, + final long finishedAt, + final long repairTime) + { + myTokenRange = tokenRange; + myReplicas = replicas; + myStartedAt = startedAt; + myFinishedAt = finishedAt; + myRepairTime = repairTime; + } + + /** + * Constructor. + * + * @param tokenRange The token range. + * @param replicas The nodes. + * @param startedAt Started at timestamp. + * @param finishedAt Finished at timestamp.
+ */ + public VnodeRepairState(final LongTokenRange tokenRange, + final ImmutableSet replicas, + final long startedAt, + final long finishedAt) + { + myTokenRange = tokenRange; + myReplicas = replicas; + myStartedAt = startedAt; + myFinishedAt = finishedAt; + if (myFinishedAt != UNREPAIRED) + { + myRepairTime = myFinishedAt - myStartedAt; + } + else + { + myRepairTime = 0; + } + + } + + /** + * Get token range. + * + * @return LongTokenRange + */ + public LongTokenRange getTokenRange() + { + return myTokenRange; + } + + /** + * Get replicas. + * + * @return The nodes + */ + public ImmutableSet getReplicas() + { + return myReplicas; + } + + /** + * Get last repaired at. + * + * @return long + */ + public long lastRepairedAt() + { + return myStartedAt; + } + + /** + * Get finished at. + * + * @return long + */ + public long getFinishedAt() + { + return myFinishedAt; + } + + /** + * Get started at. + * + * @return long + */ + public long getStartedAt() + { + return myStartedAt; + } + + /** + * Get repair time. + * + * @return long + */ + public long getRepairTime() + { + return myRepairTime; + } + + /** + * Check if the vnodes are the same. + * + * The vnodes are the same if both token range and replicas match. + * + * @param other The vnode to compare to. + * @return True if it represents the same vnode. + */ + public boolean isSameVnode(final VnodeRepairState other) + { + return getTokenRange().equals(other.getTokenRange()) && getReplicas().equals(other.getReplicas()); + } + + /** + * Returns a string representation. + * + * @return String + */ + @Override + public String toString() + { + return "VnodeRepairState{" + + "myTokenRange=" + myTokenRange + + ", myReplicas=" + myReplicas + + ", myStartedAt=" + myStartedAt + + ", myFinishedAt=" + myFinishedAt + + ", myRepairTime=" + myRepairTime + + '}'; + } + + /** + * Checks equality. + * + * @param o Object to compare to. 
+ * @return boolean + */ + @Override + public boolean equals(final Object o) + { + if (this == o) + { + return true; + } + if (o == null || getClass() != o.getClass()) + { + return false; + } + VnodeRepairState that = (VnodeRepairState) o; + return myStartedAt == that.myStartedAt + && myFinishedAt == that.myFinishedAt + && myRepairTime == that.myRepairTime + && Objects.equals(myTokenRange, that.myTokenRange) + && Objects.equals(myReplicas, that.myReplicas); + } + + /** + * Return a hash representation. + * + * @return int + */ + @Override + public int hashCode() + { + return Objects.hash(myTokenRange, myReplicas, myStartedAt, myFinishedAt, myRepairTime); + } +} diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStateUtils.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStateUtils.java new file mode 100644 index 000000000..aca87d24b --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStateUtils.java @@ -0,0 +1,100 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +public final class VnodeRepairStateUtils +{ + private VnodeRepairStateUtils() + { + } + + /** + * Get estimated repair time for the vnodes. This is based on the last repair that was run for the vnodes. + * @param vnodeRepairStates The vnodes. + * @return The estimated repair time in ms. + */ + public static long getRepairTime(final Collection vnodeRepairStates) + { + Set vnodes = groupByStartedAt(vnodeRepairStates); + long sum = 0; + for (VnodeRepairState vnode : vnodes) + { + sum += vnode.getRepairTime(); + } + return sum; + } + + /** + * Calculate the remaining repair time for the vnodes. + * @param vnodeRepairStates The vnodes. + * @param repairIntervalMs The repair interval. + * @param now The time now. + * @param totalRepairTime The estimated repair time for the vnodes. + * @return The remaining repair time for the vnodes. + */ + public static long getRemainingRepairTime(final Collection vnodeRepairStates, + final long repairIntervalMs, final long now, final long totalRepairTime) + { + Set vnodes = groupByStartedAt(vnodeRepairStates); + long sum = 0; + for (VnodeRepairState vnodeRepairState : vnodes) + { + if (vnodeRepairState.lastRepairedAt() + (repairIntervalMs - totalRepairTime) <= now) + { + sum += vnodeRepairState.getRepairTime(); + } + } + return sum; + } + + /** + * Group vnodes by startedAt timestamp. + * @param vnodeRepairStates The vnodes. + * @return A single vnode per startedAt. 
+     */
+    private static Set<VnodeRepairState> groupByStartedAt(final Collection<VnodeRepairState> vnodeRepairStates)
+    {
+        // One representative per startedAt: the first vnode encountered wins, later vnodes sharing
+        // the same timestamp are ignored. A single map replaces the former map-of-sets, whose
+        // emptiness check could never be false and whose representative depended on hash order.
+        Map<Long, VnodeRepairState> firstByStartedAt = new HashMap<>();
+        for (VnodeRepairState vnodeRepairState : vnodeRepairStates)
+        {
+            firstByStartedAt.putIfAbsent(vnodeRepairState.getStartedAt(), vnodeRepairState);
+        }
+
+        return new HashSet<>(firstByStartedAt.values());
+    }
+}
+
+
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStates.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStates.java
new file mode 100644
index 000000000..39fbf2554
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/state/VnodeRepairStates.java
@@ -0,0 +1,71 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import java.util.Collection;
+
+/**
+ * A collection of {@link VnodeRepairState VnodeRepairStates} that contains utilities to easily combine
+ * with new entries.
+ */ +public interface VnodeRepairStates +{ + + Collection getVnodeRepairStates(); + + /** + * Create a new vnode repair states object with the minimum repaired at set to the provided value. + * + * Entries which contain a higher repaired at will keep that value. + * + * @param repairedAt The minimum repaired at to use. + * @return The created state. + */ + VnodeRepairStates combineWithRepairedAt(long repairedAt); + + interface Builder + { + /** + * Combine a collection of vnode repair states into this collection. + * + * @param vnodeRepairStates The vnode repair statuses to update. + * @return This builder + * @see #updateVnodeRepairState(VnodeRepairState) + */ + default Builder updateVnodeRepairStates(Collection vnodeRepairStates) + { + for (VnodeRepairState vnodeRepairState : vnodeRepairStates) + { + updateVnodeRepairState(vnodeRepairState); + } + return this; + } + + /** + * Combine the provided {@link VnodeRepairState} with the current representation. + * If there already was a higher timestamp recorded for the vnode, no change will be made. + * + * An entry will be replaced if it has a higher timestamp. + * No new entries will be added. + * + * @param vnodeRepairState The vnode repair status to update. + * @return This builder + */ + Builder updateVnodeRepairState(VnodeRepairState vnodeRepairState); + + VnodeRepairStates build(); + } +} + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/ReplicatedTableProvider.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/ReplicatedTableProvider.java new file mode 100644 index 000000000..c34afdd6c --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/ReplicatedTableProvider.java @@ -0,0 +1,39 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.table;
+
+import com.datastax.oss.driver.api.core.metadata.Node;
+import java.util.Set;
+
+/**
+ * Interface for retrieving tables replicated by the local node.
+ * The purpose of this interface is to abstract away java-driver related mocking from other components
+ * trying to retrieve the tables that should be repaired.
+ */
+public interface ReplicatedTableProvider
+{
+    /**
+     * @return The full set of tables replicated on the local node which should be repaired.
+     */
+    Set<TableReference> getAll();
+
+    /**
+     * Check if a keyspace should be repaired.
+     *
+     * @param node The node the check is made for (presumably the node whose replication of the
+     *             keyspace is evaluated - confirm with the implementation).
+     * @param keyspace The keyspace to check.
+     * @return True if the provided keyspace should be repaired.
+     */
+    boolean accept(Node node, String keyspace);
+}
diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableRepairMetrics.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableRepairMetrics.java
new file mode 100644
index 000000000..dcd712ead
--- /dev/null
+++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableRepairMetrics.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.table; + +import java.util.concurrent.TimeUnit; + +/** + * Interface for reporting table based repair metrics. + */ +public interface TableRepairMetrics +{ + /** + * Report number of repaired/not repaired ranges for the provided table. + * + * @param tableReference The table + * @param repairedRanges The number of repaired ranges + * @param notRepairedRanges The number of not repaired ranges + */ + void repairState(TableReference tableReference, int repairedRanges, int notRepairedRanges); + + /** + * Report the time the table was last repaired. + * + * @param tableReference The table to update the last repaired at value for. + * @param lastRepairedAt The last time the table was repaired. + */ + void lastRepairedAt(TableReference tableReference, long lastRepairedAt); + + /** + * Report the effective remaining repair time for table (time ecChronos waits for cassandra to perform repair). + * + * @param tableReference The table to update the remaining repair time for. + * @param remainingRepairTime The remaining time to fully repair the table. + */ + void remainingRepairTime(TableReference tableReference, long remainingRepairTime); + + /** + * Report the time it took to issue one repair command (session) and whether it was successful or not. + * + * @param tableReference The table the repair was performed on. + * @param timeTaken The time it took to perform the repair. + * @param timeUnit The {@link TimeUnit} used for the time taken. + * @param successful If the repair was successful or not. 
+ */ + void repairSession(TableReference tableReference, long timeTaken, TimeUnit timeUnit, boolean successful); +} + diff --git a/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableRepairPolicy.java b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableRepairPolicy.java new file mode 100644 index 000000000..0b0e785b0 --- /dev/null +++ b/core/src/main/java/com/ericsson/bss/cassandra/ecchronos/core/table/TableRepairPolicy.java @@ -0,0 +1,29 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.ericsson.bss.cassandra.ecchronos.core.table; + +/** + * Interface for policies that can be used to control if repairs should run. + */ +public interface TableRepairPolicy +{ + /** + * Check with the policy if a repair of the provided table should run now. + * + * @param tableReference The table to verify. + * @return True if the repair should continue. 
+ */ + boolean shouldRun(TableReference tableReference); +} diff --git a/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestRepairStateSnapshot.java b/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestRepairStateSnapshot.java new file mode 100644 index 000000000..5897507d4 --- /dev/null +++ b/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestRepairStateSnapshot.java @@ -0,0 +1,79 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.junit.MockitoJUnitRunner; + +import java.util.Collections; + +import static org.assertj.core.api.Assertions.assertThat; + +@RunWith(MockitoJUnitRunner.class) +public class TestRepairStateSnapshot +{ + @Mock + private ReplicaRepairGroup mockRepairGroup; + + @Mock + private VnodeRepairStates mockVnodeRepairStates; + + @Test + public void testCanRepairFalse() + { + RepairStateSnapshot repairStateSnapshot = RepairStateSnapshot.newBuilder() + .withReplicaRepairGroups(Collections.emptyList()) + .withVnodeRepairStates(mockVnodeRepairStates) + .withLastCompletedAt(VnodeRepairState.UNREPAIRED) + .build(); + + assertThat(repairStateSnapshot.canRepair()).isFalse(); + assertThat(repairStateSnapshot.getRepairGroups()).isEmpty(); + assertThat(repairStateSnapshot.getVnodeRepairStates()).isEqualTo(mockVnodeRepairStates); + assertThat(repairStateSnapshot.lastCompletedAt()).isEqualTo(VnodeRepairState.UNREPAIRED); + } + + @Test + public void testCanRepairTrue() + { + RepairStateSnapshot repairStateSnapshot = RepairStateSnapshot.newBuilder() + .withReplicaRepairGroups(Collections.singletonList(mockRepairGroup)) + .withVnodeRepairStates(mockVnodeRepairStates) + .withLastCompletedAt(VnodeRepairState.UNREPAIRED) + .build(); + + assertThat(repairStateSnapshot.canRepair()).isTrue(); + assertThat(repairStateSnapshot.getRepairGroups()).containsExactly(mockRepairGroup); + assertThat(repairStateSnapshot.getVnodeRepairStates()).isEqualTo(mockVnodeRepairStates); + assertThat(repairStateSnapshot.lastCompletedAt()).isEqualTo(VnodeRepairState.UNREPAIRED); + } + + @Test + public void testDifferentRepairedAt() + { + RepairStateSnapshot repairStateSnapshot = RepairStateSnapshot.newBuilder() + .withReplicaRepairGroups(Collections.emptyList()) + .withVnodeRepairStates(mockVnodeRepairStates) + .withLastCompletedAt(1234L) + .build(); + + 
assertThat(repairStateSnapshot.canRepair()).isFalse(); + assertThat(repairStateSnapshot.getRepairGroups()).isEmpty(); + assertThat(repairStateSnapshot.getVnodeRepairStates()).isEqualTo(mockVnodeRepairStates); + assertThat(repairStateSnapshot.lastCompletedAt()).isEqualTo(1234L); + } +} diff --git a/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestReplicaRepairGroup.java b/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestReplicaRepairGroup.java new file mode 100644 index 000000000..e4dc2ddce --- /dev/null +++ b/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestReplicaRepairGroup.java @@ -0,0 +1,67 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package com.ericsson.bss.cassandra.ecchronos.core.state; + +import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.junit.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.*; + +public class TestReplicaRepairGroup +{ + @Test + public void testMultipleDataCenters() + { + DriverNode node1 = mockNode("DC1"); + DriverNode node2 = mockNode("DC2"); + DriverNode node3 = mockNode("DC3"); + DriverNode node4 = mockNode("DC1"); + LongTokenRange range = new LongTokenRange(1, 2); + + ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(ImmutableSet.of(node1, node2, node3, node4), ImmutableList.of(range), System.currentTimeMillis()); + + assertThat(replicaRepairGroup.getDataCenters()).containsExactlyInAnyOrder("DC1", "DC2", "DC3"); + assertThat(replicaRepairGroup.getReplicas()).containsExactlyInAnyOrder(node1, node2, node3, node4); + assertThat(replicaRepairGroup.iterator()).toIterable().containsExactly(range); + } + + @Test + public void testMultipleRanges() + { + DriverNode node1 = mockNode("DC1"); + DriverNode node2 = mockNode("DC1"); + DriverNode node3 = mockNode("DC1"); + LongTokenRange range1 = new LongTokenRange(1, 2); + LongTokenRange range2 = new LongTokenRange(3, 4); + LongTokenRange range3 = new LongTokenRange(5, 6); + + ReplicaRepairGroup replicaRepairGroup = new ReplicaRepairGroup(ImmutableSet.of(node1, node2, node3), ImmutableList.of( + range1, range2, range3), System.currentTimeMillis()); + + assertThat(replicaRepairGroup.getDataCenters()).containsExactlyInAnyOrder("DC1"); + assertThat(replicaRepairGroup.getReplicas()).containsExactlyInAnyOrder(node1, node2, node3); + assertThat(replicaRepairGroup.iterator()).toIterable().containsExactly(range1, range2, range3); + } + + private DriverNode mockNode(String dataCenter) + { + DriverNode node = mock(DriverNode.class); + 
when(node.getDatacenter()).thenReturn(dataCenter); + return node; + } +} diff --git a/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestVnodeRepairState.java b/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestVnodeRepairState.java new file mode 100644 index 000000000..a20199970 --- /dev/null +++ b/core/src/test/java/com/ericsson/bss/cassandra/ecchronos/core/state/TestVnodeRepairState.java @@ -0,0 +1,94 @@ +/* + * Copyright 2024 Telefonaktiebolaget LM Ericsson + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package com.ericsson.bss.cassandra.ecchronos.core.state;
+
+import com.ericsson.bss.cassandra.ecchronos.core.metadata.DriverNode;
+import com.google.common.collect.ImmutableSet;
+import nl.jqno.equalsverifier.EqualsVerifier;
+import org.junit.Test;
+
+import static org.assertj.core.api.Assertions.assertThat;
+import static org.mockito.Mockito.mock;
+
+/** Unit tests for {@link VnodeRepairState}: accessors, {@code isSameVnode} and the equals contract. */
+public class TestVnodeRepairState
+{
+    @Test
+    public void testVnodeRepairState()
+    {
+        LongTokenRange tokenRange = new LongTokenRange(1, 2);
+        DriverNode replicaA = mock(DriverNode.class);
+        DriverNode replicaB = mock(DriverNode.class);
+        DriverNode replicaC = mock(DriverNode.class);
+        VnodeRepairState state = new VnodeRepairState(tokenRange, ImmutableSet.of(replicaA, replicaB, replicaC), VnodeRepairState.UNREPAIRED);
+
+        assertThat(state.getReplicas()).containsExactlyInAnyOrder(replicaA, replicaB, replicaC);
+        assertThat(state.getTokenRange()).isEqualTo(tokenRange);
+        assertThat(state.lastRepairedAt()).isEqualTo(VnodeRepairState.UNREPAIRED);
+        assertThat(state.isSameVnode(state)).isTrue(); // a state must describe the same vnode as itself
+    }
+
+    @Test
+    public void testVnodeRepairStateRepairedAtIsSet()
+    {
+        long expectedRepairedAt = 1234L;
+        LongTokenRange tokenRange = new LongTokenRange(1, 2);
+        DriverNode replicaA = mock(DriverNode.class);
+        DriverNode replicaB = mock(DriverNode.class);
+        DriverNode replicaC = mock(DriverNode.class);
+
+        // The timestamp handed to the constructor must be reported back unchanged.
+        VnodeRepairState state = new VnodeRepairState(tokenRange, ImmutableSet.of(replicaA, replicaB, replicaC), expectedRepairedAt);
+        assertThat(state.lastRepairedAt()).isEqualTo(expectedRepairedAt);
+    }
+
+    @Test
+    public void testVnodeWithDifferentReplicasIsNotSame()
+    {
+        LongTokenRange sharedRange = new LongTokenRange(1, 2);
+        DriverNode commonReplica = mock(DriverNode.class);
+        DriverNode onlyInFirst = mock(DriverNode.class);
+        DriverNode onlyInSecond = mock(DriverNode.class);
+
+        // Same token range, but the replica sets differ in one node.
+        VnodeRepairState first = new VnodeRepairState(sharedRange, ImmutableSet.of(commonReplica, onlyInFirst), VnodeRepairState.UNREPAIRED);
+        VnodeRepairState second = new VnodeRepairState(sharedRange, ImmutableSet.of(commonReplica, onlyInSecond), VnodeRepairState.UNREPAIRED);
+        assertThat(first.isSameVnode(second)).isFalse();
+    }
+
+    @Test
+    public void testDifferentVnodesAreNotSame()
+    {
+        LongTokenRange firstRange = new LongTokenRange(1, 2);
+        LongTokenRange secondRange = new LongTokenRange(2, 3);
+        DriverNode replicaA = mock(DriverNode.class);
+        DriverNode replicaB = mock(DriverNode.class);
+        DriverNode replicaC = mock(DriverNode.class);
+
+        // Identical replica sets, but the token ranges differ.
+        VnodeRepairState first = new VnodeRepairState(firstRange, ImmutableSet.of(replicaA, replicaB, replicaC), VnodeRepairState.UNREPAIRED);
+        VnodeRepairState second = new VnodeRepairState(secondRange, ImmutableSet.of(replicaA, replicaB, replicaC), VnodeRepairState.UNREPAIRED);
+        assertThat(first.isSameVnode(second)).isFalse();
+    }
+
+    @Test
+    public void testEqualsContract()
+    {
+        EqualsVerifier.forClass(VnodeRepairState.class)
+                .withPrefabValues(ImmutableSet.class, ImmutableSet.of(1), ImmutableSet.of(2)) // two distinct sample sets
+                .usingGetClass() // tells EqualsVerifier that equals() is expected to compare via getClass()
+                .verify();
+    }
+}
diff --git a/utils/pom.xml b/utils/pom.xml
index 82421b3ef..1ab40cc7b 100644
--- a/utils/pom.xml
+++ b/utils/pom.xml
@@ -26,5 +26,11 @@ utils + + + com.google.guava + guava + + \ No newline at end of file
diff --git a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/ManyToOneIterator.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/ManyToOneIterator.java
new file mode 100644
index 000000000..d91c52d3c
--- /dev/null
+++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/ManyToOneIterator.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.utils.converter;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Comparator;
+import java.util.Iterator;
+import java.util.List;
+
+import com.google.common.collect.AbstractIterator;
+
+/**
+ * An iterator that merges multiple iterables into a single iterator, ordered by the provided comparator.
+ * All elements are copied and sorted when the iterator is constructed.
+ *
+ * @param <T>
+ *         The type of the elements.
+ */
+public class ManyToOneIterator<T> extends AbstractIterator<T>
+{
+    private final Iterator<T> myIterator;
+
+    /**
+     * Construct a new iterator with the provided iterables and comparator.
+     *
+     * @param iterables
+     *         The iterables to iterate over.
+     * @param comparator
+     *         The comparator to use for comparing the elements.
+     */
+    public ManyToOneIterator(final Collection<? extends Iterable<T>> iterables, final Comparator<? super T> comparator)
+    {
+        List<T> elementList = new ArrayList<>();
+        for (Iterable<T> iterable : iterables)
+        {
+            iterable.forEach(elementList::add);
+        }
+        elementList.sort(comparator);
+        myIterator = elementList.iterator();
+    }
+
+    @Override
+    protected final T computeNext()
+    {
+        if (myIterator.hasNext())
+        {
+            return myIterator.next();
+        }
+        // The sorted snapshot is exhausted: tell AbstractIterator that the iteration is over.
+        return endOfData();
+    }
+}
+
diff --git a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/converter/UnitConverter.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/UnitConverter.java
similarity index 96%
rename from utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/converter/UnitConverter.java
rename to utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/UnitConverter.java
index 5557b189f..a779d9542 100644
--- a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/converter/UnitConverter.java
+++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/UnitConverter.java
@@ -12,7 +12,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
*/ -package com.ericsson.bss.cassandra.ecchronos.utils.enums.converter; +package com.ericsson.bss.cassandra.ecchronos.utils.converter; import java.util.regex.Matcher; import java.util.regex.Pattern; diff --git a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/converter/package-info.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/package-info.java similarity index 90% rename from utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/converter/package-info.java rename to utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/package-info.java index 26202701d..95c67661f 100644 --- a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/converter/package-info.java +++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/converter/package-info.java @@ -15,4 +15,4 @@ /** * Contains the enums related with ecChronos operations. */ -package com.ericsson.bss.cassandra.ecchronos.utils.enums.converter; +package com.ericsson.bss.cassandra.ecchronos.utils.converter; diff --git a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/repair/RepairStatus.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/repair/RepairStatus.java index 28034e807..b537b3599 100644 --- a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/repair/RepairStatus.java +++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/enums/repair/RepairStatus.java @@ -14,6 +14,9 @@ */ package com.ericsson.bss.cassandra.ecchronos.utils.enums.repair; +/** + * Enum containing the different statuses a repair session can have. 
+ */
 public enum RepairStatus
 {
     STARTED,
diff --git a/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/ScheduledJobException.java b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/ScheduledJobException.java
new file mode 100644
index 000000000..6c6982741
--- /dev/null
+++ b/utils/src/main/java/com/ericsson/bss/cassandra/ecchronos/utils/exceptions/ScheduledJobException.java
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2024 Telefonaktiebolaget LM Ericsson
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.ericsson.bss.cassandra.ecchronos.utils.exceptions;
+
+
+/**
+ * Checked exception raised by scheduled jobs to report that something went wrong.
+ */
+public class ScheduledJobException extends Exception
+{
+    private static final long serialVersionUID = 4099709033677299583L;
+
+    /**
+     * Create an exception that carries only a detail message.
+     *
+     * @param message
+     *         The detail message describing the failure.
+     */
+    public ScheduledJobException(final String message)
+    {
+        super(message);
+    }
+
+    /**
+     * Create an exception that carries both a detail message and the underlying cause.
+     *
+     * @param message
+     *         The detail message describing the failure.
+     * @param t
+     *         The cause of the failure.
+     */
+    public ScheduledJobException(final String message, final Throwable t)
+    {
+        super(message, t);
+    }
+
+    /**
+     * Create an exception that wraps the underlying cause; the detail message is derived from the cause as
+     * defined by {@link Exception#Exception(Throwable)}.
+     *
+     * @param t
+     *         The cause of the failure.
+     */
+    public ScheduledJobException(final Throwable t)
+    {
+        super(t);
+    }
+}