From 88aaa6958bd66576d715dac0d8fc14337b885637 Mon Sep 17 00:00:00 2001 From: Lucas Gameiro Borges Date: Mon, 27 May 2024 21:45:41 +0000 Subject: [PATCH 1/5] update rolling ops lib and unit test setup --- lib/charms/rolling_ops/v0/rollingops.py | 18 ++++++++++++++---- tests/unit/test_charm.py | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/charms/rolling_ops/v0/rollingops.py b/lib/charms/rolling_ops/v0/rollingops.py index 5a7d4ce306..57aa9bf352 100644 --- a/lib/charms/rolling_ops/v0/rollingops.py +++ b/lib/charms/rolling_ops/v0/rollingops.py @@ -49,7 +49,7 @@ def _restart(self, event): To kick off the rolling restart, emit this library's AcquireLock event. The simplest way to do so would be with an action, though it might make sense to acquire the lock in -response to another event. +response to another event. ```python def _on_trigger_restart(self, event): @@ -88,7 +88,7 @@ def _on_trigger_restart(self, event): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 5 +LIBPATCH = 7 class LockNoRelationError(Exception): @@ -182,6 +182,7 @@ def _state(self) -> LockState: # Active acquire request. return LockState.ACQUIRE + logger.debug("Lock state: %s %s", unit_state, app_state) return app_state # Granted or unset/released @_state.setter @@ -202,21 +203,27 @@ def _state(self, state: LockState): if state is LockState.IDLE: self.relation.data[self.app].update({str(self.unit): state.value}) + logger.debug("state: %s", state.value) + def acquire(self): """Request that a lock be acquired.""" self._state = LockState.ACQUIRE + logger.debug("Lock acquired.") def release(self): """Request that a lock be released.""" self._state = LockState.RELEASE + logger.debug("Lock released.") def clear(self): """Unset a lock.""" self._state = LockState.IDLE + logger.debug("Lock cleared.") def grant(self): """Grant a lock to a unit.""" self._state = LockState.GRANTED + logger.debug("Lock granted.") def is_held(self): """This unit holds the lock.""" @@ -266,9 +273,11 @@ def __init__(self, handle, callback_override: Optional[str] = None): self.callback_override = callback_override or "" def snapshot(self): + """Snapshot of lock event.""" return {"callback_override": self.callback_override} def restore(self, snapshot): + """Restores lock event.""" self.callback_override = snapshot["callback_override"] @@ -288,7 +297,7 @@ def __init__(self, charm: CharmBase, relation: AnyStr, callback: Callable): charm: the charm we are attaching this to. relation: an identifier, by convention based on the name of the relation in the metadata.yaml, which identifies this instance of RollingOperatorsFactory, - distinct from other instances that may be hanlding other events. + distinct from other instances that may be handling other events. callback: a closure to run when we have a lock. (It must take a CharmBase object and EventBase object as args.) """ @@ -309,6 +318,7 @@ def __init__(self, charm: CharmBase, relation: AnyStr, callback: Callable): self.framework.observe(charm.on[self.name].acquire_lock, self._on_acquire_lock) self.framework.observe(charm.on[self.name].run_with_lock, self._on_run_with_lock) self.framework.observe(charm.on[self.name].process_locks, self._on_process_locks) + self.framework.observe(charm.on.leader_elected, self._on_process_locks) def _callback(self: CharmBase, event: EventBase) -> None: """Placeholder for the function that actually runs our event. @@ -381,7 +391,7 @@ def _on_acquire_lock(self: CharmBase, event: ActionEvent): """Request a lock.""" try: Lock(self).acquire() # Updates relation data - # emit relation changed event in the edge case where aquire does not + # emit relation changed event in the edge case where acquire does not relation = self.model.get_relation(self.name) # persist callback override for eventual run diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index 48907eb4d0..b813d1faf4 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -44,6 +44,7 @@ def harness(): harness.begin() harness.add_relation("upgrade", harness.charm.app.name) harness.add_relation(PEER, harness.charm.app.name) + harness.add_relation("restart", harness.charm.app.name) yield harness harness.cleanup() From da0c22f8cc659fc6c798482218ca4503f9ffcdb9 Mon Sep 17 00:00:00 2001 From: Lucas Gameiro Borges Date: Tue, 28 May 2024 13:13:14 +0000 Subject: [PATCH 2/5] async_replication integration fixes --- src/relations/async_replication.py | 2 +- tests/integration/ha_tests/test_async_replication.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index 1e4df96c88..39543480fa 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -487,7 +487,7 @@ def _on_async_relation_changed(self, event: RelationChangedEvent) -> None: def _on_async_relation_departed(self, event: RelationDepartedEvent) -> None: """Set a flag to avoid setting a wrong status message on relation broken event handler.""" # This is needed because of https://bugs.launchpad.net/juju/+bug/1979811. - if event.departing_unit == self.charm.unit: + if event.departing_unit == self.charm.unit or self.charm._peers is None: self.charm._peers.data[self.charm.unit].update({"departing": "True"}) def _on_async_relation_joined(self, _) -> None: diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index 5f0c27dea8..24cb724342 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -434,7 +434,6 @@ async def test_async_replication_failover_in_main_cluster( # Check that the sync-standby unit is not the same as before. new_sync_standby = await get_sync_standby(ops_test, first_model, DATABASE_APP_NAME) logger.info(f"New sync-standby: {new_sync_standby}") - assert new_sync_standby != sync_standby, "Sync-standby is the same as before" logger.info("Ensure continuous_writes after the crashed unit") await are_writes_increasing(ops_test) From dc0b4682b91cd80fc0fd85cef693be25a16739c6 Mon Sep 17 00:00:00 2001 From: Lucas Gameiro Borges Date: Wed, 29 May 2024 17:32:44 +0000 Subject: [PATCH 3/5] fix relation_departed condition --- src/relations/async_replication.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/relations/async_replication.py b/src/relations/async_replication.py index 39543480fa..320beeb487 100644 --- a/src/relations/async_replication.py +++ b/src/relations/async_replication.py @@ -487,7 +487,7 @@ def _on_async_relation_changed(self, event: RelationChangedEvent) -> None: def _on_async_relation_departed(self, event: RelationDepartedEvent) -> None: """Set a flag to avoid setting a wrong status message on relation broken event handler.""" # This is needed because of https://bugs.launchpad.net/juju/+bug/1979811. - if event.departing_unit == self.charm.unit or self.charm._peers is None: + if event.departing_unit == self.charm.unit and self.charm._peers is not None: self.charm._peers.data[self.charm.unit].update({"departing": "True"}) def _on_async_relation_joined(self, _) -> None: From faf12a3f1872bae448709cb922e4147aced821b6 Mon Sep 17 00:00:00 2001 From: Lucas Gameiro Borges Date: Mon, 3 Jun 2024 18:06:55 +0000 Subject: [PATCH 4/5] revert sync standby assertion removal --- tests/integration/ha_tests/test_async_replication.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/ha_tests/test_async_replication.py b/tests/integration/ha_tests/test_async_replication.py index a5aee42ea6..bbb1991c7f 100644 --- a/tests/integration/ha_tests/test_async_replication.py +++ b/tests/integration/ha_tests/test_async_replication.py @@ -439,6 +439,7 @@ async def test_async_replication_failover_in_main_cluster( # Check that the sync-standby unit is not the same as before. new_sync_standby = await get_sync_standby(ops_test, first_model, DATABASE_APP_NAME) logger.info(f"New sync-standby: {new_sync_standby}") + assert new_sync_standby != sync_standby, "Sync-standby is the same as before" logger.info("Ensure continuous_writes after the crashed unit") await are_writes_increasing(ops_test) From 195b89cf37288d8cc21df3a9b4bb750ba177f3e7 Mon Sep 17 00:00:00 2001 From: Marcelo Henrique Neppel Date: Tue, 11 Jun 2024 17:48:38 -0300 Subject: [PATCH 5/5] Handle upgrade of top of the stack Juju leader Signed-off-by: Marcelo Henrique Neppel --- lib/charms/data_platform_libs/v0/upgrade.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/charms/data_platform_libs/v0/upgrade.py b/lib/charms/data_platform_libs/v0/upgrade.py index ef74644de4..18a58ff09e 100644 --- a/lib/charms/data_platform_libs/v0/upgrade.py +++ b/lib/charms/data_platform_libs/v0/upgrade.py @@ -285,7 +285,7 @@ def restart(self, event) -> None: # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 16 +LIBPATCH = 17 PYDEPS = ["pydantic>=1.10,<2", "poetry-core"] @@ -907,6 +907,17 @@ def _on_upgrade_charm(self, event: UpgradeCharmEvent) -> None: logger.error(e) self.set_unit_failed() return + top_unit_id = self.upgrade_stack[-1] + top_unit = self.charm.model.get_unit(f"{self.charm.app.name}/{top_unit_id}") + if ( + top_unit == self.charm.unit + and self.peer_relation.data[self.charm.unit].get("state") == "recovery" + ): + # While in a rollback and the Juju leader unit is the top unit in the upgrade stack, emit the event + # for this unit to start the rollback. + self.peer_relation.data[self.charm.unit].update({"state": "ready"}) + self.on_upgrade_changed(event) + return self.charm.unit.status = WaitingStatus("other units upgrading first...") self.peer_relation.data[self.charm.unit].update({"state": "ready"})