diff --git a/lib/charms/mongodb/v0/mongo.py b/lib/charms/mongodb/v0/mongo.py index f8ef0e44f..9e0367427 100644 --- a/lib/charms/mongodb/v0/mongo.py +++ b/lib/charms/mongodb/v0/mongo.py @@ -31,7 +31,7 @@ class NotReadyError(PyMongoError): # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 1 +LIBPATCH = 2 ADMIN_AUTH_SOURCE = "authSource=admin" SYSTEM_DBS = ("admin", "local", "config") @@ -270,6 +270,11 @@ def get_users(self) -> Set[str]: ] ) + def get_all_users(self) -> Set[str]: + """Get all users, including the three charm-managed users.""" + users_info = self.client.admin.command("usersInfo") + return {user_obj["user"] for user_obj in users_info["users"]} + def get_databases(self) -> Set[str]: """Return list of all non-default databases.""" databases = self.client.list_database_names() @@ -280,3 +285,7 @@ def drop_database(self, database: str): if database in SYSTEM_DBS: return self.client.drop_database(database) + + def drop_local_database(self): + """DANGEROUS: Drops the local database.""" + self.client.drop_database("local") diff --git a/lib/charms/mongodb/v1/helpers.py b/lib/charms/mongodb/v1/helpers.py index 6d302a6f9..20cf80c64 100644 --- a/lib/charms/mongodb/v1/helpers.py +++ b/lib/charms/mongodb/v1/helpers.py @@ -23,7 +23,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 10 +LIBPATCH = 11 # path to store mongodb keyFile KEY_FILE = "keyFile" @@ -41,6 +41,9 @@ LOG_DIR = "/var/log/mongodb" CONF_DIR = "/etc/mongod" MONGODB_LOG_FILENAME = "mongodb.log" + +LOCALHOST = "127.0.0.1" + logger = logging.getLogger(__name__) @@ -171,6 +174,34 @@ def get_mongos_args( return " ".join(cmd) +def get_degraded_mongod_args(snap_install: bool = False): + """Builds a degraded MongoDB startup command line. + + This degraded command line starts MongoDB with minimal configuration: it + binds only to localhost, omits the replica set configuration, and skips + auth validation so that the users and the local database can be updated. + """ + full_data_dir = f"{MONGODB_COMMON_DIR}{DATA_DIR}" if snap_install else DATA_DIR + logging_options = _get_logging_options(snap_install) + cmd = [ + # Only bind to local IP + f"--bind_ip {LOCALHOST}", + # db must be located within the snap common directory since the + # snap is strictly confined + f"--dbpath={full_data_dir}", + # for simplicity we run the mongod daemon on shards, configsvrs, + # and replicas on the same port + f"--port={Config.MONGODB_PORT}", + "--setParameter processUmask=037", # required for log files permission (g+r) + "--logRotate reopen", + "--logappend", + logging_options, + "--setParameter enableLocalhostAuthBypass=0", + "\n", + ] + return " ".join(cmd) + + def get_mongod_args( config: MongoConfiguration, auth: bool = True, @@ -266,6 +297,21 @@ def generate_keyfile() -> str: return "".join([secrets.choice(choices) for _ in range(1024)]) +def generate_lock_hash() -> str: + """Lock hash used to check whether we are reusing storage in a different context. + + This string is written to a file in the storage of every member of the + replica set, and also stored as a secret. + Upon starting, a new unit compares the value in its storage with the one + in the secret to decide whether we are reusing storage and, if so, in + which context. + + Returns: + A 16 character random hexadecimal string (8 random bytes).
+ """ + return secrets.token_hex(8) + + def copy_licenses_to_unit(): """Copies licenses packaged in the snap to the charm's licenses directory.""" os.makedirs("src/licenses", exist_ok=True) diff --git a/lib/charms/mongodb/v1/mongodb.py b/lib/charms/mongodb/v1/mongodb.py index e06fd517c..5dbdd23eb 100644 --- a/lib/charms/mongodb/v1/mongodb.py +++ b/lib/charms/mongodb/v1/mongodb.py @@ -4,7 +4,7 @@ # See LICENSE file for licensing details. import logging -from typing import Dict, Set +from typing import Dict, Set, Tuple from bson.json_util import dumps from charms.mongodb.v0.mongo import MongoConfiguration, MongoConnection, NotReadyError @@ -27,7 +27,7 @@ # Increment this PATCH version before using `charmcraft publish-lib` or reset # to 0 if you are raising the major API version -LIBPATCH = 3 +LIBPATCH = 4 # path to store mongodb ketFile logger = logging.getLogger(__name__) @@ -95,6 +95,35 @@ def init_replset(self) -> None: # finished. logger.error("Cannot initialize replica set. error=%r", e) raise e + else: + logger.error("Error in init %s", e) + + @retry( + stop=stop_after_attempt(3), + wait=wait_fixed(5), + reraise=True, + before=before_log(logger, logging.DEBUG), + ) + def reconfigure_replset(self, hosts: set[str], version: int, force: bool = False) -> None: + """Create replica set config the first time. + + Raises: + ConfigurationError, ConfigurationError, OperationFailure + """ + config = { + "_id": self.config.replset, + "members": [{"_id": i, "host": h} for i, h in enumerate(hosts)], + "version": version, + } + try: + self.client.admin.command("replSetReconfig", config, force=force) + except OperationFailure as e: + # Unauthorized error can be raised only if initial user were + # created the step after this. + # AlreadyInitialized error can be raised only if this step + # finished. + logger.error("Cannot reconfigure replica set. error=%r", e) + raise e def get_replset_status(self) -> Dict: """Get a replica set status as a dict. @@ -128,6 +157,17 @@ def get_replset_members(self) -> Set[str]: ] return set(curr_members) + def get_replset_members_and_version(self) -> Tuple[Set[str], int]: + """Get replica set members through config.""" + rs_config = self.client.admin.command("replSetGetConfig") + curr_members = [ + self._hostname_from_hostport(member["host"]) + for member in rs_config["config"]["members"] + ] + version = rs_config["config"]["version"] + + return set(curr_members), version + def add_replset_member(self, hostname: str) -> None: """Add a new member to replica set config inside MongoDB. 
diff --git a/src/charm.py b/src/charm.py index 02447baa7..90f91faa6 100755 --- a/src/charm.py +++ b/src/charm.py @@ -18,12 +18,15 @@ from charms.mongodb.v0.set_status import MongoDBStatusHandler from charms.mongodb.v1.helpers import ( KEY_FILE, + LOCALHOST, TLS_EXT_CA_FILE, TLS_EXT_PEM_FILE, TLS_INT_CA_FILE, TLS_INT_PEM_FILE, + add_args_to_env, copy_licenses_to_unit, generate_keyfile, + generate_lock_hash, generate_password, get_create_user_cmd, safe_exec, @@ -81,6 +84,7 @@ ApplicationHostNotFoundError, NotConfigServerError, ) +from lock_hash import HASH_KEY, UNDEFINED, LockHashHandler from machine_helpers import ( MONGO_USER, ROOT_USER_GID, @@ -99,6 +103,9 @@ class MongodbOperatorCharm(CharmBase): """Charm the service.""" + # Lock Hash descriptor + lock_hash = LockHashHandler() + def __init__(self, *args): super().__init__(*args) self._port = Config.MONGODB_PORT @@ -257,11 +264,28 @@ def remote_mongodb_config(self, hosts, replset=None, standalone=None) -> MongoCo OperatorUser, hosts, replset=replset, standalone=standalone ) + @property + def _is_storage_from_different_cluster(self) -> bool: + """Returns True if we are reusing storage from a different cluster.""" + lock_hash = self.lock_hash # Avoid duplicate read of the file + return lock_hash != UNDEFINED and lock_hash != self.databag_lock_hash + @property def mongo_config(self) -> MongoConfiguration: """Returns a MongoConfiguration object for shared libs with agnostic mongo commands.""" return self.mongodb_config + @property + def standalone_config(self) -> MongoConfiguration: + """Generates a MongoConfiguration object for a local authenticated standalone connection. + + This allows connecting to a mongodb node which is in a replica set + without going through replica set checks. + """ + return self._get_mongodb_config_for_user( + OperatorUser, {self.unit_host(self.unit)}, standalone=True + ) + @property def mongodb_config(self) -> MongoConfiguration: """Generates a MongoConfiguration object for this deployment of MongoDB.""" @@ -390,6 +414,17 @@ def _on_install(self, event: InstallEvent) -> None: self.status.set_and_share_status(BlockedStatus("Could not install MongoDB")) return + # If we are a replica and the lock hash doesn't match + if self.is_role(Config.Role.REPLICATION) and self._is_storage_from_different_cluster: + self._fix_mongodb_for_reuse() + elif self.is_sharding_component() and self._is_storage_from_different_cluster: + self.status.set_and_share_status( + BlockedStatus( + f"Reusing storage from a different ReplicaSet is not allowed on a {self.role}." + ) + ) + return + # Construct the mongod startup commandline args for systemd and reload the daemon. update_mongod_service( machine_ip=self.unit_host(self.unit), @@ -424,6 +459,96 @@ def _on_config_changed(self, event: ConfigChangedEvent) -> None: f"Migration of sharding components not permitted, revert config role to {self.role}" ) + def __fix_users_for_reuse(self, direct_mongo: MongoDBConnection) -> None: + """Fix the users in the DB for storage reuse. + + Context: If we are reusing storage from a different Juju application, + the MongoDB users need to be updated with new passwords because we + cannot assume that the Juju operator has kept the credentials. + This method updates the credentials of the existing users that need + to be patched in this case. + """ + users = direct_mongo.get_all_users() + # Update operator password since operator is present.
+ if self.unit.is_leader() and OperatorUser.get_username() in users: + logger.info("[Recovery] Update operator password (leader).") + direct_mongo.set_user_password( + OperatorUser.get_username(), direct_mongo.config.password + ) + # Operator user not present in users, need to create it. + elif self.unit.is_leader(): + logger.info("[Recovery] Create missing operator user (leader).") + direct_mongo.create_user(self.mongodb_config) + # Not leader and operator present, drop it. + elif OperatorUser.get_username() in users: + logger.info("[Recovery] Dropping operator user (not leader).") + direct_mongo.drop_user(OperatorUser.get_username()) + # Drop backup and monitor user if present. + logger.info("[Recovery] Dropping users backup and monitor.") + if BackupUser.get_username() in users: + direct_mongo.drop_user(BackupUser.get_username()) + if MonitorUser.get_username() in users: + direct_mongo.drop_user(MonitorUser.get_username()) + + def _fix_mongodb_for_reuse(self) -> None: + """If we are reusing storage, we need to fix multiple things in the mongodb data. + + This is done before starting (so it involves downtime). + We need to: + * Start mongod in degraded mode (no auth validation, no replicaset + specified, bind only to local IP). + * Update the operator password (on the leader) and drop the operator user on other units. + * Remove backup and monitor users on all units. + * Delete the local database (contains the replica set local information) on all units. + """ + logger.debug("Fixing MongoDB for reuse.") + self.status.set_and_share_status(MaintenanceStatus("Reusing provided storage.")) + try: + # Update with degraded configuration. + # Dangerous, this is only allowed here because we need to fix the DB configuration. + logger.info( + "[Recovery] Updating MongoDB to degraded configuration to update user passwords." + ) + update_mongod_service( + machine_ip=LOCALHOST, + config=self.mongodb_config, + role=self.role, + degraded=True, + ) + # Start the charm services in degraded mode. + logger.info("[Recovery] Starting the charm services in degraded mode.") + self.start_charm_services() + # Perform data modification + if self.unit.is_leader(): + # We can only create the operator user's password if we are the leader. + self._check_or_set_user_password(OperatorUser) + with MongoDBConnection( + self.mongodb_config, uri="localhost", direct=True + ) as direct_mongo: + for attempt in Retrying( + stop=stop_after_attempt(10), + wait=wait_fixed(5), + reraise=True, + before=before_log(logger, logging.DEBUG), + ): + with attempt: + if not direct_mongo.is_ready: + raise NotReadyError + self.__fix_users_for_reuse(direct_mongo) + # Drop local database + logger.info("[Recovery] Dropping local database.") + direct_mongo.drop_local_database() + except Exception as err: + logger.error(f"Error encountered while updating database config: {err}") + raise + finally: + # It is important that the charm services are stopped here to + # prevent the application from running in degraded mode. + logger.info("[Recovery] Stopping degraded charm services") + self.stop_charm_services() + # Reset start args to empty string + add_args_to_env("MONGOD_ARGS", "") + def _on_start(self, event: StartEvent) -> None: """Enables MongoDB service and initialises replica set. @@ -519,46 +644,60 @@ def _on_relation_handler(self, event: RelationEvent) -> None: self._connect_mongodb_exporter() self._connect_pbm_agent() + # Update the lock hash on the file system if needed.
+ self._update_lock_hash() + # only leader should configure replica set and app-changed-events can trigger the relation # changed hook resulting in no JUJU_REMOTE_UNIT if this is the case we should return # further reconfiguration can be successful only if a replica set is initialised. if not (self.unit.is_leader() and event.unit) or not self.db_initialised: return + # If needed, reconfigure the replica set (e.g. after a network cut) + try: + self._reconfigure_replica_set() + self.__add_members_to_replicaset(event) + except NotReadyError: + self.status.set_and_share_status(WaitingStatus("waiting to reconfigure replica set")) + logger.error("Deferring reconfigure: another member doing sync right now") + event.defer() + return + except PyMongoError as e: + self.status.set_and_share_status(WaitingStatus("waiting to reconfigure replica set")) + logger.error("Deferring reconfigure: error=%r", e) + event.defer() + return + + def __add_members_to_replicaset(self, event: RelationEvent): + """Adds new members to the replica set. + + This function is run on all Relation events and is in charge of adding + the new nodes to the replica set configuration. + + Raises: + NotReadyError: The node is not ready to add new members to the + replica set (e.g. a sync is in progress). + PyMongoError: Any operational error raised while interacting with MongoDB. + """ with MongoDBConnection(self.mongodb_config) as mongo: - try: - replset_members = mongo.get_replset_members() - # compare set of mongod replica set members and juju hosts to avoid the unnecessary - # reconfiguration. - if replset_members == self.mongodb_config.hosts: - return + replset_members = mongo.get_replset_members() + # compare set of mongod replica set members and juju hosts to avoid + # unnecessary reconfiguration.
+ if replset_members == self.mongodb_config.hosts: + return - for member in self.mongodb_config.hosts - replset_members: - logger.debug("Adding %s to replica set", member) - with MongoDBConnection( - self.mongodb_config, member, direct=True - ) as direct_mongo: - if not direct_mongo.is_ready: - self.status.set_and_share_status( - WaitingStatus("waiting to reconfigure replica set") - ) - logger.debug("Deferring reconfigure: %s is not ready yet.", member) - event.defer() - return - mongo.add_replset_member(member) - self.status.set_and_share_status(ActiveStatus()) - except NotReadyError: - self.status.set_and_share_status( - WaitingStatus("waiting to reconfigure replica set") - ) - logger.error("Deferring reconfigure: another member doing sync right now") - event.defer() - except PyMongoError as e: - self.status.set_and_share_status( - WaitingStatus("waiting to reconfigure replica set") - ) - logger.error("Deferring reconfigure: error=%r", e) - event.defer() + for member in self.mongodb_config.hosts - replset_members: + logger.debug("Adding %s to replica set", member) + with MongoDBConnection(self.mongodb_config, member, direct=True) as direct_mongo: + if not direct_mongo.is_ready: + self.status.set_and_share_status( + WaitingStatus("waiting to reconfigure replica set") + ) + logger.debug("Deferring reconfigure: %s is not ready yet.", member) + event.defer() + return + mongo.add_replset_member(member) + self.status.set_and_share_status(ActiveStatus()) def _on_leader_elected(self, event: LeaderElectedEvent) -> None: """Generates necessary keyfile and updates replica hosts.""" @@ -674,6 +813,12 @@ def _on_update_status(self, event: UpdateStatusEvent): self.status.set_and_share_status(WaitingStatus("Waiting for MongoDB to start")) return + try: + self._reconfigure_replica_set() + except PyMongoError as e: + logger.info(f"Could not reconfigure replica set due to {e}") + return + try: self.perform_self_healing(event) except ServerSelectionTimeoutError: @@ -1217,6 +1362,16 @@ def _connect_pbm_agent(self) -> None: self._get_service_status(Config.Backup.SERVICE_NAME) raise e + def _update_lock_hash(self): + """Lazily update lock hash file.""" + if ( + self.db_initialised + and self.is_role(Config.Role.REPLICATION) + and self.lock_hash != self.databag_lock_hash + ): + # We write the lock file if it wasn't written yet. + self.lock_hash = self.databag_lock_hash + def _get_service_status(self, service_name) -> None: logger.error(f"Getting status of {service_name} service:") self._run_diagnostic_command( @@ -1234,6 +1389,27 @@ def _run_diagnostic_command(self, cmd) -> None: except subprocess.CalledProcessError as e: logger.error(f"Exception occurred running '{cmd}'\n {e}") + def _reconfigure_replica_set(self) -> None: + """Reconfigure the replica set if needed. + + This should only happen when we are reusing storage from another cluster, + because all IPs have changed and the stored configuration is broken. + + Raises: + PyMongoError: Reconfiguring the replica set failed.
+ """ + if not self.db_initialised: + return + + with MongoDBConnection(self.standalone_config, direct=True) as direct_mongo: + replset_members, version = direct_mongo.get_replset_members_and_version() + related_hosts = set(self.app_hosts) + if replset_members and all(member not in related_hosts for member in replset_members): + logger.info(f"Reconfiguring replica set to {related_hosts}") + direct_mongo.reconfigure_replset( + hosts=related_hosts, version=version + 1, force=True + ) + def _initialise_replica_set(self, event: StartEvent) -> None: if self.db_initialised: # The replica set should be initialised only once. Check should be @@ -1242,7 +1418,16 @@ def _initialise_replica_set(self, event: StartEvent) -> None: # can be corrupted. return - with MongoDBConnection(self.mongodb_config, "localhost", direct=True) as direct_mongo: + # If we are reusing storage, the user operator already exists + # hence the need to authenticate. + if self.is_role(Config.Role.REPLICATION) and self._is_storage_from_different_cluster: + config = self.standalone_config + uri = None + else: + config = self.mongodb_config + uri = "localhost" + + with MongoDBConnection(config, uri, direct=True) as direct_mongo: try: logger.info("Replica Set initialization") direct_mongo.init_replset() @@ -1279,8 +1464,14 @@ def _initialise_replica_set(self, event: StartEvent) -> None: # replica set initialised properly and ready to go self.db_initialised = True + self.lock_hash = generate_lock_hash() self.status.set_and_share_status(ActiveStatus()) + @property + def databag_lock_hash(self) -> str: + """The data from the databag.""" + return self.get_secret(APP_SCOPE, HASH_KEY) or UNDEFINED + def unit_host(self, unit: Unit) -> str: """Returns the ip address of a given unit.""" # check if host is current host diff --git a/src/config.py b/src/config.py index d430e5073..986507919 100644 --- a/src/config.py +++ b/src/config.py @@ -4,7 +4,7 @@ # See LICENSE file for licensing details. from pathlib import Path -from typing import Literal, TypeAlias +from typing import List, Literal, TypeAlias from ops.model import BlockedStatus @@ -22,9 +22,10 @@ class Config: MONGOD_CONF_DIR = f"{MONGODB_SNAP_DATA_DIR}/etc/mongod" MONGOD_CONF_FILE_PATH = f"{MONGOD_CONF_DIR}/mongod.conf" CHARM_INTERNAL_VERSION_FILE = "charm_internal_version" - SNAP_PACKAGES = [("charmed-mongodb", "6/edge", 121)] + SNAP_PACKAGES: List[Package] = [("charmed-mongodb", "6/edge", 121)] MONGODB_COMMON_PATH = Path("/var/snap/charmed-mongodb/common") + MONGODB_DATA_DIR = Path("/var/snap/charmed-mongodb/common/var/lib/mongodb") # This is the snap_daemon user, which does not exist on the VM before the # snap install so creating it by UID diff --git a/src/lock_hash.py b/src/lock_hash.py new file mode 100644 index 000000000..04c53381b --- /dev/null +++ b/src/lock_hash.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python3 +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. +"""A Data descriptor that is in charge of handling the filesystem lock hash value.""" + + +import os +from logging import getLogger + +import ops + +from config import Config +from machine_helpers import ROOT_USER_GID + +logger = getLogger(__name__) + +HASH_KEY = "lockhash" +LOCK_PATH = Config.MONGODB_DATA_DIR / f".{HASH_KEY}" +UNDEFINED = "UNDEFINED" + + +class LockHashHandler: + """Descriptor class for the lock hash stored in the file. + + In order to safely reuse storage, we need to be able to detect if we are reusing storage. 
+ This is done by maintaining two things: a file on the unit's filesystem + which stores a string shared across the cluster, and the exact same + string in the application secrets. + If they have the same value, we're reusing storage in the same application/replicaset. + If there's no file but a value in the secret, we're adding a new unit. + If there's a file but no value in the secret, we're reusing storage in a new application context. + """ + + def __set__(self, obj: ops.CharmBase, value: str): + """Sets the key in the dedicated file and in the application secret.""" + logger.debug(f"Writing {value} in file for unit {obj.unit.name}") + with open(LOCK_PATH, "w") as write_file: + write_file.write(value) + os.chmod(LOCK_PATH, 0o644) + os.chown(LOCK_PATH, Config.SNAP_USER, ROOT_USER_GID) + if obj.unit.is_leader(): + obj.set_secret(Config.Relations.APP_SCOPE, HASH_KEY, value) + + def __get__(self, *unused) -> str: + """Gets the key from the file, returning UNDEFINED if the file cannot be read.""" + try: + return LOCK_PATH.read_text() + except OSError as err: + logger.info(f"Unable to read file because of {err}") + return UNDEFINED diff --git a/src/machine_helpers.py b/src/machine_helpers.py index 14368c586..f91cd05a8 100644 --- a/src/machine_helpers.py +++ b/src/machine_helpers.py @@ -9,6 +9,7 @@ LOG_DIR, MONGODB_COMMON_DIR, add_args_to_env, + get_degraded_mongod_args, get_mongod_args, get_mongos_args, ) @@ -24,12 +25,24 @@ def update_mongod_service( - machine_ip: str, config: MongoConfiguration, role: str = "replication" + machine_ip: str, config: MongoConfiguration, role: str = "replication", degraded: bool = False ) -> None: - """Updates the mongod service file with the new options for starting.""" + """Updates the mongod service file with the new options for starting. + + The `degraded` argument should only be used in maintenance situations; it + induces downtime and a disconnection from the replica set. + """ # write our arguments to /etc/environment - the environment variable here is # read in the charmed-mongodb.mongod.service file. - mongod_start_args = get_mongod_args(config, auth=True, role=role, snap_install=True) + if degraded: + mongod_start_args = get_degraded_mongod_args(snap_install=True) + else: + mongod_start_args = get_mongod_args( + config, + auth=True, + role=role, + snap_install=True, + ) add_args_to_env("MONGOD_ARGS", mongod_start_args) if role == Config.Role.CONFIG_SERVER: diff --git a/tests/integration/ha_tests/conftest.py b/tests/integration/ha_tests/conftest.py new file mode 100644 index 000000000..8874340be --- /dev/null +++ b/tests/integration/ha_tests/conftest.py @@ -0,0 +1,34 @@ +#!/usr/bin/env python3 +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details.
+import pytest +from pytest_operator.plugin import OpsTest + +from ..helpers import get_app_name +from .helpers import clear_db_writes, start_continous_writes, update_restart_delay + +ORIGINAL_RESTART_DELAY = 5 + + +@pytest.fixture +async def continuous_writes(ops_test: OpsTest): + """Starts continuous write operations to MongoDB for test and clears writes at end of test.""" + await start_continous_writes(ops_test, 1) + yield + await clear_db_writes(ops_test) + + +@pytest.fixture +async def reset_restart_delay(ops_test: OpsTest): + """Resets service file delay on all units.""" + yield + app_name = await get_app_name(ops_test) + for unit in ops_test.model.applications[app_name].units: + await update_restart_delay(ops_test, unit, ORIGINAL_RESTART_DELAY) + + +@pytest.fixture(scope="module") +async def database_charm(ops_test: OpsTest): + """Build the database charm.""" + charm = await ops_test.build_charm(".") + return charm diff --git a/tests/integration/ha_tests/test_ha.py b/tests/integration/ha_tests/test_ha.py index 7d303e0b1..c50dc04b9 100644 --- a/tests/integration/ha_tests/test_ha.py +++ b/tests/integration/ha_tests/test_ha.py @@ -7,7 +7,6 @@ import time import pytest -from juju import tag from pymongo import MongoClient from pytest_operator.plugin import OpsTest from tenacity import RetryError, Retrying, stop_after_delay, wait_fixed @@ -22,7 +21,6 @@ ) from .helpers import ( all_db_processes_down, - clear_db_writes, count_primaries, count_writes, cut_network_from_unit, @@ -39,13 +37,9 @@ replica_set_primary, restore_network_for_unit, retrieve_entries, - reused_storage, scale_and_verify, secondary_up_to_date, - start_continous_writes, stop_continous_writes, - storage_id, - storage_type, update_restart_delay, verify_replica_set_configuration, verify_writes, @@ -55,7 +49,6 @@ ANOTHER_DATABASE_APP_NAME = "another-database-a" MEDIAN_REELECTION_TIME = 12 RESTART_DELAY = 60 * 3 -ORIGINAL_RESTART_DELAY = 5 logger = logging.getLogger(__name__) @@ -84,119 +77,6 @@ async def test_build_and_deploy(ops_test: OpsTest) -> None: await ops_test.model.wait_for_idle() -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.abort_on_fail -async def test_storage_re_use(ops_test, continuous_writes): - """Verifies that database units with attached storage correctly repurpose storage. - - It is not enough to verify that Juju attaches the storage. Hence test checks that the mongod - properly uses the storage that was provided. (ie. doesn't just re-sync everything from - primary, but instead computes a diff between current storage and primary storage.) 
- """ - app_name = await get_app_name(ops_test) - if storage_type(ops_test, app_name) == "rootfs": - pytest.skip( - "reuse of storage can only be used on deployments with persistent storage not on rootfs deployments" - ) - - # removing the only replica can be disastrous - if len(ops_test.model.applications[app_name].units) < 2: - await ops_test.model.applications[app_name].add_unit(count=1) - await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) - - # remove a unit and attach it's storage to a new unit - unit = ops_test.model.applications[app_name].units[0] - unit_storage_id = storage_id(ops_test, unit.name) - expected_units = len(ops_test.model.applications[app_name].units) - 1 - removal_time = time.time() - await ops_test.model.destroy_unit(unit.name) - await ops_test.model.wait_for_idle( - apps=[app_name], status="active", timeout=1000, wait_for_exact_units=expected_units - ) - new_unit = ( - await ops_test.model.applications[app_name].add_unit( - count=1, attach_storage=[tag.storage(unit_storage_id)] - ) - )[0] - - await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=1000) - - assert await reused_storage( - ops_test, new_unit.name, removal_time - ), "attached storage not properly reused by MongoDB." - - # verify that the no writes were skipped - total_expected_writes = await stop_continous_writes(ops_test, app_name=app_name) - actual_writes = await count_writes(ops_test, app_name=app_name) - assert total_expected_writes["number"] == actual_writes - - -@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) -@pytest.mark.group(1) -@pytest.mark.skip("This is currently unsupported on MongoDB charm.") -@pytest.mark.abort_on_fail -async def test_storage_re_use_different_cluster(ops_test: OpsTest, continuous_writes): - """Tests that we can reuse storage from a different cluster. - - For that, we completely remove the application while keeping the storages, - and then we deploy a new application with storage reuse and check that the - storage has been reused. - """ - app_name = await get_app_name(ops_test) - if storage_type(ops_test, app_name) == "rootfs": - pytest.skip( - "reuse of storage can only be used on deployments with persistent storage not on rootfs deployments" - ) - - writes_results = await stop_continous_writes(ops_test, app_name=app_name) - unit_ids = [unit.name for unit in ops_test.model.applications[app_name].units] - storage_ids = {} - - remaining_units = len(unit_ids) - for unit_id in unit_ids: - storage_ids[unit_id] = storage_id(ops_test, unit_id) - await ops_test.model.applications[app_name].destroy_unit(unit_id) - # Give some time to remove the unit. We don't use asyncio.sleep here to - # leave time for each unit to be removed before removing the next one. 
- # time.sleep(60) - remaining_units -= 1 - await ops_test.model.wait_for_idle( - apps=[app_name], - status="active", - timeout=1000, - idle_period=20, - wait_for_exact_units=remaining_units, - ) - - # Wait until all apps are cleaned up - await ops_test.model.wait_for_idle(apps=[app_name], timeout=1000, wait_for_exact_units=0) - - for unit_id in unit_ids: - n_units = len(ops_test.model.applications[app_name].units) - await ops_test.model.applications[app_name].add_unit( - count=1, attach_storage=[tag.storage(storage_ids[unit_id])] - ) - await ops_test.model.wait_for_idle( - apps=[app_name], - status="active", - timeout=1000, - idle_period=20, - wait_for_exact_units=n_units + 1, - ) - - await ops_test.model.wait_for_idle( - apps=[app_name], - status="active", - timeout=1000, - idle_period=20, - wait_for_exact_units=len(unit_ids), - ) - - actual_writes = await count_writes(ops_test, app_name=app_name) - assert writes_results["number"] == actual_writes - - @pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) @pytest.mark.group(1) @pytest.mark.abort_on_fail @@ -788,27 +668,3 @@ async def test_scale_up_down_removing_leader(ops_test: OpsTest, continuous_write for count in scales: await scale_and_verify(ops_test, count=count, remove_leader=True) await verify_writes(ops_test) - - -# TODO put this into a separate file -# Fixtures start - - -@pytest.fixture() -async def continuous_writes(ops_test: OpsTest): - """Starts continuous write operations to MongoDB for test and clears writes at end of test.""" - await start_continous_writes(ops_test, 1) - yield - await clear_db_writes(ops_test) - - -@pytest.fixture() -async def reset_restart_delay(ops_test: OpsTest): - """Resets service file delay on all units.""" - yield - app_name = await get_app_name(ops_test) - for unit in ops_test.model.applications[app_name].units: - await update_restart_delay(ops_test, unit, ORIGINAL_RESTART_DELAY) - - -# Fixtures end diff --git a/tests/integration/ha_tests/test_storage.py b/tests/integration/ha_tests/test_storage.py new file mode 100644 index 000000000..cdb05f24b --- /dev/null +++ b/tests/integration/ha_tests/test_storage.py @@ -0,0 +1,333 @@ +#!/usr/bin/env python3 +# Copyright 2024 Canonical Ltd. +# See LICENSE file for licensing details. +import time + +import pytest +from juju import tag +from pytest_operator.plugin import OpsTest + +from ..helpers import check_or_scale_app, get_app_name +from .helpers import ( + APP_NAME, + count_writes, + reused_storage, + stop_continous_writes, + storage_id, + storage_type, +) + +OTHER_MONGODB_APP_NAME = "mongodb-new" +TIMEOUT = 1000 + + +@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_build_and_deploy(ops_test: OpsTest, database_charm) -> None: + """Build and deploy one unit of MongoDB.""" + # it is possible for users to provide their own cluster for HA testing. Hence check if there + # is a pre-existing cluster. 
+ required_units = 3 + user_app_name = await get_app_name(ops_test) + if user_app_name: + await check_or_scale_app(ops_test, user_app_name, required_units) + return + + storage = {"mongodb": {"pool": "lxd", "size": 2048}} + + await ops_test.model.deploy( + database_charm, application_name=APP_NAME, num_units=required_units, storage=storage + ) + await ops_test.model.wait_for_idle() + + +@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_storage_re_use(ops_test, continuous_writes): + """Verifies that database units with attached storage correctly repurpose storage. + + It is not enough to verify that Juju attaches the storage. Hence the test checks that mongod + properly uses the storage that was provided (i.e. it doesn't just re-sync everything from the + primary, but instead computes a diff between the current storage and the primary's storage). + """ + app_name = await get_app_name(ops_test) + if storage_type(ops_test, app_name) == "rootfs": + pytest.skip( + "reuse of storage can only be used on deployments with persistent storage not on rootfs deployments" + ) + + # remove a unit and attach its storage to a new unit + unit = ops_test.model.applications[app_name].units[0] + unit_storage_id = storage_id(ops_test, unit.name) + expected_units = len(ops_test.model.applications[app_name].units) - 1 + removal_time = time.time() + await ops_test.model.destroy_unit(unit.name) + await ops_test.model.wait_for_idle( + apps=[app_name], status="active", timeout=TIMEOUT, wait_for_exact_units=expected_units + ) + new_unit = ( + await ops_test.model.applications[app_name].add_unit( + count=1, attach_storage=[tag.storage(unit_storage_id)] + ) + )[0] + + await ops_test.model.wait_for_idle(apps=[app_name], status="active", timeout=TIMEOUT) + + assert await reused_storage( + ops_test, new_unit.name, removal_time + ), "attached storage not properly reused by MongoDB." + + # verify that no writes were skipped + total_expected_writes = await stop_continous_writes(ops_test, app_name=app_name) + actual_writes = await count_writes(ops_test, app_name=app_name) + assert total_expected_writes["number"] == actual_writes + + +@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_storage_re_use_after_scale_down_to_zero(ops_test: OpsTest, continuous_writes): + """Tests that we can reuse storage after a scale down to zero. + + For that, we completely scale down to zero while keeping the storages, + and then we add units back with storage reuse and check that the + storage has been reused. + """ + app_name = await get_app_name(ops_test) + if storage_type(ops_test, app_name) == "rootfs": + pytest.skip( + "reuse of storage can only be used on deployments with persistent storage not on rootfs deployments" + ) + + writes_results = await stop_continous_writes(ops_test, app_name=app_name) + unit_ids = [unit.name for unit in ops_test.model.applications[app_name].units] + storage_ids = {} + + remaining_units = len(unit_ids) + for unit_id in unit_ids: + storage_ids[unit_id] = storage_id(ops_test, unit_id) + await ops_test.model.applications[app_name].destroy_unit(unit_id) + # Give some time to remove the unit. We don't use asyncio.sleep here to + # leave time for each unit to be removed before removing the next one.
+ # time.sleep(60) + remaining_units -= 1 + await ops_test.model.wait_for_idle( + apps=[app_name], + status="active", + timeout=TIMEOUT, + idle_period=20, + wait_for_exact_units=remaining_units, + ) + + # Wait until all apps are cleaned up + await ops_test.model.wait_for_idle(apps=[app_name], timeout=TIMEOUT, wait_for_exact_units=0) + + for unit_id in unit_ids: + n_units = len(ops_test.model.applications[app_name].units) + await ops_test.model.applications[app_name].add_unit( + count=1, attach_storage=[tag.storage(storage_ids[unit_id])] + ) + await ops_test.model.wait_for_idle( + apps=[app_name], + status="active", + timeout=TIMEOUT, + idle_period=20, + wait_for_exact_units=n_units + 1, + ) + + await ops_test.model.wait_for_idle( + apps=[app_name], + status="active", + timeout=TIMEOUT, + idle_period=20, + wait_for_exact_units=len(unit_ids), + ) + + actual_writes = await count_writes(ops_test, app_name=app_name) + assert writes_results["number"] == actual_writes + + +@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_storage_reuse_new_app_same_name( + ops_test: OpsTest, database_charm, continuous_writes +): + """Tests that we can reuse storage from a different cluster, with the same app name. + + For that, we completely remove the application while keeping the storages, + and then we deploy a new application with storage reuse and check that the + storage has been reused. + """ + app_name = await get_app_name(ops_test) + if storage_type(ops_test, app_name) == "rootfs": + pytest.skip( + "reuse of storage can only be used on deployments with persistent storage not on rootfs deployments" + ) + + writes_results = await stop_continous_writes(ops_test, app_name=app_name) + + time.sleep(70) # Leave some time to write the oplog + lock (60s + a few more) + unit_ids = [unit.name for unit in ops_test.model.applications[app_name].units] + storage_ids = {} + + remaining_units = len(unit_ids) + for unit_id in unit_ids: + storage_ids[unit_id] = storage_id(ops_test, unit_id) + await ops_test.model.applications[app_name].destroy_unit(unit_id) + + remaining_units -= 1 + await ops_test.model.wait_for_idle( + apps=[app_name], + status="active", + timeout=TIMEOUT, + idle_period=20, + wait_for_exact_units=remaining_units, + ) + + # Wait until all apps are cleaned up + await ops_test.model.wait_for_idle(apps=[app_name], timeout=TIMEOUT, wait_for_exact_units=0) + + await ops_test.model.remove_application( + app_name, + block_until_done=True, + destroy_storage=False, + ) + + await ops_test.model.deploy( + database_charm, + application_name=app_name, + num_units=1, + attach_storage=[tag.storage(storage_ids[unit_ids[0]])], + ) + + deployed_units = 1 + await ops_test.model.wait_for_idle( + apps=[app_name], + status="active", + timeout=TIMEOUT, + idle_period=60, + wait_for_exact_units=deployed_units, + ) + + for unit_id in unit_ids[1:]: + deployed_units += 1 + await ops_test.model.applications[app_name].add_unit( + count=1, attach_storage=[tag.storage(storage_ids[unit_id])] + ) + await ops_test.model.wait_for_idle( + apps=[app_name], + status="active", + timeout=TIMEOUT, + idle_period=60, + wait_for_exact_units=deployed_units, + ) + + await ops_test.model.wait_for_idle( + apps=[app_name], + status="active", + timeout=TIMEOUT, + idle_period=120, + wait_for_exact_units=deployed_units, + ) + + assert len(ops_test.model.applications[app_name].units) == len(storage_ids) + + # check if previous volumes are attached to the units of the new 
cluster + new_storage_ids = [] + for unit in ops_test.model.applications[app_name].units: + new_storage_ids.append(storage_id(ops_test, unit.name)) + + assert sorted(storage_ids.values()) == sorted(new_storage_ids), "Storage IDs mismatch" + + actual_writes = await count_writes(ops_test, app_name=app_name) + assert writes_results["number"] == actual_writes + + +@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"]) +@pytest.mark.group(1) +@pytest.mark.abort_on_fail +async def test_storage_reuse_new_app_different_name( + ops_test: OpsTest, database_charm, continuous_writes +): + """Tests that we can reuse storage from a different cluster, with a different app name. + + For that, we completely remove the application while keeping the storages, + and then we deploy a new application with storage reuse and check that the + storage has been reused. + """ + app_name = await get_app_name(ops_test) + if storage_type(ops_test, app_name) == "rootfs": + pytest.skip( + "reuse of storage can only be used on deployments with persistent storage not on rootfs deployments" + ) + + writes_results = await stop_continous_writes(ops_test, app_name=app_name) + time.sleep(70) # Leave some time to write the oplog + lock (60s + a few more) + unit_ids = [unit.name for unit in ops_test.model.applications[app_name].units] + storage_ids = {} + + remaining_units = len(unit_ids) + for unit_id in unit_ids: + storage_ids[unit_id] = storage_id(ops_test, unit_id) + await ops_test.model.applications[app_name].destroy_unit(unit_id) + # Give some time to remove the unit. We don't use asyncio.sleep here to + # leave time for each unit to be removed before removing the next one. + # time.sleep(60) + remaining_units -= 1 + await ops_test.model.wait_for_idle( + apps=[app_name], + status="active", + timeout=TIMEOUT, + idle_period=20, + wait_for_exact_units=remaining_units, + ) + + # Wait until all apps are cleaned up + await ops_test.model.wait_for_idle(apps=[app_name], timeout=TIMEOUT, wait_for_exact_units=0) + + await ops_test.model.remove_application(app_name, block_until_done=True, destroy_storage=False) + + new_app_name = OTHER_MONGODB_APP_NAME + + await ops_test.model.deploy( + database_charm, + application_name=new_app_name, + num_units=1, + attach_storage=[tag.storage(storage_ids[unit_ids[0]])], + ) + deployed_units = 1 + await ops_test.model.wait_for_idle( + apps=[new_app_name], + status="active", + timeout=TIMEOUT, + idle_period=60, + wait_for_exact_units=deployed_units, + ) + + for unit_id in unit_ids[1:]: + deployed_units += 1 + await ops_test.model.applications[new_app_name].add_unit( + count=1, attach_storage=[tag.storage(storage_ids[unit_id])] + ) + await ops_test.model.wait_for_idle( + apps=[new_app_name], + status="active", + timeout=TIMEOUT, + idle_period=60, + wait_for_exact_units=deployed_units, + ) + + assert len(ops_test.model.applications[new_app_name].units) == len(storage_ids) + + # check if previous volumes are attached to the units of the new cluster + new_storage_ids = [] + for unit in ops_test.model.applications[new_app_name].units: + new_storage_ids.append(storage_id(ops_test, unit.name)) + + assert sorted(storage_ids.values()) == sorted(new_storage_ids), "Storage IDs mismatch" + + actual_writes = await count_writes(ops_test, app_name=new_app_name) + assert writes_results["number"] == actual_writes diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index e63b3ac43..67c74af36 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -3,7 +3,9 @@ import logging
import re +import tempfile import unittest +from pathlib import Path from unittest import mock from unittest.mock import MagicMock, call, patch @@ -39,6 +41,8 @@ class TestCharm(unittest.TestCase): @patch("charm.get_charm_revision") def setUp(self, *unused): self.harness = Harness(MongodbOperatorCharm) + self.test_dir = tempfile.NamedTemporaryFile() + self.addCleanup(self.harness.cleanup) self.harness.begin() self.peer_rel_id = self.harness.add_relation("database-peers", "database-peers") @@ -188,6 +192,51 @@ def test_install_snap_packages_failure(self, _call, snap_cache, update_mongod_se self.harness.charm.on.install.emit() self.assertTrue(isinstance(self.harness.charm.unit.status, BlockedStatus)) + @patch_network_get(private_address="1.1.1.1") + @patch("charm.update_mongod_service") + @patch("charm.Config") + @patch("charm.LockHashHandler") + def test_install_fix_db(self, mock_lock, mock_config, update_mongod_service): + """Verifies that if we come from a different cluster, we call the MongoDB fix method.""" + mock_config.MONGOD_CONF_FILE_PATH = Path(self.test_dir.name) + + def is_replication_mock_call(*args): + return args == ("replication",) + + update_mongod_service.return_value = None + self.harness.charm.is_role = is_replication_mock_call + self.harness.charm.install_snap_packages = mock.Mock() + self.harness.charm._fix_mongodb_for_reuse = mock.Mock(side_effect=Exception) + mock_lock.__get__ = mock.Mock(return_value="deadbeef") + # We need to patch the type and not the object because of how mocks are stored. + # (see https://docs.python.org/3/library/unittest.mock.html#unittest.mock.PropertyMock) + type(self.harness.charm).lock_hash = mock_lock + self.harness.charm.get_secret = mock.Mock(return_value="feeddead") + + with self.assertRaises(Exception): + self.harness.charm.on.install.emit() + self.harness.charm._fix_mongodb_for_reuse.assert_called() + + @parameterized.expand( + [ + ["deadbeef", "deadbeef", False], + ["deadbeef", "feeddead", True], + ["UNDEFINED", "deadbeef", False], + ["UNDEFINED", "UNDEFINED", False], + ] + ) + @patch_network_get(private_address="1.1.1.1") + @patch("charm.LockHashHandler") + def test_is_storage_from_different_cluster(self, lock_hash, db_lock_hash, expected, mock_lock): + """Verifies that the `_is_storage_from_different_cluster` property works.""" + mock_lock.__get__ = mock.Mock(return_value=lock_hash) + # We need to patch the type and not the object because of how mocks are stored.
+ # (see https://docs.python.org/3/library/unittest.mock.html#unittest.mock.PropertyMock) + type(self.harness.charm).lock_hash = mock_lock + self.harness.charm.get_secret = mock.Mock(return_value=db_lock_hash) + assert self.harness.charm.lock_hash == lock_hash + assert self.harness.charm._is_storage_from_different_cluster == expected + @patch_network_get(private_address="1.1.1.1") def test_app_hosts(self): rel_id = self.harness.charm.model.get_relation("database-peers").id @@ -224,6 +273,10 @@ def test_mongodb_relation_joined_all_replicas_not_ready( self.harness.set_leader(True) self.harness.charm.app_peer_data["db_initialised"] = "true" connection.return_value.__enter__.return_value.is_ready = False + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) connection.return_value.__enter__.return_value.get_replset_members.return_value = { "1.1.1.1" } @@ -258,6 +311,10 @@ def test_relation_joined_get_members_failure( self.harness.set_leader(True) self.harness.charm.app_peer_data["db_initialised"] = "true" rel = self.harness.charm.model.get_relation("database-peers") + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) for exception in PYMONGO_EXCEPTIONS: connection.return_value.__enter__.return_value.get_replset_members.side_effect = ( @@ -294,6 +351,10 @@ def test_reconfigure_add_member_failure(self, _, rev, local, is_local, connectio # presets self.harness.set_leader(True) self.harness.charm.app_peer_data["db_initialised"] = "true" + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) connection.return_value.__enter__.return_value.get_replset_members.return_value = { "1.1.1.1" } @@ -372,6 +433,10 @@ def test_update_status_mongodb_error( self.harness.set_leader(True) self.harness.charm.app_peer_data["db_initialised"] = "true" connection.return_value.__enter__.return_value.is_ready = True + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) pbm_statuses = [ ActiveStatus("pbm"), @@ -418,6 +483,10 @@ def test_update_status_pbm_error( self.harness.set_leader(True) self.harness.charm.app_peer_data["db_initialised"] = "true" connection.return_value.__enter__.return_value.is_ready = True + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) pbm_statuses = [ BlockedStatus("pbm"), @@ -459,6 +528,10 @@ def test_update_status_pbm_and_mongodb_ready( self.harness.set_leader(True) self.harness.charm.app_peer_data["db_initialised"] = "true" connection.return_value.__enter__.return_value.is_ready = True + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) self.harness.add_relation(S3_RELATION_NAME, "s3-integrator") @@ -492,6 +565,10 @@ def test_update_status_no_s3( self.harness.set_leader(True) self.harness.charm.app_peer_data["db_initialised"] = "true" connection.return_value.__enter__.return_value.is_ready = True + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) has_backup_service.return_value = True get_mongodb_status.return_value = ActiveStatus("mongodb") @@ -524,6 +601,10 @@ def test_update_status_primary( self.harness.set_leader(False) 
connection.return_value.__enter__.return_value.is_ready = True + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) status_connection.return_value.__enter__.return_value.get_replset_status.return_value = { "1.1.1.1": "PRIMARY" } @@ -556,6 +637,10 @@ def test_update_status_secondary( self.harness.set_leader(False) connection.return_value.__enter__.return_value.is_ready = True + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) status_connection.return_value.__enter__.return_value.get_replset_status.return_value = { "1.1.1.1": "SECONDARY" } @@ -589,6 +674,10 @@ def test_update_status_additional_messages( # Case 1: Unit has not been added to replica set yet self.harness.set_leader(False) connection.return_value.__enter__.return_value.is_ready = True + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) status_connection.return_value.__enter__.return_value.get_replset_status.return_value = {} self.harness.charm.on.update_status.emit() self.assertEqual(self.harness.charm.unit.status, WaitingStatus("Member being added..")) @@ -627,6 +716,10 @@ def test_update_status_not_ready( """Tests that if mongod is not running on this unit it restarts it.""" get_secret.return_value = "pass123" connection.return_value.__enter__.return_value.is_ready = False + connection.return_value.__enter__.return_value.get_replset_members_and_version.return_value = ( + {"1.1.1.1:27017"}, + 1, + ) self.harness.charm.app_peer_data["db_initialised"] = "true" self.harness.charm.on.update_status.emit()
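For reference, the forced reconfiguration exercised by these tests (implemented by `_reconfigure_replica_set` and `reconfigure_replset` above) boils down to one `replSetReconfig` command against a direct connection. Below is a minimal pymongo sketch; the host addresses are placeholders, and in the charm the new hosts come from `self.app_hosts`:

```python
# Sketch of the forced replSetReconfig performed when every member address in
# the stored config is stale (e.g. storage reattached in a new model/app).
# Hosts below are placeholders, not values from this PR.
from pymongo import MongoClient

client = MongoClient("mongodb://localhost:27017/?directConnection=true")
rs_config = client.admin.command("replSetGetConfig")["config"]

new_hosts = ["10.0.0.5:27017", "10.0.0.6:27017"]
config = {
    "_id": rs_config["_id"],
    "members": [{"_id": i, "host": h} for i, h in enumerate(new_hosts)],
    # the new config version must be greater than the stored one
    "version": rs_config["version"] + 1,
}
# force=True is required: none of the old member addresses are reachable, so
# there is no primary that could accept a normal (non-forced) reconfig.
client.admin.command("replSetReconfig", config, force=True)
```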