Skip to content

Commit

Permalink
DPE-2485 Resolve race condition when restarting after configure_insta…
Browse files Browse the repository at this point in the history
…nce (#321)

* Ensure mysqld PID changes when restarting during configure_instance

* Fix + add unit tests

* Update cos_agent charm lib to v0.6 + address PR feedback

* Update mysql charm lib patch to v0.48
  • Loading branch information
shayancanonical authored Sep 7, 2023
1 parent eb3c315 commit f0abc40
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 7 deletions.
27 changes: 25 additions & 2 deletions lib/charms/mysql/v0/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def wait_until_mysql_connection(self) -> None:

# Increment this PATCH version before using `charmcraft publish-lib` or reset
# to 0 if you are raising the major API version
LIBPATCH = 47
LIBPATCH = 48

UNIT_TEARDOWN_LOCKNAME = "unit-teardown"
UNIT_ADD_LOCKNAME = "unit-add"
Expand Down Expand Up @@ -2501,6 +2501,29 @@ def kill_unencrypted_sessions(self) -> None:
logger.exception("Failed to kill external sessions")
raise MySQLKillSessionError

def check_mysqlsh_connection(self) -> bool:
"""Checks if it is possible to connect to the server with mysqlsh."""
connect_commands = (
f"shell.connect('{self.server_config_user}:{self.server_config_password}@{self.instance_address}')",
'session.run_sql("SELECT 1")',
)

try:
self._run_mysqlsh_script("\n".join(connect_commands))
return True
except MySQLClientError:
return False

def get_pid_of_port_3306(self) -> Optional[str]:
"""Retrieves the PID of the process that is bound to port 3306."""
get_pid_command = ["fuser", "3306/tcp"]

try:
stdout, _ = self._execute_commands(get_pid_command)
return stdout
except MySQLExecError:
return None

@abstractmethod
def is_mysqld_running(self) -> bool:
"""Returns whether mysqld is running."""
Expand All @@ -2527,7 +2550,7 @@ def restart_mysql_exporter(self) -> None:
raise NotImplementedError

@abstractmethod
def wait_until_mysql_connection(self) -> None:
def wait_until_mysql_connection(self, check_port: bool = True) -> None:
"""Wait until a connection to MySQL has been obtained.
Implemented in subclasses, test for socket file existence.
Expand Down
23 changes: 23 additions & 0 deletions src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from charms.grafana_agent.v0.cos_agent import COSAgentProvider
from charms.mysql.v0.backups import MySQLBackups
from charms.mysql.v0.mysql import (
Error,
MySQLAddInstanceToClusterError,
MySQLCharmBase,
MySQLConfigureInstanceError,
Expand Down Expand Up @@ -47,8 +48,10 @@
RetryError,
Retrying,
retry_if_exception_type,
stop_after_attempt,
stop_after_delay,
wait_exponential,
wait_fixed,
)

from constants import (
Expand Down Expand Up @@ -92,6 +95,10 @@
logger = logging.getLogger(__name__)


class MySQLDNotRestartedError(Error):
"""Exception raised when MySQLD is not restarted after configuring instance."""


class MySQLOperatorCharm(MySQLCharmBase):
"""Operator framework charm for MySQL."""

Expand Down Expand Up @@ -213,6 +220,9 @@ def _on_start(self, event: StartEvent) -> None:
except MySQLCreateCustomMySQLDConfigError:
self.unit.status = BlockedStatus("Failed to create custom mysqld config")
return
except MySQLDNotRestartedError:
self.unit.status = BlockedStatus("Failed to restart mysqld after configuring instance")
return
except MySQLGetMySQLVersionError:
logger.debug("Fail to get MySQL version")

Expand Down Expand Up @@ -510,8 +520,21 @@ def _workload_initialise(self) -> None:
self._mysql.write_mysqld_config(profile=self.config["profile"])
self._mysql.reset_root_password_and_start_mysqld()
self._mysql.configure_mysql_users()

current_mysqld_pid = self._mysql.get_pid_of_port_3306()
self._mysql.configure_instance()

for attempt in Retrying(wait=wait_fixed(30), stop=stop_after_attempt(20), reraise=True):
with attempt:
new_mysqld_pid = self._mysql.get_pid_of_port_3306()
if not new_mysqld_pid:
raise MySQLDNotRestartedError("mysqld process not yet up after restart")

if current_mysqld_pid == new_mysqld_pid:
raise MySQLDNotRestartedError("mysqld not yet shutdown")

self._mysql.wait_until_mysql_connection()

self.unit_peer_data["unit-configured"] = "True"
self.unit_peer_data["instance-hostname"] = f"{instance_hostname()}:3306"
if workload_version := self._mysql.get_mysql_version():
Expand Down
10 changes: 8 additions & 2 deletions src/mysql_vm_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,19 +289,25 @@ def reset_root_password_and_start_mysqld(self) -> None:
raise MySQLResetRootPasswordAndStartMySQLDError("Failed to restart mysqld")

try:
self.wait_until_mysql_connection()
# Do not try to connect over port as we may not have configured user/passwords
self.wait_until_mysql_connection(check_port=False)
except MySQLServiceNotRunningError:
raise MySQLResetRootPasswordAndStartMySQLDError("mysqld service not running")

@retry(reraise=True, stop=stop_after_delay(120), wait=wait_fixed(5))
def wait_until_mysql_connection(self) -> None:
def wait_until_mysql_connection(self, check_port: bool = True) -> None:
"""Wait until a connection to MySQL has been obtained.
Retry every 5 seconds for 120 seconds if there is an issue obtaining a connection.
"""
logger.debug("Waiting for MySQL connection")

if not os.path.exists(MYSQLD_SOCK_FILE):
raise MySQLServiceNotRunningError("MySQL socket file not found")

if check_port and not self.check_mysqlsh_connection():
raise MySQLServiceNotRunningError("Connection with mysqlsh not possible")

logger.debug("MySQL connection possible")

def execute_backup_commands(
Expand Down
20 changes: 17 additions & 3 deletions tests/unit/test_charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,12 @@ def test_on_config_changed_sets_random_cluster_name_in_peer_databag(self):
@patch("mysql_vm_helpers.MySQL.initialize_juju_units_operations_table")
@patch("mysql_vm_helpers.MySQL.create_cluster")
@patch("mysql_vm_helpers.MySQL.reset_root_password_and_start_mysqld")
@patch("mysql_vm_helpers.MySQL.get_pid_of_port_3306", side_effect=["1234", "5678"])
@patch("mysql_vm_helpers.MySQL.write_mysqld_config")
def test_on_start(
self,
_create_custom_mysqld_config,
_write_mysqld_config,
_get_pid_of_port_3306,
_reset_root_password_and_start_mysqld,
_create_cluster,
_initialize_juju_units_operations_table,
Expand Down Expand Up @@ -178,10 +180,12 @@ def test_on_start(
@patch("mysql_vm_helpers.MySQL.initialize_juju_units_operations_table")
@patch("mysql_vm_helpers.MySQL.create_cluster")
@patch("mysql_vm_helpers.MySQL.reset_root_password_and_start_mysqld")
@patch("mysql_vm_helpers.MySQL.get_pid_of_port_3306")
@patch("mysql_vm_helpers.MySQL.write_mysqld_config")
def test_on_start_exceptions(
self,
_create_custom_mysqld_config,
_write_mysqld_config,
_get_pid_of_port_3306,
_reset_root_password_and_start_mysqld,
_create_cluster,
_initialize_juju_units_operations_table,
Expand All @@ -191,6 +195,8 @@ def test_on_start_exceptions(
_check_call,
_stop_mysqld,
):
patch("tenacity.BaseRetrying.wait", side_effect=lambda *args, **kwargs: 0)

# execute on_leader_elected and config_changed to populate the peer databag
self.harness.set_leader(True)
self.charm.on.config_changed.emit()
Expand All @@ -211,6 +217,14 @@ def test_on_start_exceptions(

_configure_instance.reset_mock()

# test mysqld not restarting after configure instance
_get_pid_of_port_3306.side_effect = ["1234", "1234"]

self.charm.on.start.emit()
self.assertTrue(isinstance(self.harness.model.unit.status, BlockedStatus))

_get_pid_of_port_3306.reset_mock()

# test an exception initializing the mysql.juju_units_operations table
_initialize_juju_units_operations_table.side_effect = (
MySQLInitializeJujuOperationsTableError
Expand All @@ -236,7 +250,7 @@ def test_on_start_exceptions(
self.assertTrue(isinstance(self.harness.model.unit.status, BlockedStatus))

# test an exception creating a custom mysqld config
_create_custom_mysqld_config.side_effect = MySQLCreateCustomMySQLDConfigError
_write_mysqld_config.side_effect = MySQLCreateCustomMySQLDConfigError

self.charm.on.start.emit()
self.assertTrue(isinstance(self.harness.model.unit.status, BlockedStatus))
Expand Down

0 comments on commit f0abc40

Please sign in to comment.