Skip to content

Commit

Permalink
Add hacluster integration
Browse files Browse the repository at this point in the history
  • Loading branch information
George Kraft committed Nov 17, 2023
1 parent ca2e882 commit acecc84
Show file tree
Hide file tree
Showing 6 changed files with 214 additions and 4 deletions.
13 changes: 13 additions & 0 deletions config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,19 @@ options:
description: |
Space-separated list of extra SAN entries to add to the x509 certificate
created for the control plane nodes.
ha-cluster-vip:
type: string
description: |
Virtual IP for the charm to use with the HA Cluster subordinate charm.
Mutually exclusive with ha-cluster-dns. Multiple virtual IPs are
separated by spaces.
default: ""
ha-cluster-dns:
type: string
description: |
DNS entry to use with the HA Cluster subordinate charm.
Mutually exclusive with ha-cluster-vip.
default: ""
image-registry:
type: string
default: "rocks.canonical.com:443/cdk"
Expand Down
2 changes: 2 additions & 0 deletions metadata.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ requires:
interface: kube-dns
etcd:
interface: etcd
ha:
interface: hacluster
loadbalancer-external:
# Indicates that the LB should be public facing. Intended for clients which
# must reach the API server via external networks.
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ charm-lib-node-base @ git+https://github.com/charmed-kubernetes/layer-kubernetes
charm-lib-reconciler @ git+https://github.com/charmed-kubernetes/charm-lib-reconciler
cosl == 0.0.7
gunicorn >= 20.0.0,<21.0.0
interface_hacluster @ git+https://github.com/openstack/charm-interface-hacluster
jinja2
loadbalancer_interface
ops >= 2.2.0
Expand Down
33 changes: 32 additions & 1 deletion src/charm.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,11 @@
from charms.node_base import LabelMaker
from charms.reconciler import Reconciler
from cos_integration import COSIntegration
from hacluster import HACluster
from k8s_api_endpoints import K8sApiEndpoints
from kubectl import kubectl
from loadbalancer_interface import LBProvider
from ops import BlockedStatus, ModelError, WaitingStatus
from ops import BlockedStatus, MaintenanceStatus, ModelError, WaitingStatus
from ops.interface_kube_control import KubeControlProvides
from ops.interface_tls_certificates import CertificatesRequires

Expand Down Expand Up @@ -64,6 +65,7 @@ def __init__(self, *args):
)
self.etcd = EtcdReactiveRequires(self)
self.node_base = LabelMaker(self, kubeconfig_path="/root/.kube/config")
self.hacluster = HACluster(self, self.config)
self.k8s_api_endpoints = K8sApiEndpoints(self)
self.kube_control = KubeControlProvides(self, endpoint="kube-control")
self.kube_dns = KubeDnsRequires(self, endpoint="dns-provider")
Expand All @@ -72,6 +74,7 @@ def __init__(self, *args):
self.external_cloud_provider = ExternalCloudProvider(self, "external-cloud-provider")
self.reconciler = Reconciler(self, self.reconcile)
self.tokens = TokensProvider(self, endpoint="tokens")
self.framework.observe(self.on.update_status, self.update_status)

def api_dependencies_ready(self):
common_name = kubernetes_snaps.get_public_address()
Expand Down Expand Up @@ -143,6 +146,17 @@ def configure_controller_manager(self):
external_cloud_provider=self.external_cloud_provider,
)

def configure_hacluster(self):
    """Refresh the HACluster VIPs and relation config once HACluster is ready.

    Returns without doing anything while ``self.hacluster.is_ready`` is false.
    """
    if not self.hacluster.is_ready:
        return

    # No systemd services are registered with HACluster on purpose. The
    # Kubernetes control plane services used to be registered, but that
    # handed their management to Pacemaker, which often would not start
    # them when it should. Long history of bugs there.
    status.add(MaintenanceStatus("Configuring HACluster"))
    hacluster = self.hacluster
    hacluster.update_vips()
    hacluster.configure_hacluster()

def configure_kernel_parameters(self):
sysctl = yaml.safe_load(self.model.config["sysctl"])
kubernetes_snaps.configure_kernel_parameters(sysctl)
Expand Down Expand Up @@ -429,6 +443,7 @@ def reconcile(self, event):
self.configure_kubelet()
self.configure_kube_proxy()
self.configure_kube_control()
self.configure_hacluster()
self.generate_tokens()
self.configure_observability()

Expand All @@ -443,6 +458,11 @@ def request_certificates(self):

bind_addrs = kubernetes_snaps.get_bind_addresses()
common_name = kubernetes_snaps.get_public_address()
config_addrs = [
address
for option in ["loadbalancer-ips", "ha-cluster-vip", "ha-cluster-dns"]
for address in self.config[option].split()
]
domain = self.get_dns_domain()
extra_sans = self.config["extra_sans"].split()
k8s_service_addrs = kubernetes_snaps.get_kubernetes_service_addresses(
Expand All @@ -464,6 +484,7 @@ def request_certificates(self):
f"kubernetes.default.svc.{domain}",
]
sans += bind_addrs
sans += config_addrs
sans += ingress_addrs
sans += k8s_service_addrs
sans += extra_sans
Expand All @@ -472,6 +493,16 @@ def request_certificates(self):
self.certificates.request_client_cert("system:kube-apiserver")
self.certificates.request_server_cert(cn=common_name, sans=sans)

def update_status(self, event):
    """Mirror the kube-apiserver service state onto the pacemaker node.

    When HACluster is ready, ``systemctl is-active`` is run for
    ``snap.kube-apiserver.daemon``: an exit code of 0 sets the node online,
    any other exit code sets it to standby.

    Args:
        event: The update-status event (not inspected).
    """
    if not self.hacluster.is_ready:
        return

    exit_code = subprocess.call(["systemctl", "is-active", "snap.kube-apiserver.daemon"])
    if exit_code == 0:
        self.hacluster.set_node_online()
        return
    self.hacluster.set_node_standby()

def write_service_account_key(self):
peer_relation = self.model.get_relation("peer")
key = peer_relation.data[self.app].get("service-account-key")
Expand Down
152 changes: 152 additions & 0 deletions src/hacluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
"""HACluster integration module."""

import logging
import subprocess
from typing import List, Optional

import ops
from cached_property import cached_property
from interface_hacluster.ops_ha_interface import HAServiceRequires
from ops.framework import Object, StoredState
from ops.model import Relation

log = logging.getLogger(__name__)


class HAClusterConfigMismatchError(Exception):
    """Signal conflicting HA cluster configuration options.

    The text given at construction is stored on ``message`` and is also the
    exception's only positional argument.
    """

    def __init__(self, message):
        self.message = message
        Exception.__init__(self, message)


class HACluster(Object):
    """Integrate the charm with HACluster over a single relation endpoint.

    Bookkeeping is kept in ``state``: the systemd services currently
    registered, those queued for registration or removal, and the VIPs / DNS
    records last handed to the relation interface.
    """

    state = StoredState()

    def __init__(self, charm: ops.CharmBase, config, endpoint="ha"):
        """Create the relation interface and initialise stored state.

        Args:
            charm: The charm that owns this object.
            config: Mapping holding the ``ha-cluster-vip`` and
                ``ha-cluster-dns`` options.
            endpoint: Name of the hacluster relation endpoint.
        """
        super().__init__(charm, f"relation-{endpoint}")
        self.charm = charm
        self.endpoint = endpoint
        self.config = config
        self.interface = HAServiceRequires(self.charm, endpoint)

        self.state.set_default(
            current_services={}, desired_services={}, deleted_services={}, vips=set(), dns=set()
        )

    def _config_words(self, option: str) -> List[str]:
        """Return the whitespace-separated entries of a config option.

        A missing or ``None`` value yields an empty list instead of raising.
        """
        return (self.config.get(option) or "").split()

    def _configure_dns(self, dns_records: List[str]):
        """Register each DNS record against this endpoint's ingress address.

        Args:
            dns_records: DNS entries to pass to the relation interface.
        """
        binding = self.charm.model.get_binding(self.endpoint)
        address = binding.network.ingress_address
        for dns_record in dns_records:
            self.interface.add_dnsha(self._unit_name, address, dns_record, "public")

        self.state.dns = set(dns_records)

    def _configure_vips(self, vips: List[str]):
        """Register each virtual IP and remember the set that was registered.

        Args:
            vips: Virtual IPs to pass to the relation interface.
        """
        for vip in vips:
            self.interface.add_vip(self._unit_name, vip)
        self.state.vips = set(vips)

    @cached_property
    def _unit_name(self):
        """Return the part of the unit name before the ``/`` separator."""
        return self.charm.unit.name.split("/")[0]

    def _update_services(self):
        """Apply the queued systemd service removals and additions."""
        current_services = self.state.current_services
        deleted_services = self.state.deleted_services
        desired_services = self.state.desired_services

        for name, service in deleted_services.items():
            self.interface.remove_systemd_service(name, service)
            # Forget the removed service; otherwise add_service() would treat
            # it as still registered and never queue it again.
            current_services.pop(name, None)

        for name, service in desired_services.items():
            self.interface.add_systemd_service(name, service)
            current_services[name] = service

        deleted_services.clear()
        desired_services.clear()

    def add_service(self, name, service_name):
        """Add a service to the desired services in the HA cluster.

        The request is ignored when ``name`` is already a current service.

        Args:
            name (str): The key name of the service.
            service_name (str): The name of the service to be added.
        """
        current_services = self.state.current_services
        if name not in current_services:
            self.state.desired_services[name] = service_name

    def configure_hacluster(self):
        """Configure the HACluster relation with VIPs or DNS records.

        Raises:
            HAClusterConfigMismatchError: If both ``ha-cluster-vip`` and
                ``ha-cluster-dns`` are set.
        """
        vips = self._config_words("ha-cluster-vip")
        dns_records = self._config_words("ha-cluster-dns")
        if vips and dns_records:
            msg = "Unsupported config. ha-cluster-vip and ha-cluster-dns cannot both be set."
            log.warning(msg)
            raise HAClusterConfigMismatchError(msg)
        if vips:
            self._configure_vips(vips)
        elif dns_records:
            self._configure_dns(dns_records)

        self._update_services()

        self.interface.bind_resources()

    @property
    def is_ready(self):
        """Check if the HACluster integration is ready.

        Returns:
            bool: True if the HACluster relation exists and has units,
            False otherwise.
        """
        relation = self.relation
        return bool(relation and relation.units)

    @property
    def relation(self) -> Optional[Relation]:
        """Get the HACluster relation."""
        return self.model.get_relation(self.endpoint)

    def remove_service(self, name, service_name):
        """Remove a service from the desired services in the HA cluster.

        A current service is queued for deletion; a service that is only
        queued for addition is dropped from that queue.

        Args:
            name (str): The key name of the service.
            service_name (str): The name of the service to be removed.
        """
        current_services = self.state.current_services
        deleted_services = self.state.deleted_services
        desired_services = self.state.desired_services

        if name in current_services:
            deleted_services[name] = service_name

        if name in desired_services:
            del desired_services[name]

    def set_node_online(self):
        """Set pacemaker node to online."""
        log.info("Setting pacemaker node status to online")
        subprocess.check_call(["crm", "-w", "-F", "node", "online"])

    def set_node_standby(self):
        """Set pacemaker node to standby, forcing VIPs to failover to other nodes."""
        log.warning("Setting pacemaker node status to standby")
        subprocess.check_call(["crm", "-w", "-F", "node", "standby"])

    def update_vips(self):
        """Remove previously registered VIPs that are no longer configured."""
        known_vips = set(self.state.vips)
        wanted_vips = set(self._config_words("ha-cluster-vip"))

        for vip in known_vips - wanted_vips:
            self.interface.remove_vip(self._unit_name, vip)

        # Record the removal so a stale VIP is not removed again on every run
        # (e.g. after the config switches from VIPs to DNS records).
        self.state.vips = known_vips & wanted_vips
17 changes: 14 additions & 3 deletions src/k8s_api_endpoints.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,24 @@ def __init__(self, charm):
self.charm = charm

def from_config(self) -> Optional[str]:
    """Endpoint URL from charm configuration.

    Usually an IP address. Could be a domain name.

    If the loadbalancer-ips config option is set, use that first.
    Otherwise, if we are integrated with hacluster, then build an endpoint
    from the ha-cluster-vip or ha-cluster-dns configs.

    Returns:
        The result of ``build_url`` for the first configured address and
        6443, or None when no applicable address is configured.
    """
    addresses = self.charm.config["loadbalancer-ips"].split()
    if addresses:
        return build_url(addresses[0], 6443)

    if self.charm.hacluster.is_ready:
        # ha-cluster-vip is consulted before ha-cluster-dns.
        for key in ["ha-cluster-vip", "ha-cluster-dns"]:
            addresses = self.charm.config[key].split()
            if addresses:
                return build_url(addresses[0], 6443)

    return None

def from_lb_external(self) -> Optional[str]:
"""Endpoint URL from the loadbalancer-external relation."""
Expand Down

0 comments on commit acecc84

Please sign in to comment.