diff --git a/actions.yaml b/actions.yaml
index a682e8e7..19053efb 100644
--- a/actions.yaml
+++ b/actions.yaml
@@ -1,3 +1,38 @@
+cis-benchmark:
+  description: |-
+    Run the CIS Kubernetes Benchmark against snap-based components.
+  params:
+    apply:
+      type: string
+      default: none
+      description: |-
+        Apply remedies to address benchmark failures. The default, 'none',
+        will not attempt to fix any reported failures. Set to 'conservative'
+        to resolve simple failures. Set to 'dangerous' to attempt to resolve
+        all failures. Set to 'reset' to clear the service arguments set by earlier remedies.
+
+        Note: Applying any remediation may result in an unusable cluster.
+    config:
+      type: string
+      default: https://github.com/charmed-kubernetes/kube-bench-config/archive/cis-1.23.zip#sha1=3cda2fc68b4ca36f69f5913bfc0b02576e7a3b3d
+      description: |-
+        Archive containing configuration files to use when running kube-bench.
+        The default value is known to be compatible with snap components. When
+        using a custom URL, append '#<hash_type>=<checksum>' to verify the
+        archive integrity when downloaded.
+    release:
+      type: string
+      default: https://github.com/aquasecurity/kube-bench/releases/download/v0.6.8/kube-bench_0.6.8_linux_amd64.tar.gz#sha256=5f9c5231949bd022a6993f5297cc05bb80a1b7c36a43cefed0a8c8af26778863
+      description: |-
+        Archive containing the 'kube-bench' binary to run. The default value
+        points to a stable upstream release. When using a custom URL, append
+        '#<hash_type>=<checksum>' to verify the archive integrity when
+        downloaded.
+
+        This may also be set to the special keyword 'upstream'. In this case,
+        the action will compile and use a local kube-bench binary built from
+        the default branch of the upstream repository:
+          https://github.com/aquasecurity/kube-bench
 get-kubeconfig:
   description: Retrieve Kubernetes cluster config, including credentials
 upgrade:
diff --git a/src/actions/csi_benchmark.py b/src/actions/csi_benchmark.py
new file mode 100644
index 00000000..d6837339
--- /dev/null
+++ b/src/actions/csi_benchmark.py
@@ -0,0 +1,372 @@
+import contextlib
+import dataclasses
+import json
+import logging
+import os
+import shlex
+import shutil
+import subprocess
+import tempfile
+from pathlib import Path
+from typing import Optional
+
+import ops
+from charmhelpers.fetch.archiveurl import ArchiveUrlFetchHandler
+
+log = logging.getLogger(__name__)
+
+BENCH_HOME = Path("/home/ubuntu/kube-bench")  # install root; wiped and recreated by install()
+BENCH_BIN = BENCH_HOME / "kube-bench"  # the kube-bench executable
+BENCH_CFG = BENCH_HOME / "cfg-ck"  # config directory passed to kube-bench via -D
+GO_PKG = "github.com/aquasecurity/kube-bench"  # Go package built when release is 'upstream'
+RESULTS_DIR = "/home/ubuntu/kube-bench-results"  # where report() writes its result logs
+
+# Remediation dicts associate a failing test with a Remedy to fix it.
+# Conservative fixes will probably leave the cluster in a good state.
+# Dangerous fixes will likely break the cluster.
+# Remedy examples:
+# {'1.2.3': Remedy('manual', None, None)}  # we don't know how to auto fix this
+# {'1.2.3': Remedy('cli', 'command to run', None)}
+# {'1.2.3': Remedy('kv', 'snap', {cfg_key: value})}
+
+
+@dataclasses.dataclass
+class Remedy:
+    """A fix for one kube-bench test; `type` is 'manual', 'cli' or 'kv'."""
+
+    type: str
+    command: Optional[str]  # 'cli': command line to run; 'kv': service to configure
+    config: Optional[dict] = None  # 'kv' only: service arguments to set
+
+    def run(self, test_num: str, test_remediation: str) -> int:
+        """Log 'manual' steps or run the 'cli' command; return the number of fixes applied."""
+        if self.type == "manual":
+            log.info(
+                "Test %s: unable to auto-apply remedy.\n" "Manual steps:\n%s",
+                test_num,
+                test_remediation,
+            )
+        elif self.type == "cli":
+            cmd = shlex.split(self.command)
+            try:
+                out = subprocess.check_output(cmd)
+            except subprocess.CalledProcessError as err:
+                raise ActionError(f"Test {test_num}: failed to run: {cmd}") from err
+            else:
+                log.info("Test %s: applied remedy: %s\nOutput: %s", test_num, cmd, out)
+        return 1 if self.type == "cli" else 0  # 'manual' only logs, so it is not an applied fix
+
+
+CONSERVATIVE = {
+    "0.0.0": Remedy("cli", 'echo "this is fine"', None),
+    # etcd (no known failures with a default install)
+    # k8s-control-plane (no known failures with a default install)
+    # k8s-worker (no known failures with a default install)
+}
+ADMISSION_PLUGINS = {
+    "enable-admission-plugins": (
+        "PersistentVolumeLabel,"
+        "PodSecurityPolicy,AlwaysPullImages,"
+        "NodeRestriction"
+    )
+}
+DANGEROUS = {
+    "0.0.0": Remedy("cli", 'echo "this is fine"', None),
+    # etcd (no known warnings with a default install)
+    # k8s-control-plane
+    "1.1.21": Remedy("cli", 'sh -c "chmod -R 600 /root/cdk/*.key"', None),  # shell expands the glob
+    "1.2.9": Remedy("manual", None, None),
+    "1.2.11": Remedy("kv", "kube-apiserver", ADMISSION_PLUGINS),
+    "1.2.25": Remedy("manual", None, None),
+    "1.2.33": Remedy("manual", None, None),
+    "1.2.34": Remedy("manual", None, None),
+    # k8s-worker
+    "4.2.9": Remedy("kv", "kubelet", {"event-qps": 0}),
+    "4.2.13": Remedy(
+        "kv",
+        "kubelet",
+        {
+            "tls-cipher-suites": "TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256,"
+            "TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256,"
+            "TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305,"
+            "TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384,"
+            "TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305,"
+            "TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384,"
+            "TLS_RSA_WITH_AES_256_GCM_SHA384,"
+            "TLS_RSA_WITH_AES_128_GCM_SHA256"
+        },
+    ),
+}
+
+
+class ActionError(Exception):
+    """Exception raised when an action fails."""
+
+
+@contextlib.contextmanager
+def _action_handler(event: ops.ActionEvent):
+    """Yield the event and turn any ActionError raised in the body into event.fail()."""
+    try:
+        yield event
+    except ActionError as e:
+        event.fail(str(e))
+
+
+def _move_matching_parent(dirpath, filename, dest):
+    """Move a parent directory that contains a specific file.
+
+    Helper function that walks a directory looking for a given file. If found,
+    the file's parent directory is moved to the given destination.
+
+    :param: dirpath: String path to search
+    :param: filename: String file to find
+    :param: dest: String destination of the found parent directory
+    """
+    for root, _, files in os.walk(dirpath):
+        for name in files:
+            if name == filename:
+                log.info("Moving %s to %s", root, dest)
+                shutil.move(root, dest)
+                return
+    else:
+        raise ActionError(f"Could not find {filename} in {dirpath}")
+
+
+class CSIBenchmark(ops.Object):
+    """Action class for CIS benchmarking."""
+
+    stored = ops.StoredState()
+
+    def __init__(self, charm: ops.CharmBase):
+        super().__init__(charm, "cis-benchmark")
+        self.charm = charm
+        self.framework.observe(charm.on.cis_benchmark_action, self._on_cis_benchmark)
+        self.stored.set_default(service_args={})  # service -> {arg: value} from 'kv' remedies
+
+    def _restart_charm(self, event: ops.ActionEvent):
+        """Trigger the charm's reconciler (expected to re-render service arguments)."""
+        log.info("Reconcile charm")
+        self.charm.reconciler.reconcile(event)
+
+    def install(self, release, config):
+        """Install kube-bench and related configuration.
+
+        Release and configuration are set via action params. If installing an
+        upstream release, this method will also install 'go' if needed.
+
+        :param: release: Archive URI or 'upstream'
+        :param: config: Archive URI of configuration files
+        """
+        if BENCH_HOME.exists():
+            shutil.rmtree(BENCH_HOME)
+        fetcher = ArchiveUrlFetchHandler()
+
+        if release == "upstream":
+            BENCH_HOME.mkdir(parents=True, exist_ok=True)
+
+            # Setup the 'go' environment
+            env = os.environ.copy()
+            go_bin = shutil.which("go", path=f'{env["PATH"]}:/snap/bin')
+            if not go_bin:
+                try:
+                    cmd = ["snap", "install", "go", "--channel=stable", "--classic"]
+                    subprocess.check_call(cmd)
+                    go_bin = "/snap/bin/go"
+                except subprocess.CalledProcessError as err:
+                    raise ActionError("Failed to install 'go' snap") from err
+            go_cache = os.getenv("GOCACHE", "/var/snap/go/common/cache")
+            go_path = os.getenv("GOPATH", "/var/snap/go/common")
+            env["GOCACHE"] = go_cache
+            env["GOPATH"] = go_path
+            Path(go_path).mkdir(parents=True, exist_ok=True)
+
+            # From https://github.com/aquasecurity/kube-bench#installing-from-sources
+            go_cmd = [go_bin, "get", GO_PKG, "github.com/golang/dep/cmd/dep"]
+            try:
+                subprocess.check_call(go_cmd, cwd=go_path, env=env)
+            except subprocess.CalledProcessError as err:
+                raise ActionError(f"Failed to run: {go_cmd}") from err
+
+            go_cmd = [go_bin, "build", "-o", BENCH_BIN, f"{go_path}/src/{GO_PKG}"]
+            try:
+                subprocess.check_call(go_cmd, cwd=go_path, env=env)
+            except subprocess.CalledProcessError as err:
+                raise ActionError(f"Failed to run: {go_cmd}") from err
+        else:
+            # Fetch the release URI and put it in the right place.
+            archive_path = fetcher.install(source=release)
+            # NB: We may not know the structure of the archive, but we know the
+            # directory containing 'kube-bench' belongs in our BENCH_HOME.
+            _move_matching_parent(dirpath=archive_path, filename="kube-bench", dest=BENCH_HOME)
+
+        # Fetch the config URI and put it in the right place.
+        archive_dir = fetcher.install(source=config)
+        # NB: We may not know the structure of the archive, but we know the
+        # directory containing 'config.yaml' belongs in our BENCH_CFG.
+        _move_matching_parent(dirpath=archive_dir, filename="config.yaml", dest=BENCH_CFG)
+
+    def apply(self, event: ops.ActionEvent, remediations=None):
+        """Apply remediations to address benchmark failures.
+
+        :param: remediations: either 'conservative' or 'dangerous'
+        """
+        applied_fixes = 0
+        danger = remediations == "dangerous"
+
+        json_log = self.report(event, log_format="json")
+        log.info("Loading JSON from: %s", json_log)
+        try:
+            with open(json_log, "r") as f:
+                full_json: dict = json.load(f)
+        except Exception as err:
+            raise ActionError(f"Failed to load: {json_log}") from err
+
+        full_json = full_json.get("Controls")[0] if "Controls" in full_json else full_json
+        for test in full_json.get("tests", {}):
+            for result in test.get("results", {}):
+                test_num = result.get("test_number")
+                test_remediation = result.get("remediation")
+                test_status = result.get("status", "")
+
+                if test_status.lower() in ("fail", "warn"):
+                    test_remedy = CONSERVATIVE.get(test_num)
+                    if not test_remedy and danger:
+                        # no conservative remedy, check dangerous if user wants
+                        test_remedy = DANGEROUS.get(test_num)
+                    if test_remedy and test_remedy.type in ["cli", "manual"]:
+                        applied_fixes += test_remedy.run(test_num, test_remediation)
+                    elif test_remedy and test_remedy.type == "kv":
+                        cfg = self.stored.service_args.get(test_remedy.command, {})
+                        cfg.update(test_remedy.config)
+                        self.stored.service_args[test_remedy.command] = cfg
+
+                        log.info("Test %s: updated configuration: %s", test_num, cfg)
+                        applied_fixes += 1
+                    else:
+                        log.info("Test %s: remediation is missing", test_num)
+
+        # CLI and KV changes will require a charm restart; do it.
+        if applied_fixes > 0:
+            self._restart_charm(event)
+
+        msg = f'Applied {applied_fixes} remediations. Re-run with "apply=none" to generate a new report.'
+        event.set_results({"summary": msg})
+
+    def reset(self, event: ops.ActionEvent):
+        """Reset any remediations we applied to storedstate.
+
+        This action does not track individual remediations to reset. Therefore,
+        this function unconditionally clears the stored arguments of every service
+        this action may have configured and then triggers a charm reconcile.
+        """
+        self.stored.service_args["kube-apiserver"] = {}
+        self.stored.service_args["kube-scheduler"] = {}
+        self.stored.service_args["kube-controller-manager"] = {}
+        self.stored.service_args["kubelet"] = {}
+        self._restart_charm(event)
+
+        event.set_results(
+            {
+                "summary": (
+                    "Reset is complete. Re-run with " '"apply=none" to generate a new report.'
+                )
+            }
+        )
+
+    def craft_extra_args(self, service: str, args: dict):
+        """Merge `args` with the stored CIS args for `service`; CIS values win on conflict."""
+        cis_args = self.stored.service_args.get(service) or {}
+        return {**args, **cis_args}  # dict(**a, **b) would raise TypeError on a shared key
+
+    def report(self, event: ops.ActionEvent, log_format="text"):
+        """Run kube-bench and report results.
+
+        By default, save the full plain-text results to our RESULTS_DIR and set
+        action output with a summary. This function can also save full results in
+        a machine-friendly json format.
+
+        :param: log_format: String determines if output is text or json
+        :returns: Path to results log
+        """
+        Path(RESULTS_DIR).mkdir(parents=True, exist_ok=True)
+
+        # Node type is different depending on the charm
+        app = self.charm.meta.name or "unknown"
+        version = "cis-1.23"
+        if "control-plane" in app:
+            # must refer to this as upstream kube-bench tests do
+            # wokeignore:rule=master
+            target = "master"
+        elif "worker" in app:
+            target = "node"
+        elif "etcd" in app:
+            target = "etcd"
+        else:
+            raise ActionError(f"Unable to determine the target to benchmark: {app}")
+
+        # Commands and log names are different depending on the format
+        _cmd_base = [BENCH_BIN, "-D", BENCH_CFG, "--benchmark", version]
+        if log_format == "json":
+            log_prefix = "results-json-"
+            verbose_cmd = _cmd_base + ["--json", "run", "--targets", target]
+        else:
+            log_prefix = "results-text-"
+            verbose_cmd = _cmd_base + ["run", "--targets", target]
+        summary_cmd = _cmd_base + ["--noremediations", "--noresults", "run", "--targets", target]
+
+        # Store full results for future consumption
+        with tempfile.NamedTemporaryFile(
+            mode="w+b", prefix=log_prefix, dir=RESULTS_DIR, delete=False
+        ) as res_file:
+            try:
+                subprocess.call(verbose_cmd, stdout=res_file, stderr=subprocess.DEVNULL)
+            except OSError as err:  # call() ignores the exit status; only a failed launch raises
+                raise ActionError(f"Failed to run: {verbose_cmd}") from err
+            else:
+                # remember the filename for later (and make it readable, why not?)
+                Path(res_file.name).chmod(0o644)
+                log_file = res_file.name
+
+        # When making a summary, we also have a verbose report. Set action output
+        # so operators can see everything related to this run.
+        try:
+            out = subprocess.check_output(
+                summary_cmd, universal_newlines=True, stderr=subprocess.DEVNULL
+            )
+        except subprocess.CalledProcessError as err:
+            raise ActionError(f"Failed to run: {summary_cmd}") from err
+        else:
+            fetch_cmd = f"juju scp {self.charm.unit.name}:{log_file} ."
+            event.set_results({"cmd": " ".join(map(str, summary_cmd)), "report": fetch_cmd, "summary": out})
+
+        return log_file  # path of the saved results; apply() opens it to read the JSON report
+
+    def _on_cis_benchmark(self, event: ops.ActionEvent):
+        with _action_handler(event):
+            # Validate action params
+            release = event.params.get("release") or "upstream"
+            config = event.params.get("config")
+            if not config:
+                msg = 'Missing "config" parameter'
+                raise ActionError(msg)
+            remediations = event.params.get("apply")
+            if remediations not in ["none", "conservative", "dangerous", "reset"]:
+                raise ActionError(f'Invalid "apply" parameter: {remediations}')
+
+            # TODO: may want an option to overwrite an existing install
+            if BENCH_BIN.exists() and BENCH_CFG.exists():
+                log.info("%s exists; skipping install", BENCH_HOME)
+            else:
+                log.info("Installing benchmark from: %s", release)
+                self.install(release, config)
+
+            # Reset, remediate, or report
+            if remediations == "reset":
+                log.info("Attempting to remove all remediations")
+                self.reset(event)
+            elif remediations != "none":
+                log.info('Applying "%s" remediations', remediations)
+                self.apply(event, remediations)
+            else:
+                log.info("Report only; no remediations were requested")
+                self.report(event, log_format="text")
diff --git a/src/charm.py b/src/charm.py
index 2b515d4f..a378bbc3 100755
--- a/src/charm.py
+++ b/src/charm.py
@@ -15,6 +15,7 @@
 from subprocess import CalledProcessError
 from typing import Callable
 
+import actions.csi_benchmark
 import actions.general
 import actions.namespace
 import actions.upgrade
@@ -92,9 +93,10 @@ def __init__(self, *args):
         self.external_cloud_provider = ExternalCloudProvider(self, "external-cloud-provider")
         self.tokens = TokensProvider(self, endpoint="tokens")
         self.encryption_at_rest = EncryptionAtRest(self)
+        self.cis_benchmark = actions.csi_benchmark.CSIBenchmark(self)
 
         # register charm actions
-        actions = [
+        action_events = [
             self.on.upgrade_action,
             self.on.get_kubeconfig_action,
             self.on.apply_manifest_action,
@@ -105,7 +107,7 @@ def __init__(self, *args):
             self.on.namespace_delete_action,
             self.on.namespace_list_action,
         ]
-        for action in actions:
+        for action in action_events:
             self.framework.observe(action, self.charm_actions)
 
         self.reconciler = Reconciler(self, self.reconcile)
@@ -137,6 +139,11 @@ def api_dependencies_ready(self):
 
         return True
 
+    def service_extra_args(self, service_name, config_key) -> str:
+        extra_args = kubernetes_snaps.parse_extra_args(self.model.config[config_key])
+        args = self.cis_benchmark.craft_extra_args(service_name, extra_args)  # add stored CIS args
+        return " ".join(f"{k}={v}" for k, v in args.items())  # "key=value key=value ..."
+
     def configure_apiserver(self):
         status.add(ops.MaintenanceStatus("Configuring API Server"))
         kubernetes_snaps.configure_apiserver(
@@ -147,7 +154,7 @@ def configure_apiserver(self):
             authorization_mode=self.model.config["authorization-mode"],
             cluster_cidr=self.cni.cidr,
             etcd_connection_string=self.etcd.get_connection_string(),
-            extra_args_config=self.model.config["api-extra-args"],
+            extra_args_config=self.service_extra_args("kube-apiserver", "api-extra-args"),
             privileged=self.model.config["allow-privileged"],
             service_cidr=self.model.config["service-cidr"],
             external_cloud_provider=self.external_cloud_provider,
@@ -186,7 +193,9 @@ def configure_controller_manager(self):
         kubernetes_snaps.configure_controller_manager(
             cluster_cidr=self.cni.cidr,
             cluster_name=self.get_cluster_name(),
-            extra_args_config=self.model.config["controller-manager-extra-args"],
+            extra_args_config=self.service_extra_args(
+                "kube-controller-manager", "controller-manager-extra-args"
+            ),
             kubeconfig="/root/cdk/kubecontrollermanagerconfig",
             service_cidr=self.model.config["service-cidr"],
             external_cloud_provider=self.external_cloud_provider,
@@ -248,7 +257,7 @@ def configure_kube_proxy(self):
         status.add(ops.MaintenanceStatus("Configuring Kube Proxy"))
         kubernetes_snaps.configure_kube_proxy(
             cluster_cidr=self.cni.cidr,
-            extra_args_config=self.model.config["proxy-extra-args"],
+            extra_args_config=self.service_extra_args("kube-proxy", "proxy-extra-args"),
             extra_config=yaml.safe_load(self.model.config["proxy-extra-config"]),
             kubeconfig="/root/cdk/kubeproxyconfig",
             external_cloud_provider=self.external_cloud_provider,
@@ -260,7 +269,7 @@ def configure_kubelet(self):
             container_runtime_endpoint=self.container_runtime.socket,
             dns_domain=self.get_dns_domain(),
             dns_ip=self.get_dns_address(),
-            extra_args_config=self.model.config["kubelet-extra-args"],
+            extra_args_config=self.service_extra_args("kubelet", "kubelet-extra-args"),
             extra_config=yaml.safe_load(self.model.config["kubelet-extra-config"]),
             external_cloud_provider=self.external_cloud_provider,
             kubeconfig="/root/cdk/kubeconfig",
@@ -309,7 +318,7 @@ def check_status(endpoint, ep_name):
     def configure_scheduler(self):
         status.add(ops.MaintenanceStatus("Configuring Scheduler"))
         kubernetes_snaps.configure_scheduler(
-            extra_args_config=self.model.config["scheduler-extra-args"],
+            extra_args_config=self.service_extra_args("kube-scheduler", "scheduler-extra-args"),
             kubeconfig="/root/cdk/kubeschedulerconfig",
         )
 