diff --git a/CHANGELOG.md b/CHANGELOG.md index cd856c6..b31333c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,8 @@ # Changelog +## Version 1.4.6 - Bugfix release +- Fix metrics API on eksctl v0.201.0 and later + ## Version 1.4.5 - Bugfix and improvements release - Fix Nvidia driver and ALB controller download issue. - Package Nvidia driver as fallback for creating GPU nodepools. diff --git a/plugin.json b/plugin.json index 568a673..b171b58 100644 --- a/plugin.json +++ b/plugin.json @@ -1,6 +1,6 @@ { "id": "eks-clusters", - "version": "1.4.5", + "version": "1.4.6", "meta": { "label": "EKS clusters", "description": "Interact with Amazon Elastic Kubernetes Service clusters", diff --git a/python-clusters/create-eks-cluster/cluster.json b/python-clusters/create-eks-cluster/cluster.json index f262c5a..72cc2bf 100644 --- a/python-clusters/create-eks-cluster/cluster.json +++ b/python-clusters/create-eks-cluster/cluster.json @@ -73,11 +73,12 @@ "description" : "Leave empty for current default of eksctl" }, { - "name": "installMetricsServer", - "label": "Install metrics server", - "type": "BOOLEAN", - "mandatory" : true, - "defaultValue" : true + "name": "installMetricsServer", + "label": "Ensure metrics server installation", + "type": "BOOLEAN", + "mandatory" : true, + "defaultValue" : true, + "description": "Some versions of eksctl may install the metrics server with the default add-ons." 
def has_metrics_server(kube_config_path):
    """
    Tell whether metrics-server pods are already listed on the cluster.

    Runs ``kubectl get pods`` in the ``kube-system`` namespace, filtered on
    the ``app.kubernetes.io/name=metrics-server`` label, with KUBECONFIG
    pointing at the given file.

    :param kube_config_path: path of the kubeconfig file handed to kubectl
                             through the KUBECONFIG environment variable
    :return: True when the first value returned by ``run_with_timeout``
             (presumably the command's standard output -- confirm against
             ``kubectl_command``) is non-empty once stripped, else False
    """
    # Work on a copy of the process environment so that os.environ itself
    # is left untouched.
    kubectl_env = dict(os.environ, KUBECONFIG=kube_config_path)
    kubectl_args = [
        "kubectl", "get", "pods",
        "-n", "kube-system",
        "-l", "app.kubernetes.io/name=metrics-server",
        "--ignore-not-found",
    ]
    logging.info("Checking metrics server presence with : %s", json.dumps(kubectl_args))

    listing, _ = run_with_timeout(kubectl_args, env=kubectl_env, timeout=5)
    stripped_listing = listing.strip()
    return len(stripped_listing) > 0
logging.info("Metrics server is already deployed on the cluster. Skipping install.") + return + try: env = os.environ.copy() env["KUBECONFIG"] = kube_config_path