Skip to content

Commit

Permalink
feat(checker): check Kube DNS has more than 2 replicas
Browse files Browse the repository at this point in the history
longhorn/longhorn-9752

Signed-off-by: Chin-Ya Huang <[email protected]>
  • Loading branch information
c3y1huang committed Nov 28, 2024
1 parent 130ed5c commit 4673a4f
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 10 deletions.
5 changes: 5 additions & 0 deletions pkg/consts/preflight.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@ const (
AppNamePreflightInstaller = "longhorn-preflight-installer"
)

// Label key/value pair used by the preflight checker as the selector when
// listing the cluster DNS deployment.
const (
// KubeAppLabel is the label key ("k8s-app") of the selector.
KubeAppLabel = "k8s-app"
// KubeAppValueDNS is the label value ("kube-dns") of the selector.
KubeAppValueDNS = "kube-dns"
)

type DependencyModuleType int

const (
Expand Down
63 changes: 53 additions & 10 deletions pkg/local/preflight/checker.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,16 @@ type Checker struct {
func (local *Checker) Init() error {
local.collection.Log = &types.LogCollection{}

config, err := commonkube.GetInClusterConfig()
if err != nil {
return errors.Wrap(err, "failed to get client config")
}

local.kubeClient, err = kubeclient.NewForConfig(config)
if err != nil {
return errors.Wrap(err, "failed to get Kubernetes clientset")
}

osRelease, err := utils.GetOSRelease()
if err != nil {
return errors.Wrap(err, "failed to get OS release")
Expand All @@ -61,16 +71,6 @@ func (local *Checker) Init() error {
local.logger = logrus.WithField("os", local.osRelease)

if local.osRelease == fmt.Sprint(consts.OperatingSystemContainerOptimizedOS) {
config, err := commonkube.GetInClusterConfig()
if err != nil {
return errors.Wrap(err, "failed to get client config")
}

local.kubeClient, err = kubeclient.NewForConfig(config)
if err != nil {
return errors.Wrap(err, "failed to get Kubernetes clientset")
}

return nil
}

Expand Down Expand Up @@ -177,6 +177,8 @@ func (local *Checker) Init() error {

// Run executes the preflight checks.
func (local *Checker) Run() error {
local.checkKubeDNS()

switch local.osRelease {
case fmt.Sprint(consts.OperatingSystemContainerOptimizedOS):
logrus.Infof("Checking preflight for %v", consts.OperatingSystemContainerOptimizedOS)
Expand Down Expand Up @@ -466,3 +468,44 @@ func (local *Checker) checkNFSv4Support() error {
local.collection.Log.Error = append(local.collection.Log.Error, "NFS4 is not supported")
return nil
}

// checkKubeDNS verifies that the cluster DNS deployment is configured with,
// and is currently serving from, at least two replicas.
//
// The deployment is looked up in the "kube-system" namespace using the
// "k8s-app=kube-dns" label selector. Outcomes are appended to the checker's
// log collection rather than returned:
//   - listing failure: one Error entry;
//   - anything other than exactly one matching deployment: one Warn entry;
//   - spec replicas unset or below 2: one Warn entry;
//   - ready replicas below 2: one Warn entry;
//   - otherwise: one Info entry with the desired and ready replica counts.
//
// Only the first condition that applies is reported.
//
// https://github.com/longhorn/longhorn/issues/9752
func (local *Checker) checkKubeDNS() {
	logrus.Info("Checking if CoreDNS has multiple replicas")

	report := local.collection.Log
	selector := map[string]string{consts.KubeAppLabel: consts.KubeAppValueDNS}

	found, err := commonkube.ListDeployments(local.kubeClient, metav1.NamespaceSystem, selector)
	if err != nil {
		msg := fmt.Sprintf("Failed to list Kube DNS with label %s=%s: %v", consts.KubeAppLabel, consts.KubeAppValueDNS, err)
		report.Error = append(report.Error, msg)
		return
	}

	if count := len(found.Items); count != 1 {
		msg := fmt.Sprintf("Found %d deployments with label %s=%s; expected 1", count, consts.KubeAppLabel, consts.KubeAppValueDNS)
		report.Warn = append(report.Warn, msg)
		return
	}

	// Work through a pointer so the Deployment object is not copied.
	dns := &found.Items[0]
	desired := dns.Spec.Replicas
	ready := dns.Status.ReadyReplicas

	switch {
	case desired == nil || *desired < 2:
		report.Warn = append(report.Warn, fmt.Sprintf("Kube DNS %q is set with fewer than 2 replicas; consider increasing replica count for high availability", dns.Name))
	case ready < 2:
		report.Warn = append(report.Warn, fmt.Sprintf("Kube DNS %q has fewer than 2 ready replicas; some replicas may not be running or ready", dns.Name))
	default:
		report.Info = append(report.Info, fmt.Sprintf("Kube DNS %q is set with %d replicas and %d ready replicas", dns.Name, *desired, ready))
	}
}

0 comments on commit 4673a4f

Please sign in to comment.