Skip to content

Commit

Permalink
Make metallb state become part of shoot health. (#228)
Browse files Browse the repository at this point in the history
  • Loading branch information
Gerrit91 authored Jan 10, 2022
1 parent e932fb7 commit fb1db9a
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 0 deletions.
54 changes: 54 additions & 0 deletions charts/internal/shoot-control-plane/templates/metallb.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,27 @@ spec:
drop:
- all
readOnlyRootFilesystem: true

# We write the controller's health state into a ConfigMap so that Gardener
# can check it easily and report it as part of the shoot health.
#
# This also helps us to identify stale configs easily:
# https://github.com/metallb/metallb/issues/462
- name: health
  # NOTE(review): ":latest" is a mutable tag; consider pinning a fixed version.
  image: "bitnami/kubectl:latest"
  command:
  - bash
  - -c
  - |
    # Abort on any failing command or pipeline stage; the loop below only
    # ends when one of its commands fails.
    set -eo pipefail
    while true; do
      sleep 30
      # Scrape the metrics endpoint once per iteration so that both values
      # are taken from the same snapshot.
      metrics=$(curl -m 3 -s localhost:7472/metrics)
      stale=$(grep '^metallb_k8s_client_config_stale_bool' <<< "${metrics}" | awk '{ print $2 }')
      loaded=$(grep '^metallb_k8s_client_config_loaded_bool' <<< "${metrics}" | awk '{ print $2 }')
      # Render the ConfigMap client-side and apply it, so the same command
      # works for both the initial creation and every later update.
      kubectl create --save-config configmap health --dry-run=client -o yaml "--from-literal=configStale=${stale}" "--from-literal=configLoaded=${loaded}" | kubectl apply -f -
    done
nodeSelector:
kubernetes.io/os: linux
securityContext:
Expand All @@ -429,3 +450,36 @@ spec:
fsGroup: 65534
serviceAccountName: controller
terminationGracePeriodSeconds: 0

---
# Permissions for the health check sidecar: it may create and patch
# ConfigMaps in the metallb-system namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: health-monitoring
  namespace: metallb-system
  labels:
    app: metallb
rules:
- apiGroups: [""]
  resources: ["configmaps"]
  verbs: ["create", "patch"]
---
# Grants the health-monitoring Role to the controller service account.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: health-monitoring
  namespace: metallb-system
  labels:
    app: metallb
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: health-monitoring
subjects:
# NOTE(review): no namespace is set on the subject; presumably it resolves to
# the namespace of this binding - confirm.
- kind: ServiceAccount
  name: controller
4 changes: 4 additions & 0 deletions pkg/controller/healthcheck/add.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ func RegisterHealthChecks(mgr manager.Manager, opts AddOptions) error {
HealthCheck: CheckDuros(metal.DurosResourceName),
PreCheckFunc: durosPreCheck,
},
{
ConditionType: string(gardencorev1beta1.ShootSystemComponentsHealthy),
HealthCheck: CheckMetalLB(),
},
}); err != nil {
return err
}
Expand Down
78 changes: 78 additions & 0 deletions pkg/controller/healthcheck/metallb.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package healthcheck

import (
"context"
"fmt"

"github.com/gardener/gardener/extensions/pkg/controller/healthcheck"
gardencorev1beta1 "github.com/gardener/gardener/pkg/apis/core/v1beta1"

"github.com/go-logr/logr"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
)

// MetalLBHealthChecker holds the dependencies needed to run the MetalLB
// health check.
type MetalLBHealthChecker struct {
	// logger is assigned by SetLoggerSuffix and used to report failed checks.
	logger logr.Logger
	// shootClient is assigned by InjectShootClient and used to read the
	// health ConfigMap.
	shootClient client.Client
}

// CheckMetalLB returns a health check for MetalLB. The returned checker has
// neither a logger nor a shoot client set yet.
func CheckMetalLB() healthcheck.HealthCheck {
	return new(MetalLBHealthChecker)
}

// InjectShootClient stores the given shoot client on the health checker.
func (h *MetalLBHealthChecker) InjectShootClient(shootClient client.Client) {
	h.shootClient = shootClient
}

// SetLoggerSuffix sets up the logger of the health checker. The logger name
// is built from the given provider and extension, followed by
// "healthcheck-metallb".
func (h *MetalLBHealthChecker) SetLoggerSuffix(provider, extension string) {
	name := fmt.Sprintf("%s-%s-healthcheck-metallb", provider, extension)
	h.logger = log.Log.WithName(name)
}

// DeepCopy returns a pointer to a copy of the health checker struct. The copy
// is made by plain struct assignment.
func (h *MetalLBHealthChecker) DeepCopy() healthcheck.HealthCheck {
	clone := *h
	return &clone
}

// Check executes the health check.
//
// It reads the "health" ConfigMap from the "metallb-system" namespace through
// the shoot client and evaluates it with IsHealthy.
//
// If the ConfigMap cannot be retrieved, a nil result and an error wrapping the
// underlying client error are returned. If the ConfigMap reports an unhealthy
// state, a result with status ConditionFalse is returned whose detail carries
// the reason. Otherwise the result has status ConditionTrue.
//
// The request parameter is not used by this check.
func (h *MetalLBHealthChecker) Check(ctx context.Context, request types.NamespacedName) (*healthcheck.SingleCheckResult, error) {
	health := &v1.ConfigMap{}
	key := client.ObjectKey{Namespace: "metallb-system", Name: "health"}

	if err := h.shootClient.Get(ctx, key, health); err != nil {
		// Wrap with %w (not %v) so that callers can still inspect the
		// underlying error with errors.Is / errors.As.
		wrapped := fmt.Errorf("check metallb health configmap failed. Unable to retrieve 'health' in namespace 'metallb-system': %w", err)
		h.logger.Error(wrapped, "Health check failed")
		return nil, wrapped
	}

	if healthy, err := IsHealthy(health); !healthy {
		h.logger.Error(err, "Health check failed")
		return &healthcheck.SingleCheckResult{
			Status: gardencorev1beta1.ConditionFalse,
			Detail: err.Error(),
		}, nil
	}

	return &healthcheck.SingleCheckResult{
		Status: gardencorev1beta1.ConditionTrue,
	}, nil
}

// IsHealthy evaluates the data of the given health ConfigMap.
//
// It returns true and a nil error when the "configLoaded" entry equals "1" and
// the "configStale" entry does not equal "1". Otherwise it returns false
// together with an error describing the reason. The "configLoaded" entry is
// checked first.
func IsHealthy(health *v1.ConfigMap) (bool, error) {
	data := health.Data

	switch {
	case data["configLoaded"] != "1":
		return false, fmt.Errorf("metallb configmap is not loaded")
	case data["configStale"] == "1":
		return false, fmt.Errorf("metallb configmap is stale / erroneous, next speaker reload may interrupt workload traffic")
	}

	return true, nil
}

0 comments on commit fb1db9a

Please sign in to comment.