diff --git a/.gitignore b/.gitignore index 68e788b..ab465d2 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,5 @@ testbin/* bundle/ bundle.Dockerfile -charts/ \ No newline at end of file +charts/ +config/local-development/tilt/replace-image.yaml diff --git a/Makefile b/Makefile index 11af2f5..6ba05b4 100644 --- a/Makefile +++ b/Makefile @@ -203,18 +203,22 @@ catalog-build: opm ## Build a catalog image. catalog-push: ## Push a catalog image. $(MAKE) docker-push IMG=$(CATALOG_IMG) -# Generate helm chart helmchart: kustomize mkdir -p ./charts/${OPERATOR_NAME}/templates mkdir -p ./charts/${OPERATOR_NAME}/crds + repo=${OPERATOR_NAME} envsubst < ./config/local-development/tilt/env-replace-image.yaml > ./config/local-development/tilt/replace-image.yaml + $(KUSTOMIZE) build ./config/helmchart -o ./charts/${OPERATOR_NAME}/templates + sed -i 's/\([{}]\{2\}\)/{{ "\1" }}/g' ./charts/${OPERATOR_NAME}/templates/monitoring.coreos.com_v1_prometheusrule_${OPERATOR_NAME}-certificate-rule-alerts.yaml + sed -i 's/release-namespace/{{.Release.Namespace}}/' ./charts/${OPERATOR_NAME}/templates/*.yaml + rm ./charts/${OPERATOR_NAME}/templates/v1_namespace_release-namespace.yaml ./charts/${OPERATOR_NAME}/templates/apps_v1_deployment_${OPERATOR_NAME}-controller-manager.yaml cp ./config/helmchart/templates/* ./charts/${OPERATOR_NAME}/templates - $(KUSTOMIZE) build ./config/helmchart | sed 's/namespace: system/namespace: {{ .Release.Namespace }}/' > ./charts/${OPERATOR_NAME}/templates/rbac.yaml - if [ -d "./config/crd" ]; then $(KUSTOMIZE) build ./config/crd > ./charts/${OPERATOR_NAME}/crds/crds.yaml; fi version=${VERSION} envsubst < ./config/helmchart/Chart.yaml.tpl > ./charts/${OPERATOR_NAME}/Chart.yaml version=${VERSION} image_repo=$${IMG%:*} envsubst < ./config/helmchart/values.yaml.tpl > ./charts/${OPERATOR_NAME}/values.yaml - sed -i '/^apiVersion: monitoring.coreos.com/i {{ if .Values.enableMonitoring }}' ./charts/${OPERATOR_NAME}/templates/rbac.yaml - echo {{ end }} >> 
./charts/${OPERATOR_NAME}/templates/rbac.yaml - helm lint ./charts/${OPERATOR_NAME} + sed -i '1s/^/{{ if .Values.enableMonitoring }}/' ./charts/${OPERATOR_NAME}/templates/monitoring.coreos.com_v1_servicemonitor_${OPERATOR_NAME}-controller-manager-metrics-monitor.yaml + echo {{ end }} >> ./charts/${OPERATOR_NAME}/templates/monitoring.coreos.com_v1_servicemonitor_${OPERATOR_NAME}-controller-manager-metrics-monitor.yaml + sed -i '1s/^/{{ if .Values.enableMonitoring }}/' ./charts/${OPERATOR_NAME}/templates/monitoring.coreos.com_v1_prometheusrule_${OPERATOR_NAME}-certificate-rule-alerts.yaml + echo {{ end }} >> ./charts/${OPERATOR_NAME}/templates/monitoring.coreos.com_v1_prometheusrule_${OPERATOR_NAME}-certificate-rule-alerts.yaml + helm lint ./charts/${OPERATOR_NAME} helmchart-repo: helmchart mkdir -p ${HELM_REPO_DEST}/${OPERATOR_NAME} diff --git a/README.md b/README.md index 95e0d13..105c32a 100644 --- a/README.md +++ b/README.md @@ -113,6 +113,17 @@ Here is an example of a certificate soon-to-expiry event: ![cert-expiry](media/cert-expiry.png) +In addition to this, the operator generates the following metrics for all TLS certificates: + +| Metric Name | Description | +|:-:|:-:| +| certutils_certificate_issue_time | time at which the certificate was created in seconds from January 1, 1970 UTC | +| certutils_certificate_expiry_time | time at which the certificate expires in seconds from January 1, 1970 UTC | +| cert:validity_duration:sec | duration of the certificate validity in seconds | +| cert:time_to_expiration:sec | time left to expiration in seconds | + +The operator also sets two alerts that fire respectively when a certificate has 15% and 5% left of its lifetime.
+ ## CA Injection [ValidatingWebhookConfiguration](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/), [MutatingWebhokConfiguration](https://kubernetes.io/docs/reference/access-authn-authz/extensible-admission-controllers/) [CustomResourceDefinition](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/custom-resources/) and [APIService](https://kubernetes.io/docs/concepts/extend-kubernetes/api-extension/apiserver-aggregation/) types of objects (and possibly in the future others) need the master API process to connect to trusted servers to perform their function. In order to do so over an encrypted connection, a CA bundle needs to be configured. In these objects the CA bundle is passed as part of the CR and not as a secret, and that is fine because the CA bundles are public info. However it may be difficult at deploy time to know what the correct CA bundle should be. Often the CA bundle needs to be discovered as a piece on information owned by some other objects of the cluster. @@ -187,7 +198,7 @@ It is recommended to deploy this operator via [`OperatorHub`](https://operatorhu | amd64 | ✅ | | arm64 | ✅ | | ppc64le | ✅ | -| s390x | ❌ | +| s390x | ✅ | ### Deploying from OperatorHub @@ -243,13 +254,16 @@ helm upgrade cert-utils-operator cert-utils-operator/cert-utils-operator ## Running the operator locally +> Note: this operator build process is tested with [podman](https://podman.io/), but some of the build files (Makefile specifically) use docker because they are generated automatically by operator-sdk. It is recommended to [remap the docker command to the podman command](https://developers.redhat.com/blog/2020/11/19/transitioning-from-docker-to-podman#transition_to_the_podman_cli).
+ ```shell -make manifests -oc new-project cert-utils-operator-local -kustomize build ./config/local-development | oc apply -f - -n cert-utils-operator-local -export token=$(oc serviceaccounts get-token 'cert-utils-controller-manager' -n cert-utils-operator-local) -oc login --token ${token} -make run ENABLE_WEBHOOKS=false +export repo=raffaelespazzoli +docker login quay.io/$repo +oc new-project cert-utils-operator +oc project cert-utils-operator +oc label namespace cert-utils-operator openshift.io/cluster-monitoring="true" +envsubst < config/local-development/tilt/env-replace-image.yaml > config/local-development/tilt/replace-image.yaml +tilt up ``` ### Test helm chart locally diff --git a/Tiltfile b/Tiltfile new file mode 100644 index 0000000..dff3c15 --- /dev/null +++ b/Tiltfile @@ -0,0 +1,25 @@ +# -*- mode: Python -*- + +compile_cmd = 'CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o bin/manager main.go' +image = 'quay.io/' + os.environ['repo'] + '/cert-utils-operator' + +local_resource( + 'cert-utils-operator-compile', + compile_cmd, + deps=['./main.go','./api','./controllers']) + + +custom_build( + image, + 'podman build -t $EXPECTED_REF --ignorefile ci.Dockerfile.dockerignore -f ./ci.Dockerfile . 
&& podman push $EXPECTED_REF $EXPECTED_REF', + entrypoint=['/manager'], + deps=['./bin'], + live_update=[ + sync('./bin/manager',"/manager"), + ], + skips_local_docker=True, +) + +allow_k8s_contexts(k8s_context()) +k8s_yaml(kustomize('./config/local-development/tilt')) +k8s_resource('cert-utils-operator-controller-manager',resource_deps=['cert-utils-operator-compile']) \ No newline at end of file diff --git a/ci.Dockerfile.dockerignore b/ci.Dockerfile.dockerignore new file mode 100644 index 0000000..e004133 --- /dev/null +++ b/ci.Dockerfile.dockerignore @@ -0,0 +1,7 @@ +api/ +bundle/ +config/ +controllers/ +examples/ +hack/ +test/ diff --git a/config/default/kustomization.yaml b/config/default/kustomization.yaml index aff7bd3..21ce407 100644 --- a/config/default/kustomization.yaml +++ b/config/default/kustomization.yaml @@ -57,6 +57,11 @@ vars: name: controller-manager-metrics fieldref: fieldpath: metadata.namespace +- name: ROLE_NAME + objref: + kind: Role + apiVersion: rbac.authorization.k8s.io/v1 + name: prometheus-k8s # [CERTMANAGER] To enable cert-manager, uncomment all sections with 'CERTMANAGER' prefix. #- name: CERTIFICATE_NAMESPACE # namespace of the certificate CR # objref: diff --git a/config/helmchart/cert-manager-ca-injection.yaml b/config/helmchart/cert-manager-ca-injection.yaml new file mode 100644 index 0000000..4864299 --- /dev/null +++ b/config/helmchart/cert-manager-ca-injection.yaml @@ -0,0 +1,4 @@ +- op: add + path: /metadata/annotations + value: + cert-manager.io/inject-ca-from: "{{ .Release.Namespace }}/webhook-server-cert" \ No newline at end of file diff --git a/config/helmchart/kustomization.yaml b/config/helmchart/kustomization.yaml index 632b11e..08999f2 100644 --- a/config/helmchart/kustomization.yaml +++ b/config/helmchart/kustomization.yaml @@ -1,34 +1,18 @@ -# Adds namespace to all resources. - - -# Value of this field is prepended to the -# names of all resources, e.g. a deployment named -# "wordpress" becomes "alices-wordpress". 
-# Note that it should also match with the prefix (text before '-') of the namespace -# field above. -namePrefix: cert-utils-operator- - -# Labels to add to all resources and selectors. -#commonLabels: -# someName: someValue - -resources: -- service-account.yaml +namespace: release-namespace bases: -- ../rbac -- ../prometheus +- ../local-development/tilt -vars: -- name: METRICS_SERVICE_NAME - objref: - kind: Service +patchesJson6902: +- target: + group: admissionregistration.k8s.io version: v1 - name: controller-manager-metrics -- name: METRICS_SERVICE_NAMESPACE - objref: - kind: Service + kind: MutatingWebhookConfiguration + name: cert-utils-operator-mutating-webhook-configuration + path: ./cert-manager-ca-injection.yaml +- target: + group: admissionregistration.k8s.io version: v1 - name: controller-manager-metrics - fieldref: - fieldpath: metadata.namespace + kind: ValidatingWebhookConfiguration + name: cert-utils-operator-validating-webhook-configuration + path: ./cert-manager-ca-injection.yaml \ No newline at end of file diff --git a/config/local-development/tilt/env-replace-image.yaml b/config/local-development/tilt/env-replace-image.yaml new file mode 100644 index 0000000..0528a8d --- /dev/null +++ b/config/local-development/tilt/env-replace-image.yaml @@ -0,0 +1,8 @@ +- op: replace + path: /spec/template/spec/containers/1/image + value: + quay.io/$repo/cert-utils-operator:latest +- op: add + path: /spec/template/spec/containers/1/args/- + value: + --zap-devel=true \ No newline at end of file diff --git a/config/local-development/tilt/kustomization.yaml b/config/local-development/tilt/kustomization.yaml new file mode 100644 index 0000000..7aff76d --- /dev/null +++ b/config/local-development/tilt/kustomization.yaml @@ -0,0 +1,19 @@ +# Adds namespace to all resources. +namespace: cert-utils-operator + +# Labels to add to all resources and selectors. 
+#commonLabels: +# someName: someValue + +bases: +- ../../default +- ./service-account.yaml + + +patchesJson6902: +- target: + group: apps + version: v1 + kind: Deployment + name: cert-utils-operator-controller-manager + path: ./replace-image.yaml \ No newline at end of file diff --git a/config/local-development/tilt/prometheus-role.yaml b/config/local-development/tilt/prometheus-role.yaml new file mode 100644 index 0000000..cd159cb --- /dev/null +++ b/config/local-development/tilt/prometheus-role.yaml @@ -0,0 +1,15 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: prometheus-k8s +rules: +- apiGroups: + - "" + resources: + - endpoints + - pods + - services + verbs: + - get + - list + - watch \ No newline at end of file diff --git a/config/local-development/tilt/prometheus-rolebinding.yaml b/config/local-development/tilt/prometheus-rolebinding.yaml new file mode 100644 index 0000000..9a6c911 --- /dev/null +++ b/config/local-development/tilt/prometheus-rolebinding.yaml @@ -0,0 +1,12 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: prometheus-k8s +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: prometheus-k8s +subjects: +- kind: ServiceAccount + name: prometheus-k8s + namespace: openshift-monitoring \ No newline at end of file diff --git a/config/helmchart/service-account.yaml b/config/local-development/tilt/service-account.yaml similarity index 78% rename from config/helmchart/service-account.yaml rename to config/local-development/tilt/service-account.yaml index 0033602..7cd6025 100644 --- a/config/helmchart/service-account.yaml +++ b/config/local-development/tilt/service-account.yaml @@ -2,4 +2,4 @@ apiVersion: v1 kind: ServiceAccount metadata: name: controller-manager - namespace: system \ No newline at end of file + namespace: system diff --git a/config/manifests/bases/cert-utils-operator.clusterserviceversion.yaml 
b/config/manifests/bases/cert-utils-operator.clusterserviceversion.yaml index ccd6c88..84fe0d4 100644 --- a/config/manifests/bases/cert-utils-operator.clusterserviceversion.yaml +++ b/config/manifests/bases/cert-utils-operator.clusterserviceversion.yaml @@ -11,6 +11,7 @@ metadata: description: Set of utilities for TLS certificates operatorframework.io/suggested-namespace: cert-utils-operator operators.openshift.io/infrastructure-features: '["Disconnected"]' + operatorframework.io/cluster-monitoring: "true" repository: https://github.com/redhat-cop/cert-utils-operator support: Best Effort labels: diff --git a/config/prometheus/kustomization.yaml b/config/prometheus/kustomization.yaml index 81dff32..8b963cd 100644 --- a/config/prometheus/kustomization.yaml +++ b/config/prometheus/kustomization.yaml @@ -1,5 +1,8 @@ resources: - monitor.yaml +- role.yaml +- rolebinding.yaml +- rules.yaml configurations: - kustomizeconfig.yaml diff --git a/config/prometheus/kustomizeconfig.yaml b/config/prometheus/kustomizeconfig.yaml index c85f72a..99912d8 100644 --- a/config/prometheus/kustomizeconfig.yaml +++ b/config/prometheus/kustomizeconfig.yaml @@ -1,4 +1,6 @@ --- varReference: - path: spec/endpoints/tlsConfig/serverName - kind: ServiceMonitor \ No newline at end of file + kind: ServiceMonitor +- path: roleRef/name + kind: RoleBinding \ No newline at end of file diff --git a/config/prometheus/role.yaml b/config/prometheus/role.yaml new file mode 100644 index 0000000..459dbd4 --- /dev/null +++ b/config/prometheus/role.yaml @@ -0,0 +1,16 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: prometheus-k8s + namespace: system +rules: + - apiGroups: + - "" + resources: + - endpoints + - pods + - services + verbs: + - get + - list + - watch diff --git a/config/prometheus/rolebinding.yaml b/config/prometheus/rolebinding.yaml new file mode 100644 index 0000000..bc01d93 --- /dev/null +++ b/config/prometheus/rolebinding.yaml @@ -0,0 +1,13 @@ +apiVersion: 
rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: prometheus-k8s + namespace: system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: $(ROLE_NAME) +subjects: + - kind: ServiceAccount + name: prometheus-k8s + namespace: openshift-monitoring diff --git a/config/prometheus/rules.yaml b/config/prometheus/rules.yaml new file mode 100644 index 0000000..be1bc37 --- /dev/null +++ b/config/prometheus/rules.yaml @@ -0,0 +1,34 @@ +apiVersion: monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: certificate-rule-alerts +spec: + groups: + - name: cert-utils-operator-recording-rules + rules: + - record: cert:validity_duration:sec + expr: certutils_certificate_expiry_time - certutils_certificate_issue_time + - record: cert:time_to_expiration:sec + expr: certutils_certificate_expiry_time - time() + - name: cert-utils-operator-alerting-rules + rules: + - alert: CertificateApproachingExpiration + annotations: + message: >- + Certificate {{ $labels.namespace }}/{{ $labels.name }} is at 85% of its lifetime + summary: >- + Certificate {{ $labels.namespace }}/{{ $labels.name }} is at 85% of its lifetime + expr: | + cert:time_to_expiration:sec/cert:validity_duration:sec < 0.15 + labels: + severity: warning + - alert: CertificateIsAboutToExpire + annotations: + message: >- + Certificate {{ $labels.namespace }}/{{ $labels.name }} is at 95% of its lifetime + summary: >- + Certificate {{ $labels.namespace }}/{{ $labels.name }} is at 95% of its lifetime + expr: > + cert:time_to_expiration:sec/cert:validity_duration:sec < 0.05 + labels: + severity: critical \ No newline at end of file diff --git a/controllers/certexpiryalert/certexpiryalert_controller.go b/controllers/certexpiryalert/certexpiryalert_controller.go index 990df7e..f32b75e 100644 --- a/controllers/certexpiryalert/certexpiryalert_controller.go +++ b/controllers/certexpiryalert/certexpiryalert_controller.go @@ -5,10 +5,12 @@ import ( "crypto/x509" "encoding/pem" "fmt" + "math" 
"reflect" "time" "github.com/go-logr/logr" + "github.com/prometheus/client_golang/prometheus" "github.com/redhat-cop/cert-utils-operator/controllers/util" outils "github.com/redhat-cop/operator-utils/pkg/util" corev1 "k8s.io/api/core/v1" @@ -17,6 +19,8 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/builder" "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/metrics" "sigs.k8s.io/controller-runtime/pkg/predicate" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -42,10 +46,48 @@ type CertExpiryAlertReconciler struct { controllerName string } +var ( + issueTime = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: "certutils", + Name: "certificate_issue_time", + Help: "time at which the certificate was issued in number of seconds from January 1, 1970 UTC", + }, + []string{"name", "namespace"}, + ) + expiryTime = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Subsystem: "certutils", + Name: "certificate_expiry_time", + Help: "time at which the certificate expires in number of seconds from January 1, 1970 UTC", + }, + []string{"name", "namespace"}, + ) +) + +func init() { + // Register custom metrics with the global prometheus registry + metrics.Registry.MustRegister(issueTime, expiryTime) +} + +func deleteMetrics(ctx context.Context, secret *corev1.Secret) { + issueTime.DeleteLabelValues(secret.Name, secret.Namespace) + expiryTime.DeleteLabelValues(secret.Name, secret.Namespace) +} + +func updateMetrics(ctx context.Context, secret *corev1.Secret) { + creation, expiry := getCreationAndExpiry(ctx, secret) + creationGauge := issueTime.WithLabelValues(secret.Name, secret.Namespace) + expiryGauge := expiryTime.WithLabelValues(secret.Name, secret.Namespace) + creationGauge.Set(float64(creation.Unix())) + expiryGauge.Set(float64(expiry.Unix())) +} + // SetupWithManager sets up the controller with the Manager. 
func (r *CertExpiryAlertReconciler) SetupWithManager(mgr ctrl.Manager) error { r.controllerName = "certexpiryalert_controller" - + ctx := context.TODO() + ctx = log.IntoContext(ctx, r.Log) isAnnotatedSecret := predicate.Funcs{ UpdateFunc: func(e event.UpdateEvent) bool { oldSecret, ok := e.ObjectOld.(*corev1.Secret) @@ -59,6 +101,7 @@ func (r *CertExpiryAlertReconciler) SetupWithManager(mgr ctrl.Manager) error { if newSecret.Type != util.TLSSecret { return false } + updateMetrics(ctx, newSecret) oldValue, _ := e.ObjectOld.GetAnnotations()[certExpiryAlertAnnotation] newValue, _ := e.ObjectNew.GetAnnotations()[certExpiryAlertAnnotation] old := oldValue == "true" @@ -78,9 +121,21 @@ func (r *CertExpiryAlertReconciler) SetupWithManager(mgr ctrl.Manager) error { if secret.Type != util.TLSSecret { return false } + updateMetrics(ctx, secret) value, _ := e.Object.GetAnnotations()[certExpiryAlertAnnotation] return value == "true" }, + DeleteFunc: func(e event.DeleteEvent) bool { + secret, ok := e.Object.(*corev1.Secret) + if !ok { + return false + } + if secret.Type != util.TLSSecret { + return false + } + deleteMetrics(ctx, secret) + return false + }, } return ctrl.NewControllerManagedBy(mgr). @@ -139,6 +194,22 @@ func (r *CertExpiryAlertReconciler) Reconcile(context context.Context, req ctrl. 
} +func getCreationAndExpiry(ctx context.Context, secret *corev1.Secret) (time.Time, time.Time) { + ilog := log.FromContext(ctx) + creation := time.Unix(1, 0) + expiry := time.Unix(math.MaxInt32, 0) + for p, rest := pem.Decode(secret.Data[util.Cert]); p != nil; p, rest = pem.Decode(rest) { + cert, err := x509.ParseCertificate(p.Bytes) + if err != nil { + ilog.Error(err, "unable to decode this entry, skipping", "entry", string(p.Bytes)) + continue + } + expiry = min(expiry, cert.NotAfter) + creation = max(creation, cert.NotBefore) + } + return creation, expiry +} + func (r *CertExpiryAlertReconciler) getExpiry(secret *corev1.Secret) time.Time { result := time.Time{} for p, rest := pem.Decode(secret.Data[util.Cert]); p != nil; p, rest = pem.Decode(rest) { @@ -163,6 +234,13 @@ func min(a time.Time, b time.Time) time.Time { return b } +func max(a time.Time, b time.Time) time.Time { + if a.After(b) { + return a + } + return b +} + func (r *CertExpiryAlertReconciler) getExpiryThreshold(secret *corev1.Secret) time.Duration { sthreshold, ok := secret.GetAnnotations()[certSoonToExpireThresholdAnnotation] if !ok { diff --git a/go.mod b/go.mod index 804eabd..7770ad3 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/openshift/api v3.9.0+incompatible github.com/pavel-v-chernykh/keystore-go v2.1.0+incompatible github.com/pavel-v-chernykh/keystore-go/v4 v4.2.0 + github.com/prometheus/client_golang v1.7.1 github.com/redhat-cop/operator-utils v1.1.4 github.com/scylladb/go-set v1.0.2 github.com/stretchr/testify v1.6.1