From 1b54de205304fb654068c3fd438a8f2497f2551f Mon Sep 17 00:00:00 2001 From: Aditya Thebe Date: Thu, 16 Jan 2025 21:19:57 +0545 Subject: [PATCH] fix: Certificate health checks and new tests --- pkg/health/health_cert_manager.go | 160 +++++++++--------- pkg/health/health_test.go | 33 +++- .../certificate-issuing-first-time.yaml | 42 +++++ ...rtificate-issuing-manually-triggered.yaml} | 0 pkg/health/testdata/certificate-renewal.yaml | 40 +++++ 5 files changed, 195 insertions(+), 80 deletions(-) create mode 100644 pkg/health/testdata/certificate-issuing-first-time.yaml rename pkg/health/testdata/{certificate-issuing.yaml => certificate-issuing-manually-triggered.yaml} (100%) create mode 100644 pkg/health/testdata/certificate-renewal.yaml diff --git a/pkg/health/health_cert_manager.go b/pkg/health/health_cert_manager.go index 2a44a8f..1f6b8c5 100644 --- a/pkg/health/health_cert_manager.go +++ b/pkg/health/health_cert_manager.go @@ -155,22 +155,57 @@ func GetCertificateHealth(obj *unstructured.Unstructured) (*HealthStatus, error) continue } - switch string(c.Type) { - case string(certmanagerv1.CertificateConditionIssuing): - if cert.Status.NotBefore != nil { - hs := &HealthStatus{ - Status: HealthStatusCode(c.Reason), - Ready: false, - Message: c.Message, - } + if c.Type == certmanagerv1.CertificateConditionIssuing { + hs := &HealthStatus{ + Status: HealthStatusCode(c.Reason), + Ready: false, + Message: c.Message, + } - switch c.Reason { - case "ManuallyTriggered": - // Basically a renewal + switch c.Reason { + case "ManuallyTriggered", DoesNotExist: + // We check for expiry below + hs.Status = "Issuing" + hs.Health = HealthUnknown + + case Renewing: + renewalTime := cert.Status.RenewalTime.Time + + if time.Since(renewalTime) > certRenewalWarningPeriod { + hs.Health = HealthWarning + hs.Message = fmt.Sprintf( + "Certificate has been in renewal state for > %s", + time.Since(renewalTime).Truncate(time.Minute), + ) + } else { hs.Health = HealthHealthy - return hs, nil + } - default: + default: + unhealthyReasons := map[string]string{ + MissingData: "Certificate secret has missing data", + InvalidKeyPair: "Public key of certificate does not match private key", + InvalidCertificate: "Signed certificate could not be parsed or decoded", + InvalidCertificateRequest: "CSR could not be parsed or decoded", + SecretMismatch: "Secret's private key does not match spec", + IncorrectIssuer: "Certificate has been issued by incorrect Issuer", + IncorrectCertificate: "Certificate's secretName already has an annotation with another Certificate", + Expired: "Certificate has expired", + SecretTemplateMismatch: "SecretTemplate is not reflected on the target Secret", + SecretManagedMetadataMismatch: "Secret is missing labels that should have been added by cert-manager", + AdditionalOutputFormatsMismatch: "Certificate's AdditionalOutputFormats are not reflected on the target Secret", + ManagedFieldsParseError: "cert-manager was unable to decode the managed fields on a resource", + SecretOwnerRefMismatch: "Secret has an incorrect owner reference", + } + + if msg, exists := unhealthyReasons[string(c.Reason)]; exists { + return &HealthStatus{ + Health: HealthUnhealthy, + Status: HealthStatusCode(c.Reason), + Message: msg, + Ready: true, + }, nil + } else if cert.Status.NotBefore != nil { if overdue := time.Since(cert.Status.NotBefore.Time); overdue > time.Hour { hs.Health = HealthUnhealthy return hs, nil @@ -181,75 +216,17 @@ func GetCertificateHealth(obj *unstructured.Unstructured) (*HealthStatus, error) } } - case Renewing: - if cert.Status.RenewalTime != nil { - renewalTime := cert.Status.RenewalTime.Time - - if time.Since(renewalTime) > certRenewalWarningPeriod { - return &HealthStatus{ - Health: HealthWarning, - Status: HealthStatusWarning, - Message: fmt.Sprintf("Certificate has been in renewal state for > %s", time.Since(renewalTime)), - }, nil - } - } - - hs := &HealthStatus{ - Status: HealthStatusCode(c.Reason), - Ready: false, - Message: c.Message, - } - - return hs, nil - - default: - missingCases := map[string]string{ - DoesNotExist: "Certificate secret does not exist", - MissingData: "Certificate secret has missing data", - InvalidKeyPair: "Public key of certificate does not match private key", - InvalidCertificate: "Signed certificate could not be parsed or decoded", - InvalidCertificateRequest: "CSR could not be parsed or decoded", - SecretMismatch: "Secret's private key does not match spec", - IncorrectIssuer: "Certificate has been issued by incorrect Issuer", - IncorrectCertificate: "Certificate's secretName already has an annotation with another Certificate", - Expired: "Certificate has expired", - SecretTemplateMismatch: "SecretTemplate is not reflected on the target Secret", - SecretManagedMetadataMismatch: "Secret is missing labels that should have been added by cert-manager", - AdditionalOutputFormatsMismatch: "Certificate's AdditionalOutputFormats are not reflected on the target Secret", - ManagedFieldsParseError: "cert-manager was unable to decode the managed fields on a resource", - SecretOwnerRefMismatch: "Secret has an incorrect owner reference", - } - - if msg, exists := missingCases[string(c.Type)]; exists { - return &HealthStatus{ - Health: HealthUnhealthy, - Status: HealthStatusCode(c.Type), - Message: msg, - Ready: true, - }, nil + // If we're issuing a new cert, at least ensure the existing cert hasn't expired + if expiryHealth := certExpiryCheck(cert); expiryHealth != nil { + return expiryHealth, nil + } else { + return hs, nil } } } - if cert.Status.NotAfter != nil { - notAfterTime := cert.Status.NotAfter.Time - if notAfterTime.Before(time.Now()) { - return &HealthStatus{ - Health: HealthUnhealthy, - Status: "Expired", - Message: "Certificate has expired", - Ready: true, - }, nil - } - - if time.Until(notAfterTime) < certExpiryWarningPeriod { - return &HealthStatus{ - Health: HealthWarning, - Status: HealthStatusWarning, - Message: fmt.Sprintf("Certificate is expiring soon (%s)", notAfterTime), - Ready: true, - }, nil - } + if expiryHealth := certExpiryCheck(cert); expiryHealth != nil { + return expiryHealth, nil } if cert.Status.RenewalTime != nil { @@ -272,3 +249,30 @@ func GetCertificateHealth(obj *unstructured.Unstructured) (*HealthStatus, error) return status, nil } + +func certExpiryCheck(cert certmanagerv1.Certificate) *HealthStatus { + if cert.Status.NotAfter == nil { + return nil + } + + notAfterTime := cert.Status.NotAfter.Time + if notAfterTime.Before(time.Now()) { + return &HealthStatus{ + Health: HealthUnhealthy, + Status: "Expired", + Message: "Certificate has expired", + Ready: true, + } + } + + if time.Until(notAfterTime) < certExpiryWarningPeriod { + return &HealthStatus{ + Health: HealthWarning, + Status: HealthStatusWarning, + Message: fmt.Sprintf("Certificate is expiring soon (%s)", notAfterTime), + Ready: true, + } + } + + return nil +} diff --git a/pkg/health/health_test.go b/pkg/health/health_test.go index 81e942a..544cea5 100644 --- a/pkg/health/health_test.go +++ b/pkg/health/health_test.go @@ -287,12 +287,41 @@ func TestCertificate(t *testing.T) { // "2024-06-26T12:25:46Z": time.Now().Add(time.Hour).UTC().Format("2006-01-02T15:04:05Z"), // }, health.HealthStatusWarning, health.HealthWarning, true) - assertAppHealthMsg(t, "./testdata/certificate-issuing.yaml", "ManuallyTriggered", health.HealthHealthy, false) + assertAppHealthWithOverwriteMsg(t, "./testdata/certificate-renewal.yaml", map[string]string{ + "2025-01-16T14:04:53Z": time.Now().Add(-time.Hour).UTC().Format(time.RFC3339), // not Before + "2025-01-16T14:09:52Z": time.Now().Add(-time.Minute * 10).UTC().Format(time.RFC3339), // renewal time + }, "Renewing", health.HealthHealthy, false, "Renewing certificate as renewal was scheduled at 2025-01-16 14:09:47 +0000 UTC") + + assertAppHealthWithOverwriteMsg(t, "./testdata/certificate-renewal.yaml", map[string]string{ + "2025-01-16T14:04:53Z": time.Now().Add(-time.Hour).UTC().Format(time.RFC3339), // not Before + "2025-01-16T14:09:52Z": time.Now(). + Add(-time.Minute * 40). + UTC(). + Format(time.RFC3339), + // renewal time over the grace period + }, "Renewing", health.HealthWarning, false, "Certificate has been in renewal state for > 40m0s") + + assertAppHealthMsg(t, "./testdata/certificate-issuing-first-time.yaml", "Issuing", health.HealthUnknown, false) + + assertAppHealthMsg( + t, + "./testdata/certificate-issuing-manually-triggered.yaml", + "Issuing", + health.HealthUnknown, + false, + ) assertAppHealthMsg(t, "./testdata/certificate-healthy.yaml", "Issued", health.HealthHealthy, true) b := "../resource_customizations/cert-manager.io/Certificate/testdata/" assertAppHealthMsg(t, b+"degraded_configError.yaml", "ConfigError", health.HealthUnhealthy, true) - assertAppHealthMsg(t, b+"progressing_issuing.yaml", "Issuing", health.HealthUnknown, false) + assertAppHealthMsg( + t, + b+"progressing_issuing.yaml", + "Issuing", + health.HealthUnknown, + false, + "Issuing certificate as Secret does not exist", + ) } func TestExternalSecrets(t *testing.T) { diff --git a/pkg/health/testdata/certificate-issuing-first-time.yaml b/pkg/health/testdata/certificate-issuing-first-time.yaml new file mode 100644 index 0000000..052a9bd --- /dev/null +++ b/pkg/health/testdata/certificate-issuing-first-time.yaml @@ -0,0 +1,42 @@ +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"cert-manager.io/v1","kind":"Certificate","metadata":{"annotations":{},"name":"shortlived","namespace":"nginx-ingress"},"spec":{"commonName":"example.com","dnsNames":["testing-cert-manager.example.com"],"duration":"1h","issuerRef":{"kind":"ClusterIssuer","name":"letsencrypt-production"},"renewBefore":"50m","secretName":"shortlived"}} + creationTimestamp: "2025-01-16T14:27:19Z" + generation: 1 + name: shortlived + namespace: nginx-ingress + resourceVersion: "128050007" + uid: bb3563f9-5fae-4f3d-aaa1-8de659aee674 +spec: + commonName: example.com + dnsNames: + - testing-cert-manager.example.com + duration: 1h + issuerRef: + kind: ClusterIssuer + name: letsencrypt-staging + renewBefore: 50m + secretName: shortlived +status: + conditions: + - lastTransitionTime: "2025-01-16T14:27:19Z" + message: 'The certificate request has failed to complete and will be retried: + The CSR PEM requests a commonName that is not present in the list of dnsNames + or ipAddresses. If a commonName is set, ACME requires that the value is also + present in the list of dnsNames or ipAddresses: "example.com" does not exist + in [testing-cert-manager.example.com] or []' + observedGeneration: 1 + reason: Failed + status: "False" + type: Issuing + - lastTransitionTime: "2025-01-16T14:27:19Z" + message: Issuing certificate as Secret does not exist + observedGeneration: 1 + reason: DoesNotExist + status: "False" + type: Ready + failedIssuanceAttempts: 1 + lastFailureTime: "2025-01-16T14:27:19Z" diff --git a/pkg/health/testdata/certificate-issuing.yaml b/pkg/health/testdata/certificate-issuing-manually-triggered.yaml similarity index 100% rename from pkg/health/testdata/certificate-issuing.yaml rename to pkg/health/testdata/certificate-issuing-manually-triggered.yaml diff --git a/pkg/health/testdata/certificate-renewal.yaml b/pkg/health/testdata/certificate-renewal.yaml new file mode 100644 index 0000000..ff223bd --- /dev/null +++ b/pkg/health/testdata/certificate-renewal.yaml @@ -0,0 +1,40 @@ +apiVersion: cert-manager.io/v1 +kind: Certificate +metadata: + annotations: + kubectl.kubernetes.io/last-applied-configuration: | + {"apiVersion":"cert-manager.io/v1","kind":"Certificate","metadata":{"annotations":{},"name":"super-fast-renewal","namespace":"nginx-ingress"},"spec":{"commonName":"testing-cert-manager.example.com","dnsNames":["testing-cert-manager.example.com"],"issuerRef":{"kind":"ClusterIssuer","name":"letsencrypt-staging"},"renewBefore":"2159h55m","secretName":"super-fast-renewal"}} + creationTimestamp: "2025-01-16T15:03:17Z" + generation: 1 + name: super-fast-renewal + namespace: nginx-ingress + resourceVersion: "128058895" + uid: d05b4281-57e7-41bb-85ab-3b333dc6f5e6 +spec: + commonName: testing-cert-manager.example.com + dnsNames: + - testing-cert-manager.example.com + issuerRef: + kind: ClusterIssuer + name: letsencrypt-staging + renewBefore: 2159h55m + secretName: super-fast-renewal +status: + conditions: + - lastTransitionTime: "2025-01-16T15:03:23Z" + message: Certificate is up to date and has not expired + observedGeneration: 1 + reason: Ready + status: "True" + type: Ready + - lastTransitionTime: "2025-01-16T15:03:24Z" + message: Renewing certificate as renewal was scheduled at 2025-01-16 14:09:47 +0000 UTC + observedGeneration: 1 + reason: Renewing + status: "True" + type: Issuing + nextPrivateKeySecretName: super-fast-renewal-b7nfs + notAfter: "2025-04-16T14:04:52Z" + notBefore: "2025-01-16T14:04:53Z" + renewalTime: "2025-01-16T14:09:52Z" + revision: 2 \ No newline at end of file