From de798c026488177498790f262a270a487018ca85 Mon Sep 17 00:00:00 2001 From: Jean-Francois Roy Date: Fri, 8 Nov 2024 10:25:18 -0800 Subject: [PATCH] feat(health): add check endpoint and loop control On Kubernetes, it makes more sense to use a liveness probe than the health server loop (i.e. only have one loop). This patch introduces a flag to disable the health server loop, and a new /check/ endpoint for such probes. When the connection is saturated, health checks can take a long time and therefore fail if the timeout is too short. Gradually increasing the timeout, as done in the health server loop, is not all that useful because the upper bound on the timeout is ultimately what you are willing to tolerate before declaring the connection unhealthy. So a static probe with a long timeout and a failure count, as implemented in Kubernetes, will be more stable (i.e. less flapping), especially if more than one sequential failure is allowed. The above argument aside, the two health/probe loops also do not work well together because they can get out of phase. Kubernetes probes usually must be used to sequence containers in a pod. 
Signed-off-by: Jean-Francois Roy --- Dockerfile | 1 + internal/configuration/settings/health.go | 13 ++++++++++--- internal/healthcheck/run.go | 8 ++++++-- internal/healthcheck/server.go | 15 ++++++++++++++- 4 files changed, 31 insertions(+), 6 deletions(-) diff --git a/Dockerfile b/Dockerfile index e6a0bb071..0513b0c71 100644 --- a/Dockerfile +++ b/Dockerfile @@ -161,6 +161,7 @@ ENV VPN_SERVICE_PROVIDER=pia \ LOG_LEVEL=info \ # Health HEALTH_SERVER_ADDRESS=127.0.0.1:9999 \ + HEALTH_SERVER_DISABLE_LOOP=off \ HEALTH_TARGET_ADDRESS=cloudflare.com:443 \ HEALTH_SUCCESS_WAIT_DURATION=5s \ HEALTH_VPN_DURATION_INITIAL=6s \ diff --git a/internal/configuration/settings/health.go b/internal/configuration/settings/health.go index 4035e2097..5db503584 100644 --- a/internal/configuration/settings/health.go +++ b/internal/configuration/settings/health.go @@ -34,6 +34,8 @@ type Health struct { SuccessWait time.Duration // VPN has health settings specific to the VPN loop. VPN HealthyWait + // Disable the healthcheck server loop. Useful for environments like Kubernetes which have their own healthcheck loops. 
+ DisableLoop *bool } func (h Health) Validate() (err error) { @@ -58,6 +60,7 @@ func (h *Health) copy() (copied Health) { TargetAddress: h.TargetAddress, SuccessWait: h.SuccessWait, VPN: h.VPN.copy(), + DisableLoop: gosettings.CopyPointer(h.DisableLoop), } } @@ -71,6 +74,7 @@ func (h *Health) OverrideWith(other Health) { h.TargetAddress = gosettings.OverrideWithComparable(h.TargetAddress, other.TargetAddress) h.SuccessWait = gosettings.OverrideWithComparable(h.SuccessWait, other.SuccessWait) h.VPN.overrideWith(other.VPN) + h.DisableLoop = gosettings.OverrideWithPointer(h.DisableLoop, other.DisableLoop) } func (h *Health) SetDefaults() { @@ -83,6 +87,7 @@ func (h *Health) SetDefaults() { const defaultSuccessWait = 5 * time.Second h.SuccessWait = gosettings.DefaultComparable(h.SuccessWait, defaultSuccessWait) h.VPN.setDefaults() + h.DisableLoop = gosettings.DefaultPointer(h.DisableLoop, false) } func (h Health) String() string { @@ -97,6 +102,7 @@ func (h Health) toLinesNode() (node *gotree.Node) { node.Appendf("Read header timeout: %s", h.ReadHeaderTimeout) node.Appendf("Read timeout: %s", h.ReadTimeout) node.AppendNode(h.VPN.toLinesNode("VPN")) + node.Appendf("Disable loop: %s", gosettings.BoolToYesNo(h.DisableLoop)) return node } @@ -104,16 +110,17 @@ func (h *Health) Read(r *reader.Reader) (err error) { h.ServerAddress = r.String("HEALTH_SERVER_ADDRESS") h.TargetAddress = r.String("HEALTH_TARGET_ADDRESS", reader.RetroKeys("HEALTH_ADDRESS_TO_PING")) - h.SuccessWait, err = r.Duration("HEALTH_SUCCESS_WAIT_DURATION") if err != nil { return err } - err = h.VPN.read(r) if err != nil { return fmt.Errorf("VPN health settings: %w", err) } - + h.DisableLoop, err = r.BoolPtr("HEALTH_SERVER_DISABLE_LOOP") + if err != nil { + return err + } return nil } diff --git a/internal/healthcheck/run.go b/internal/healthcheck/run.go index 5f7bb7fcb..9234e627a 100644 --- a/internal/healthcheck/run.go +++ b/internal/healthcheck/run.go @@ -11,11 +11,15 @@ func (s *Server) Run(ctx 
context.Context, done chan<- struct{}) { defer close(done) loopDone := make(chan struct{}) - go s.runHealthcheckLoop(ctx, loopDone) + if !*s.config.DisableLoop { + go s.runHealthcheckLoop(ctx, loopDone) + } else { + close(loopDone) + } server := http.Server{ Addr: s.config.ServerAddress, - Handler: s.handler, + Handler: s.mux, ReadHeaderTimeout: s.config.ReadHeaderTimeout, ReadTimeout: s.config.ReadTimeout, } diff --git a/internal/healthcheck/server.go b/internal/healthcheck/server.go index c3a3a6be8..c94321cfa 100644 --- a/internal/healthcheck/server.go +++ b/internal/healthcheck/server.go @@ -3,6 +3,7 @@ package healthcheck import ( "context" "net" + "net/http" "github.com/qdm12/gluetun/internal/configuration/settings" "github.com/qdm12/gluetun/internal/models" @@ -14,12 +15,13 @@ type Server struct { dialer *net.Dialer config settings.Health vpn vpnHealth + mux *http.ServeMux } func NewServer(config settings.Health, logger Logger, vpnLoop StatusApplier, ) *Server { - return &Server{ + s := &Server{ logger: logger, handler: newHandler(), dialer: &net.Dialer{ @@ -32,7 +34,18 @@ func NewServer(config settings.Health, loop: vpnLoop, healthyWait: *config.VPN.Initial, }, + mux: http.NewServeMux(), } + s.mux.Handle("/", s.handler) + s.mux.HandleFunc("/check/", func(w http.ResponseWriter, r *http.Request) { + err := s.healthCheck(r.Context()) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusOK) + }) + return s } type StatusApplier interface {