From 966ff13db582eed174d718848d79512eae2d8442 Mon Sep 17 00:00:00 2001 From: "J.P. Neverwas" Date: Sat, 2 Oct 2021 01:08:09 -0700 Subject: [PATCH] Add health check --- .gitignore | 1 + Dockerfile | 4 ++ examples/irccat.json | 7 ++- healthcheck/healthcheck.go | 43 ++++++++++++++++++ httplistener/health.go | 44 ++++++++++++++++++ httplistener/health_test.go | 88 ++++++++++++++++++++++++++++++++++++ httplistener/httplistener.go | 6 +++ irc.go | 4 ++ main.go | 16 +++++++ util/timestamp.go | 53 ++++++++++++++++++++++ util/timestamp_test.go | 49 ++++++++++++++++++++ 11 files changed, 313 insertions(+), 2 deletions(-) create mode 100644 healthcheck/healthcheck.go create mode 100644 httplistener/health.go create mode 100644 httplistener/health_test.go create mode 100644 util/timestamp.go create mode 100644 util/timestamp_test.go diff --git a/.gitignore b/.gitignore index f164b68..071b778 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ irccat irccat.json +health.timestamp diff --git a/Dockerfile b/Dockerfile index b28f51b..f60e019 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,8 +21,12 @@ RUN CGO_ENABLED=0 go get -t -v ./... && go build -a . # Step two: copy over the binary and root certs FROM scratch COPY --from=build /go/bin/irccat /irccat +COPY --from=build /go/bin/healthcheck /healthcheck COPY --from=build /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt +# Docker-only (no OCI) +HEALTHCHECK --interval=60s --timeout=10s --start-period=600s CMD ["/healthcheck"] + EXPOSE 12345 EXPOSE 8045 diff --git a/examples/irccat.json b/examples/irccat.json index d4fa251..c716a54 100644 --- a/examples/irccat.json +++ b/examples/irccat.json @@ -17,7 +17,8 @@ "irccat": "#irccat-dev" } } - } + }, + "health_endpoint": "/healthz" }, "irc": { "server": "irc.example.com:6697", @@ -30,7 +31,9 @@ "sasl_login": "", "sasl_pass": "", "channels": ["#channel"], - "keys": {"#channel": "join_key"} + "keys": {"#channel": "join_key"}, + "health_file": "./health.timestamp", + "health_period": "5m" }, "commands": { "auth_channel": "#channel", diff --git a/healthcheck/healthcheck.go b/healthcheck/healthcheck.go new file mode 100644 index 0000000..76d3789 --- /dev/null +++ b/healthcheck/healthcheck.go @@ -0,0 +1,43 @@ +// Command healthcheck reads a file containing a UNIX UTC timestamp and exits +// nonzero after printing a message to stderr if the allotted time has elapsed +// since a value was last recorded. Config value irc.health_period specifies +// the max allotted time plus a small cushion for lag. It defaults to fifteen +// minutes, which is go-ircevent's default PING frequency (as of +// github.com/thoj/go-ircevent@v0.0.0-20210723090443-73e444401d64). Optional +// config item irc.health_file designates the path to the timestamp file. If +// unset, the program says so (to stderr) and exits zero. +package main + +import ( + "log" + "time" + + "github.com/irccloud/irccat/util" + "github.com/spf13/viper" +) + +var lagInterval = 10 * time.Second // for testing +var defaultPeriod = 15 * time.Minute + +func main() { + viper.SetConfigName("irccat") + viper.AddConfigPath("/etc") + viper.AddConfigPath(".") + viper.SetDefault("irc.health_period", defaultPeriod) + + if err := viper.ReadInConfig(); err != nil { + log.Fatalln("Error reading config file. Exiting.") + } + + healthFile := viper.GetString("irc.health_file") + if healthFile == "" { + log.Println("Config option irc.health_file unset; exiting.") + return + } + + freq := lagInterval + viper.GetDuration("irc.health_period") + + if err := util.CheckTimestamp(healthFile, freq); err != nil { + log.Fatalln(err.Error()) + } +} diff --git a/httplistener/health.go b/httplistener/health.go new file mode 100644 index 0000000..56b9c5a --- /dev/null +++ b/httplistener/health.go @@ -0,0 +1,44 @@ +package httplistener + +import ( + "fmt" + "net/http" + "time" + + "github.com/irccloud/irccat/util" + "github.com/spf13/viper" +) + +var LagInterval = 10 * time.Second // for testing +var DefaultPeriod = 15 * time.Minute + +// healthHandler returns non-2xx if the configured timeout has elapsed. +// +// This mainly exists to present an interface for supervisors relying on +// liveliness/readiness probes (e.g., for Kubernetes deployments). However, a +// conservative client could query it before sending a payload. See also +// /healthcheck in this same pkg. +func (hl *HTTPListener) healthHandler( + w http.ResponseWriter, + request *http.Request, +) { + if request.Method != "GET" { + http.NotFound(w, request) + return + } + healthFile := viper.GetString("irc.health_file") + if healthFile == "" { + http.NotFound(w, request) + return + } + viper.SetDefault("irc.health_period", DefaultPeriod) + + freq := LagInterval + viper.GetDuration("irc.health_period") + err := util.CheckTimestamp(healthFile, freq) + if err != nil { + log.Criticalf("%s", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + fmt.Fprintln(w, "OK") +} diff --git a/httplistener/health_test.go b/httplistener/health_test.go new file mode 100644 index 0000000..29c837c --- /dev/null +++ b/httplistener/health_test.go @@ -0,0 +1,88 @@ +package httplistener + +import ( + "bytes" + "context" + "fmt" + "io" + "path" + "testing" + "time" + + "net/http" + + "github.com/irccloud/irccat/util" + "github.com/juju/loggo" + "github.com/spf13/viper" +) + +var origLag = LagInterval + +var configFmt = `--- +irc: + health_file: %s + health_period: 1ms +http: + health_endpoint: /testing/healthz +` + +func startServer(t *testing.T) { + hl := HTTPListener{ + http: http.Server{Addr: "localhost:18045"}, + } + http.HandleFunc(viper.GetString("http.health_endpoint"), hl.healthHandler) + go hl.http.ListenAndServe() + t.Cleanup(func() {hl.http.Shutdown(context.Background())}) +} + +func getOne(t *testing.T) (*http.Response, string) { + res, err := http.Get("http://localhost:18045/testing/healthz") + if err != nil { + t.Error(err) + } + got, err := io.ReadAll(res.Body) + if err != nil { + t.Error(err) + } + err = res.Body.Close() + if err != nil { + t.Error(err) + } + return res, string(got) +} + +func TestHealthHandler(t *testing.T) { + writer, err := loggo.RemoveWriter("default") + if err != nil { + t.Error(err) + } + t.Cleanup(func() {loggo.DefaultContext().AddWriter("default", writer)}) + LagInterval = 0 + t.Cleanup(func() {LagInterval = origLag}) + dir := t.TempDir() + now := time.Now() + file := path.Join(dir, "timestamp") + if err := util.WriteTimestamp(file, now); err != nil { + t.Error(err) + } + viper.SetConfigType("yaml") + config := []byte(fmt.Sprintf(configFmt, file)) + viper.ReadConfig(bytes.NewBuffer(config)) + startServer(t) + time.Sleep(time.Millisecond) + // Fail + resp, got := getOne(t) + if resp.StatusCode != 500 { + t.Error("unexpected status", resp.Status) + } + t.Log(resp.Status, got) + // Success + viper.Set("irc.health_period", time.Second) + resp, got = getOne(t) + if resp.StatusCode != 200 { + t.Error("unexpected failure", resp.Status) + } + if string(got) != "OK\n" { + t.Error("unexpected output", string(got)) + } +} diff --git a/httplistener/httplistener.go b/httplistener/httplistener.go index cebe60a..4a0366a 100644 --- a/httplistener/httplistener.go +++ b/httplistener/httplistener.go @@ -45,6 +45,12 @@ func New(irc *irc.Connection) (*HTTPListener, error) { mux.HandleFunc("/prometheus", hl.prometheusHandler) } + if viper.IsSet("http.health_endpoint") { + ep := viper.GetString("http.health_endpoint") + log.Infof("Listening for HTTP GET requests at", ep) + mux.HandleFunc(ep, hl.healthHandler) + } + hl.http.Handler = mux if viper.GetBool("http.tls") { go hl.http.ListenAndServeTLS(viper.GetString("http.tls_cert"), viper.GetString("http.tls_key")) diff --git a/irc.go b/irc.go index 0f0e5da..6ceb663 100644 --- a/irc.go +++ b/irc.go @@ -17,6 +17,9 @@ func (i *IRCCat) connectIRC(debug bool) error { irccon.Timeout = time.Second * 15 irccon.RequestCaps = []string{"away-notify", "account-notify", "draft/message-tags-0.2"} irccon.UseTLS = viper.GetBool("irc.tls") + if viper.IsSet("irc.health_period") { + irccon.PingFreq = viper.GetDuration("irc.health_period") + } if viper.IsSet("irc.sasl_pass") && viper.GetString("irc.sasl_pass") != "" { if viper.IsSet("irc.sasl_login") && viper.GetString("irc.sasl_login") != "" { @@ -52,6 +55,7 @@ func (i *IRCCat) connectIRC(debug bool) error { irccon.AddCallback("QUIT", i.handleQuit) irccon.AddCallback("KILL", i.handleQuit) irccon.AddCallback("NICK", i.handleNick) + irccon.AddCallback("PONG", i.handlePong) return nil } diff --git a/main.go b/main.go index ce0e923..3e4ae2d 100644 --- a/main.go +++ b/main.go @@ -3,6 +3,8 @@ package main import ( "flag" "fmt" + "time" + "github.com/deckarep/golang-set" "github.com/fsnotify/fsnotify" "github.com/irccloud/irccat/httplistener" @@ -13,6 +15,7 @@ import ( "os" "os/signal" "syscall" + "github.com/irccloud/irccat/util" ) var log = loggo.GetLogger("main") @@ -106,3 +109,16 @@ func (i *IRCCat) handleConfigChange(e fsnotify.Event) { i.channels.Remove(channel) } } + +// handlePong writes the current time in UNIX seconds to a designated file. +// Opt in by specifying a file path for config item irc.health_file. +func (i *IRCCat) handlePong(e *irc.Event) { + healthFile := viper.GetString("irc.health_file") + if healthFile == "" { + return + } + err:= util.WriteTimestamp(healthFile, time.Now()) + if err != nil { + log.Criticalf("%s", err) + } +} diff --git a/util/timestamp.go b/util/timestamp.go new file mode 100644 index 0000000..d1ae8bc --- /dev/null +++ b/util/timestamp.go @@ -0,0 +1,53 @@ +package util + +import ( + "fmt" + "os" + "strconv" + "strings" + "time" +) + +func isExpired(lastStamp int64, period time.Duration) bool { + return time.Now().Sub(time.Unix(lastStamp, 0)) > period +} + +func getTimestamp(tsFile string) (int64, error) { + raw, err := os.ReadFile(tsFile) + if err != nil { + return 0, fmt.Errorf("Couldn't read timestamp file: %s", err) + } + s := strings.TrimRight(string(raw), "\n") + ts, err := strconv.ParseInt(s, 10, 64) + if err != nil { + // Parsing error already includes offending text + return 0, fmt.Errorf("Couldn't parse timestamp: %s", err) + } + return ts, nil +} + +// WriteTimestamp creates a timestamp file. +func WriteTimestamp(tsFile string, t time.Time) error { + s := fmt.Sprintf("%d\n", t.Unix()) + err := os.WriteFile(tsFile, []byte(s), 0666) + if err != nil { + return fmt.Errorf("Couldn't write to timestamp file: %s", err) + } + return nil +} + +// CheckTimestamp reads a timestamp file and returns an error if it's expired. +// +// The file should contain a decimal representation of a Unix timestamp and an +// optional LF newline. +func CheckTimestamp(tsFile string, period time.Duration) error { + ts, err := getTimestamp(tsFile) + if err != nil { + return err + } + if isExpired(ts, period) { + diff := time.Now().Sub(time.Unix(ts, 0).Add(period)) + return fmt.Errorf("Timeout exceeded by %s", diff.String()) + } + return nil +} diff --git a/util/timestamp_test.go b/util/timestamp_test.go new file mode 100644 index 0000000..dd50b7f --- /dev/null +++ b/util/timestamp_test.go @@ -0,0 +1,49 @@ +package util + +import ( + "os" + "path" + "testing" + "time" +) + +func TestIsExpired(t *testing.T) { + ts := time.Now().Unix() + if isExpired(ts, time.Second) { + t.Error("Shouldn't have expired") + } + if !isExpired(ts, time.Microsecond) { + t.Error("Should've expired") + } +} + +func TestCheckTimestamp(t *testing.T) { + dir := t.TempDir() + now := time.Now() + file := path.Join(dir, "timestamp") + WriteTimestamp(file, now) + if err := CheckTimestamp(file, time.Second); err != nil { + t.Error(err) + } + err := CheckTimestamp(file, time.Microsecond) + if err == nil { + t.Error("Did not raise timeout") + } + t.Log(err) + fake := path.Join(dir, "fake") + // File missing + err = CheckTimestamp(fake, time.Second) + if err == nil { + t.Error("Did not raise missing file error") + } + t.Log(err) + // Wrong format + if err := os.WriteFile(fake, []byte{':', '('}, 0600); err != nil { + t.Error(err) + } + err = CheckTimestamp(fake, time.Second) + if err == nil { + t.Error("Did not raise bad parse error") + } + t.Log(err) +}