Skip to content

Commit

Permalink
wip: dispatch alerts with threads
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-karan committed Feb 18, 2022
1 parent 6e4cbb1 commit 9271b28
Show file tree
Hide file tree
Showing 9 changed files with 212 additions and 21 deletions.
2 changes: 2 additions & 0 deletions cmd/init.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,13 @@ func initProviders(ko *koanf.Koanf, lo *logrus.Logger) []prvs.Provider {
case "google_chat":
gchat, err := prvs.NewGoogleChat(
prvs.GoogleChatOpts{
Log: lo,
Timeout: ko.MustDuration(fmt.Sprintf("%s.timeout", cfgKey)),
MaxIdleConn: ko.MustInt(fmt.Sprintf("%s.max_idle_conns", cfgKey)),
ProxyURL: ko.String(fmt.Sprintf("%s.proxy_url", cfgKey)),
Endpoint: ko.String(fmt.Sprintf("%s.endpoint", cfgKey)),
Room: name,
Template: ko.String(fmt.Sprintf("%s.template", cfgKey)),
},
)
if err != nil {
Expand Down
10 changes: 6 additions & 4 deletions config.sample.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,17 @@ server_timeout = "5s"
enable_request_logs = true

[providers.prod_alerts]
endpoint = "https://chat.googleapis.com/v1/spaces/xxx/messages?key=abc-xyz&token=token-unique-key%3D"
endpoint = "https://chat.googleapis.com/v1/spaces/xxx/messages?key=key&token=token"
type = "google_chat"
max_idle_conns = 50
timeout = "7s"
proxy_url = "http://internal-squid-proxy.com:3128"
# proxy_url = "http://internal-squid-proxy.com:3128"
template = "static/message.tmpl"

[providers.dev_alerts]
endpoint = "https://chat.googleapis.com/v1/spaces/xxx/messages?key=abc-xyz&token=token-unique-key%3D"
endpoint = "https://chat.googleapis.com/v1/spaces/xxx/messages?key=key&token=token"
type = "google_chat"
max_idle_conns = 50
timeout = "7s"
proxy_url = "http://internal-squid-proxy.com:3128"
# proxy_url = "http://internal-squid-proxy.com:3128"
template = "static/message.tmpl"
2 changes: 1 addition & 1 deletion dev/alertmanager/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ route:
group_wait: 15s
group_interval: 30s
repeat_interval: 1m
group_by: ['room']
group_by: ['room', 'alertName']

receivers:
- name: 'calert'
Expand Down
4 changes: 2 additions & 2 deletions dev/prometheus/alert.rules
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ groups:
for: 10s
labels:
severity: deadman
room: dev-ops-alerts
room: dev_alerts
annotations:
title: "This is a dummy alert"
description: This is a DeadMansSwitch meant to ensure that the entire Alerting
Expand All @@ -19,7 +19,7 @@ groups:
for: 10s
labels:
severity: deadman
room: dev-ops-alerts
room: prod_alerts
annotations:
title: "This is a dummy alert"
description: This is a DeadMansSwitch meant to ensure that the entire Alerting
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ module github.com/mr-karan/calert

require (
github.com/go-chi/chi v1.5.4
github.com/gofrs/uuid v4.0.0+incompatible
github.com/knadh/koanf v1.4.0
github.com/prometheus/alertmanager v0.23.0
github.com/sirupsen/logrus v1.6.0
Expand Down
1 change: 1 addition & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,7 @@ github.com/gobuffalo/packd v0.1.0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWe
github.com/gobuffalo/packr/v2 v2.0.9/go.mod h1:emmyGweYTm6Kdper+iywB6YK5YzuKchGtJQZ0Odn4pQ=
github.com/gobuffalo/packr/v2 v2.2.0/go.mod h1:CaAwI0GPIAv+5wKLtv8Afwl+Cm78K/I/VCm/3ptBN+0=
github.com/gobuffalo/syncx v0.0.0-20190224160051-33c29581e754/go.mod h1:HhnNqWY95UYwwW3uSASeV7vtgYkT2t16hJgV3AEPUpw=
github.com/gofrs/uuid v4.0.0+incompatible h1:1SD/1F5pU8p29ybwgQSwpQk+mwdRrXCYuPhW6m+TnJw=
github.com/gofrs/uuid v4.0.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
Expand Down
10 changes: 5 additions & 5 deletions internal/notifier/notifier.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
package notifier

import (
"fmt"

"github.com/mr-karan/calert/internal/providers"
alertmgrtmpl "github.com/prometheus/alertmanager/template"
"github.com/sirupsen/logrus"
Expand Down Expand Up @@ -49,9 +47,11 @@ func (n *Notifier) Dispatch(alerts []alertmgrtmpl.Alert) error {
// For each room, dispatch the alert based on their provider.
for k, v := range alertsByRoom {
// Do a lookup for the provider by the room name and push the alerts.
// TODO: Check if the key is here.
fmt.Println(k, v)
// n.providers[k].Push(v)
if _, ok := n.providers[k]; !ok {
n.lo.WithField("room", k).Warn("no provider available for room")
continue
}
n.providers[k].Push(v)
}
return nil
}
199 changes: 192 additions & 7 deletions internal/providers/google_chat.go
Original file line number Diff line number Diff line change
@@ -1,27 +1,103 @@
package providers

import (
"bytes"
"encoding/json"
"errors"
"fmt"
"html/template"
"net/http"
"net/url"
"strings"
"sync"
"time"

"github.com/gofrs/uuid"
alertmgrtmpl "github.com/prometheus/alertmanager/template"
"github.com/sirupsen/logrus"
)

const (
maxMsgSize = 4096
)

// ActiveAlerts represents a map of alerts unique fingerprint hash
// with their details.
// We store this map to use "threading" in Google Chat.
// Alertmanager doesn't send a unique ID either, so we need to prune
// the alerts based on a TTL from a config and hence we need a lock here.
type ActiveAlerts struct {
sync.RWMutex
alerts map[string]AlertDetails
}

type AlertDetails struct {
StartsAt time.Time
UUID uuid.UUID
}

type ChatMessage struct {
Text string `json:"text"`
}

// add adds an alert to the active alerts map.
func (d *ActiveAlerts) add(a alertmgrtmpl.Alert) error {
d.Lock()
defer d.Unlock()

// Create a UUID for the alert. This UUID is
// sent as a `threadKey` param in G-Chat API.
// Set UUID for the alert.
uid, err := uuid.NewV4()
if err != nil {
return err
}

// Add the alert metadata to the map.
d.alerts[a.Fingerprint] = AlertDetails{
UUID: uid,
StartsAt: a.StartsAt,
}

return nil
}

// remove removes the alert from the active alerts map.
func (d *ActiveAlerts) remove(fingerprint string) {
d.Lock()
defer d.Unlock()
delete(d.alerts, fingerprint)
}

// loookup retrievs the UUID for the alert based on the fingerprint.
func (d *ActiveAlerts) loookup(fingerprint string) string {
d.Lock()
defer d.Unlock()
// Do a lookup for the provider by the room name and push the alerts.
if _, ok := d.alerts[fingerprint]; !ok {
return ""
}
return d.alerts[fingerprint].UUID.String()
}

// GoogleChatManager represents the various methods for interacting with Google Chat.
type GoogleChatManager struct {
endpoint string
room string
client *http.Client
lo *logrus.Logger
activeAlerts ActiveAlerts
endpoint string
room string
client *http.Client
msgTmpl *template.Template
}

type GoogleChatOpts struct {
Log *logrus.Logger
MaxIdleConn int
Timeout time.Duration
ProxyURL string
Endpoint string
Room string
Template string
}

// NewGoogleChat initializes a Google Chat provider object.
Expand Down Expand Up @@ -53,16 +129,58 @@ func NewGoogleChat(opts GoogleChatOpts) (*GoogleChatManager, error) {
Transport: transport,
}

// Initialise the map of active alerts.
alerts := make(map[string]AlertDetails, 0)

a := ActiveAlerts{
alerts: alerts,
}

// common funcs used in template
templateFuncMap := template.FuncMap{
"Title": strings.Title,
"toUpper": strings.ToUpper,
"Contains": strings.Contains,
}
// read template file
tmpl, err := template.New("message.tmpl").Funcs(templateFuncMap).ParseFiles(opts.Template)
if err != nil {
return nil, err
}

return &GoogleChatManager{
client: client,
endpoint: opts.Endpoint,
room: opts.Room,
lo: opts.Log,
client: client,
endpoint: opts.Endpoint,
room: opts.Room,
activeAlerts: a,
msgTmpl: tmpl,
}, nil
}

// Push sends out events to an HTTP Endpoint.
func (m *GoogleChatManager) Push(alerts []alertmgrtmpl.Alert) error {
fmt.Println("got alerts", len(alerts))
m.lo.WithField("count", len(alerts)).Info("dispatching alerts to google chat")
// For each alert, lookup the UUID and send the alert.
for _, a := range alerts {
if m.activeAlerts.loookup(a.Fingerprint) == "" {
m.activeAlerts.add(a)
}
msgs, err := m.prepareMessage(a)
if err != nil {
m.lo.WithError(err).Error("error preparing message")
continue
}
// If message is split in multiple parts
// due to size limit, send as individual HTTP requests.
for _, msg := range msgs {
err := m.sendMessage(msg, m.activeAlerts.alerts[a.Fingerprint].UUID.String())
if err != nil {
m.lo.WithError(err).Error("error sending message")
continue
}
}
}
return nil
}

Expand All @@ -75,3 +193,70 @@ func (m *GoogleChatManager) GetRoom() string {
func (m *GoogleChatManager) ID() string {
return "google_chat"
}

func (m *GoogleChatManager) prepareMessage(alert alertmgrtmpl.Alert) ([]ChatMessage, error) {
var (
str strings.Builder
to bytes.Buffer
msg ChatMessage
)
messages := make([]ChatMessage, 0)

err := m.msgTmpl.Execute(&to, alert)
if err != nil {
m.lo.WithError(err).Error("Error parsing values in template")
return messages, err
}
if (len(str.String()) + len(to.String())) >= maxMsgSize {
msg.Text = str.String()
messages = append(messages, msg)
str.Reset()
}
str.WriteString(to.String())
str.WriteString("\n")

// prepare request payload for Google chat webhook endpoint
msg.Text = str.String()
messages = append(messages, msg)

return messages, nil
}

// PushNotification pushes out a notification to Google Chat Room.
func (m *GoogleChatManager) sendMessage(msg ChatMessage, threadKey string) error {
out, err := json.Marshal(msg)
if err != nil {
return err
}
// Set threadkey in the URL
u, err := url.Parse(m.endpoint)
if err != nil {
return err
}
q := u.Query()
q.Set("threadKey", threadKey)
u.RawQuery = q.Encode()
endpoint := u.String()

// prepare POST request to webhook endpoint.
req, err := http.NewRequest("POST", endpoint, bytes.NewBuffer(out))
if err != nil {
return err
}
req.Header.Set("Content-Type", "application/json")

// send the request
resp, err := m.client.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()

// if response is not 200 log error response from gchat
if resp.StatusCode != http.StatusOK {
m.lo.WithField("status", resp.StatusCode).Error("Non OK HTTP Response received from Google Chat Webhook endpoint")
return errors.New("non ok response from gchat")
}

return nil
}
4 changes: 2 additions & 2 deletions static/message.tmpl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
*{{ .Labels.alertname | Title }} - {{.Status | Title }} ({{.Labels.severity | toUpper }})*
*({{.Labels.severity | toUpper }}) {{ .Labels.alertname | Title }} - {{.Status | Title }}*
{{ range .Annotations.SortedPairs -}}
{{ .Name }}: {{ .Value}}
{{ .Name | Title }}: {{ .Value}}
{{ end -}}

0 comments on commit 9271b28

Please sign in to comment.