Skip to content

Commit

Permalink
Rate limit log messages sent to honeycomb logger (#200)
Browse files Browse the repository at this point in the history
We have encountered a failure case in Refinery where logging overwhelms downstream shepherd. Adding configurable rate-limiting to the Honeycomb logger to prevent this.

* prevent overwhelming the downstream service, when getting a spike of
similar messages (e.g. the same error)
* the sampling strategy will rate-limit per [log level + log message] key, with a configurable throughput
* sampler is disabled by default, enabled via config

### Considerations
* I looked into adding an integration test. I could see the outbound events traffic by injecting into the http transport, but the sampling is determined by events/sec so it was a spaghetti-o of thread.sleeps and log messages.
  * Realistically, we'd need to inject the Honeycomb WriterSender as the logger transmission (similarly to the app integration test), but that felt like a larger change than I was up for this round.
* I didn't update anything in the README, because that seemed too high level, but I will update the public config docs as a follow-up: https://docs.honeycomb.io/working-with-your-data/best-practices/refinery/configuration/
  • Loading branch information
vreynolds authored Dec 28, 2020
1 parent fcd79cd commit 0ed99dc
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 8 deletions.
91 changes: 91 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,3 +460,94 @@ func TestDefaultSampler(t *testing.T) {

assert.IsType(t, &DeterministicSamplerConfig{}, s)
}

func TestHoneycombLoggerConfig(t *testing.T) {
tmpDir, err := ioutil.TempDir("", "")
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)

rulesFile, err := ioutil.TempFile(tmpDir, "*.toml")
assert.NoError(t, err)

configFile, err := ioutil.TempFile(tmpDir, "*.toml")
assert.NoError(t, err)

dummy := []byte(`
[InMemCollector]
CacheCapacity=1000
[HoneycombMetrics]
MetricsHoneycombAPI="http://honeycomb.io"
MetricsAPIKey="1234"
MetricsDataset="testDatasetName"
MetricsReportingInterval=3
[HoneycombLogger]
LoggerHoneycombAPI="http://honeycomb.io"
LoggerAPIKey="1234"
LoggerDataset="loggerDataset"
LoggerSamplerEnabled=true
LoggerSamplerThroughput=10
`)

_, err = configFile.Write(dummy)
assert.NoError(t, err)
configFile.Close()

c, err := NewConfig(configFile.Name(), rulesFile.Name(), func(err error) {})

assert.NoError(t, err)

loggerConfig, err := c.GetHoneycombLoggerConfig()

assert.NoError(t, err)

assert.Equal(t, "http://honeycomb.io", loggerConfig.LoggerHoneycombAPI)
assert.Equal(t, "1234", loggerConfig.LoggerAPIKey)
assert.Equal(t, "loggerDataset", loggerConfig.LoggerDataset)
assert.Equal(t, true, loggerConfig.LoggerSamplerEnabled)
assert.Equal(t, 10, loggerConfig.LoggerSamplerThroughput)
}

func TestHoneycombLoggerConfigDefaults(t *testing.T) {
tmpDir, err := ioutil.TempDir("", "")
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)

rulesFile, err := ioutil.TempFile(tmpDir, "*.toml")
assert.NoError(t, err)

configFile, err := ioutil.TempFile(tmpDir, "*.toml")
assert.NoError(t, err)

dummy := []byte(`
[InMemCollector]
CacheCapacity=1000
[HoneycombMetrics]
MetricsHoneycombAPI="http://honeycomb.io"
MetricsAPIKey="1234"
MetricsDataset="testDatasetName"
MetricsReportingInterval=3
[HoneycombLogger]
LoggerHoneycombAPI="http://honeycomb.io"
LoggerAPIKey="1234"
LoggerDataset="loggerDataset"
`)

_, err = configFile.Write(dummy)
assert.NoError(t, err)
configFile.Close()

c, err := NewConfig(configFile.Name(), rulesFile.Name(), func(err error) {})

assert.NoError(t, err)

loggerConfig, err := c.GetHoneycombLoggerConfig()

assert.NoError(t, err)

assert.Equal(t, false, loggerConfig.LoggerSamplerEnabled)
assert.Equal(t, 5, loggerConfig.LoggerSamplerThroughput)
}
16 changes: 12 additions & 4 deletions config/file_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,10 +83,12 @@ type InMemoryCollectorCacheCapacity struct {
type HoneycombLevel int

type HoneycombLoggerConfig struct {
LoggerHoneycombAPI string `validate:"required,url"`
LoggerAPIKey string `validate:"required"`
LoggerDataset string `validate:"required"`
Level HoneycombLevel
LoggerHoneycombAPI string `validate:"required,url"`
LoggerAPIKey string `validate:"required"`
LoggerDataset string `validate:"required"`
LoggerSamplerEnabled bool
LoggerSamplerThroughput int
Level HoneycombLevel
}

type PrometheusMetricsConfig struct {
Expand Down Expand Up @@ -131,6 +133,8 @@ func NewConfig(config, rules string, errorCallback func(error)) (Config, error)
c.SetDefault("UpstreamBufferSize", libhoney.DefaultPendingWorkCapacity)
c.SetDefault("PeerBufferSize", libhoney.DefaultPendingWorkCapacity)
c.SetDefault("MaxAlloc", uint64(0))
c.SetDefault("HoneycombLogger.LoggerSamplerEnabled", false)
c.SetDefault("HoneycombLogger.LoggerSamplerThroughput", 5)

c.SetConfigFile(config)
err := c.ReadInConfig()
Expand Down Expand Up @@ -452,6 +456,10 @@ func (f *fileConfig) GetHoneycombLoggerConfig() (HoneycombLoggerConfig, error) {
return *hlConfig, err
}

// https://github.com/spf13/viper/issues/747
hlConfig.LoggerSamplerEnabled = f.config.GetBool("HoneycombLogger.LoggerSamplerEnabled")
hlConfig.LoggerSamplerThroughput = f.config.GetInt("HoneycombLogger.LoggerSamplerThroughput")

v := validator.New()
err = v.Struct(hlConfig)
if err != nil {
Expand Down
14 changes: 11 additions & 3 deletions config_complete.toml
Original file line number Diff line number Diff line change
Expand Up @@ -199,18 +199,26 @@ MaxAlloc = 0

# LoggerHoneycombAPI is the URL for the upstream Honeycomb API.
# Eligible for live reload.
LoggerHoneycombAPI = "https://api.honeycomb.io"
LoggerHoneycombAPI = "https://api.honeycomb.io"

# LoggerAPIKey is the API key to use to send log events to the Honeycomb logging
# dataset. This is separate from the APIKeys used to authenticate regular
# traffic.
# Eligible for live reload.
LoggerAPIKey = "abcd1234"
LoggerAPIKey = "abcd1234"

# LoggerDataset is the name of the dataset to which to send Refinery logs
# Eligible for live reload.
LoggerDataset = "Refinery Logs"
LoggerDataset = "Refinery Logs"

# LoggerSamplerEnabled enables a PerKeyThroughput dynamic sampler for log messages.
# This will sample log messages based on [log level:message] key on a per second throughput basis.
# Not eligible for live reload.
LoggerSamplerEnabled = true

# LoggerSamplerThroughput is the per key per second throughput for the log message dynamic sampler.
# Not eligible for live reload.
LoggerSamplerThroughput = 10

#######################
## Honeycomb Metrics ##
Expand Down
31 changes: 30 additions & 1 deletion logger/honeycomb.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"strings"
"time"

"github.com/honeycombio/dynsampler-go"
libhoney "github.com/honeycombio/libhoney-go"
"github.com/honeycombio/libhoney-go/transmission"

Expand All @@ -24,11 +25,13 @@ type HoneycombLogger struct {
loggerConfig config.HoneycombLoggerConfig
libhClient *libhoney.Client
builder *libhoney.Builder
sampler dynsampler.Sampler
}

type HoneycombEntry struct {
loggerConfig config.HoneycombLoggerConfig
builder *libhoney.Builder
sampler dynsampler.Sampler
}

const (
Expand Down Expand Up @@ -66,6 +69,18 @@ func (h *HoneycombLogger) Start() error {
}
}

if loggerConfig.LoggerSamplerEnabled {
h.sampler = &dynsampler.PerKeyThroughput{
ClearFrequencySec: 10,
PerKeyThroughputPerSec: loggerConfig.LoggerSamplerThroughput,
MaxKeys: 1000,
}
err := h.sampler.Start()
if err != nil {
return err
}
}

libhClientConfig := libhoney.ClientConfig{
APIHost: h.loggerConfig.LoggerHoneycombAPI,
APIKey: h.loggerConfig.LoggerAPIKey,
Expand Down Expand Up @@ -106,6 +121,8 @@ func (h *HoneycombLogger) readResponses() {
respString := fmt.Sprintf("Response: status: %d, duration: %dms", resp.StatusCode, resp.Duration)
// read response, log if there's an error
switch {
case resp.StatusCode == 0: // log message dropped due to sampling
continue
case resp.Err != nil:
fmt.Fprintf(os.Stderr, "Honeycomb Logger got an error back from Honeycomb while trying to send a log line: %s, error: %s, body: %s\n", respString, resp.Err.Error(), string(resp.Body))
case resp.StatusCode > 202:
Expand Down Expand Up @@ -147,6 +164,7 @@ func (h *HoneycombLogger) Debug() Entry {
ev := &HoneycombEntry{
loggerConfig: h.loggerConfig,
builder: h.builder.Clone(),
sampler: h.sampler,
}
ev.builder.AddField("level", "debug")

Expand All @@ -161,6 +179,7 @@ func (h *HoneycombLogger) Info() Entry {
ev := &HoneycombEntry{
loggerConfig: h.loggerConfig,
builder: h.builder.Clone(),
sampler: h.sampler,
}
ev.builder.AddField("level", "info")

Expand All @@ -175,6 +194,7 @@ func (h *HoneycombLogger) Error() Entry {
ev := &HoneycombEntry{
loggerConfig: h.loggerConfig,
builder: h.builder.Clone(),
sampler: h.sampler,
}
ev.builder.AddField("level", "error")

Expand Down Expand Up @@ -218,10 +238,19 @@ func (h *HoneycombEntry) WithFields(fields map[string]interface{}) Entry {

func (h *HoneycombEntry) Logf(f string, args ...interface{}) {
ev := h.builder.NewEvent()
ev.AddField("msg", fmt.Sprintf(f, args...))
msg := fmt.Sprintf(f, args...)
ev.AddField("msg", msg)
ev.Metadata = map[string]string{
"api_host": ev.APIHost,
"dataset": ev.Dataset,
}
level, ok := ev.Fields()["level"].(string)
if !ok {
level = "unknown"
}
if h.sampler != nil {
rate := h.sampler.GetSampleRate(fmt.Sprintf(`%s:%s`, level, msg))
ev.SampleRate = uint(rate)
}
ev.Send()
}

0 comments on commit 0ed99dc

Please sign in to comment.