Skip to content

Commit

Permalink
algod: Add static EnableTelemetry retry (algorand#6183)
Browse files Browse the repository at this point in the history
  • Loading branch information
urtho authored Feb 24, 2025
1 parent 113b0c3 commit fd2f520
Show file tree
Hide file tree
Showing 8 changed files with 56 additions and 21 deletions.
27 changes: 26 additions & 1 deletion cmd/algod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package main

import (
"context"
"encoding/json"
"flag"
"fmt"
Expand Down Expand Up @@ -59,6 +60,11 @@ var sessionGUID = flag.String("s", "", "Telemetry Session GUID to use")
var telemetryOverride = flag.String("t", "", `Override telemetry setting if supported (Use "true", "false", "0" or "1")`)
var seed = flag.String("seed", "", "input to math/rand.Seed()")

const (
defaultStaticTelemetryStartupTimeout = 5 * time.Second
defaultStaticTelemetryBGDialRetry = 1 * time.Minute
)

func main() {
flag.Parse()
exitCode := run()
Expand Down Expand Up @@ -232,9 +238,28 @@ func run() int {
telemetryConfig.SessionGUID = *sessionGUID
}
}
err = log.EnableTelemetry(telemetryConfig)
// Try to enable remote telemetry now when URI is defined. Skip for DNS based telemetry.
ctx, telemetryCancelFn := context.WithTimeout(context.Background(), defaultStaticTelemetryStartupTimeout)
err = log.EnableTelemetryContext(ctx, telemetryConfig)
telemetryCancelFn()
if err != nil {
fmt.Fprintln(os.Stdout, "error creating telemetry hook", err)

// Remote telemetry init loop
go func() {
for {
time.Sleep(defaultStaticTelemetryBGDialRetry)
// Try to enable remote telemetry now when URI is defined. Skip for DNS based telemetry.
err := log.EnableTelemetryContext(context.Background(), telemetryConfig)
// Error occurs only if URI is defined and we need to retry later
if err == nil {
// Remote telemetry enabled or empty static URI, stop retrying
return
}
fmt.Fprintln(os.Stdout, "error creating telemetry hook", err)
// Try to reenable every minute
}
}()
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion cmd/algoh/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package main

import (
"context"
"flag"
"fmt"
"os"
Expand Down Expand Up @@ -307,7 +308,7 @@ func initTelemetry(genesis bookkeeping.Genesis, log logging.Logger, dataDirector
telemetryConfig.Enable = logging.TelemetryOverride(*telemetryOverride, &telemetryConfig)

if telemetryConfig.Enable {
err = log.EnableTelemetry(telemetryConfig)
err = log.EnableTelemetryContext(context.Background(), telemetryConfig)
if err != nil {
fmt.Fprintln(os.Stdout, "error creating telemetry hook", err)
return
Expand Down
3 changes: 2 additions & 1 deletion data/pools/transactionPool_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ package pools
import (
"bufio"
"bytes"
"context"
"fmt"
"math/rand"
"os"
Expand Down Expand Up @@ -1438,7 +1439,7 @@ func TestStateProofLogging(t *testing.T) {
// Set the logging to capture the telemetry Metrics into logging
logger := logging.TestingLog(t)
logger.SetLevel(logging.Info)
logger.EnableTelemetry(logging.TelemetryConfig{Enable: true, SendToLog: true})
logger.EnableTelemetryContext(context.Background(), logging.TelemetryConfig{Enable: true, SendToLog: true})
var buf bytes.Buffer
logger.SetOutput(&buf)

Expand Down
7 changes: 4 additions & 3 deletions logging/log.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ logger.Info("New wallet was created")
package logging

import (
"context"
"io"
"runtime"
"runtime/debug"
Expand Down Expand Up @@ -154,7 +155,7 @@ type Logger interface {
// Adds a hook to the logger
AddHook(hook logrus.Hook)

EnableTelemetry(cfg TelemetryConfig) error
EnableTelemetryContext(ctx context.Context, cfg TelemetryConfig) error
UpdateTelemetryURI(uri string) error
GetTelemetryEnabled() bool
GetTelemetryUploadingEnabled() bool
Expand Down Expand Up @@ -389,11 +390,11 @@ func RegisterExitHandler(handler func()) {
logrus.RegisterExitHandler(handler)
}

func (l logger) EnableTelemetry(cfg TelemetryConfig) (err error) {
func (l logger) EnableTelemetryContext(ctx context.Context, cfg TelemetryConfig) (err error) {
if l.loggerState.telemetry != nil || (!cfg.Enable && !cfg.SendToLog) {
return nil
}
return EnableTelemetry(cfg, &l)
return EnableTelemetryContext(ctx, cfg, &l)
}

func (l logger) UpdateTelemetryURI(uri string) (err error) {
Expand Down
9 changes: 5 additions & 4 deletions logging/telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package logging

import (
"context"
"fmt"
"io"
"os"
Expand All @@ -36,8 +37,8 @@ const telemetrySeparator = "/"
const logBufferDepth = 2

// EnableTelemetry configures and enables telemetry based on the config provided
func EnableTelemetry(cfg TelemetryConfig, l *logger) (err error) {
telemetry, err := makeTelemetryState(cfg, createElasticHook)
func EnableTelemetryContext(ctx context.Context, cfg TelemetryConfig, l *logger) (err error) {
telemetry, err := makeTelemetryStateContext(ctx, cfg, createElasticHookContext)
if err != nil {
return
}
Expand Down Expand Up @@ -70,14 +71,14 @@ func makeLevels(min logrus.Level) []logrus.Level {
return levels
}

func makeTelemetryState(cfg TelemetryConfig, hookFactory hookFactory) (*telemetryState, error) {
func makeTelemetryStateContext(ctx context.Context, cfg TelemetryConfig, hookFactory hookFactory) (*telemetryState, error) {
telemetry := &telemetryState{}
telemetry.history = createLogBuffer(logBufferDepth)
if cfg.Enable {
if cfg.SessionGUID == "" {
cfg.SessionGUID = uuid.New()
}
hook, err := createTelemetryHook(cfg, telemetry.history, hookFactory)
hook, err := createTelemetryHookContext(ctx, cfg, telemetry.history, hookFactory)
if err != nil {
return nil, err
}
Expand Down
6 changes: 4 additions & 2 deletions logging/telemetryCommon.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@
package logging

import (
"context"
"sync"

"github.com/algorand/go-deadlock"
"github.com/sirupsen/logrus"
"sync"
)

type telemetryHook interface {
Expand Down Expand Up @@ -81,4 +83,4 @@ type asyncTelemetryHook struct {
// A dummy noop type to get rid of checks like telemetry.hook != nil
type dummyHook struct{}

type hookFactory func(cfg TelemetryConfig) (logrus.Hook, error)
type hookFactory func(ctx context.Context, cfg TelemetryConfig) (logrus.Hook, error)
10 changes: 6 additions & 4 deletions logging/telemetry_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
package logging

import (
"context"
"encoding/json"
"fmt"
"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"
"os"
"testing"

"github.com/sirupsen/logrus"
"github.com/stretchr/testify/require"

"github.com/algorand/go-deadlock"

"github.com/algorand/go-algorand/config"
Expand Down Expand Up @@ -84,7 +86,7 @@ func makeTelemetryTestFixtureWithConfig(minLevel logrus.Level, cfg *TelemetryCon
f.l = Base().(logger)
f.l.SetLevel(Debug) // Ensure logging doesn't filter anything out

f.telem, _ = makeTelemetryState(lcfg, func(cfg TelemetryConfig) (hook logrus.Hook, err error) {
f.telem, _ = makeTelemetryStateContext(context.Background(), lcfg, func(ctx context.Context, cfg TelemetryConfig) (hook logrus.Hook, err error) {
return &f.hook, nil
})
f.l.loggerState.telemetry = f.telem
Expand Down Expand Up @@ -138,7 +140,7 @@ func TestCreateHookError(t *testing.T) {

cfg := createTelemetryConfig()
cfg.Enable = true
telem, err := makeTelemetryState(cfg, func(cfg TelemetryConfig) (hook logrus.Hook, err error) {
telem, err := makeTelemetryStateContext(context.Background(), cfg, func(ctx context.Context, cfg TelemetryConfig) (hook logrus.Hook, err error) {
return nil, fmt.Errorf("failed")
})

Expand Down
12 changes: 7 additions & 5 deletions logging/telemetryhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package logging

import (
"context"
"fmt"

"github.com/olivere/elastic"
Expand Down Expand Up @@ -228,14 +229,14 @@ func (el elasticClientLogger) Printf(format string, v ...interface{}) {
}
}

func createElasticHook(cfg TelemetryConfig) (hook logrus.Hook, err error) {
func createElasticHookContext(ctx context.Context, cfg TelemetryConfig) (hook logrus.Hook, err error) {
// Returning an error here causes issues... need the hooks to be created even if the elastic hook fails so that
// things can recover later.
if cfg.URI == "" {
return nil, nil
}

client, err := elastic.NewClient(elastic.SetURL(cfg.URI),
client, err := elastic.DialContext(ctx, elastic.SetURL(cfg.URI),
elastic.SetBasicAuth(cfg.UserName, cfg.Password),
elastic.SetSniff(false),
elastic.SetGzip(true),
Expand All @@ -257,12 +258,12 @@ func createElasticHook(cfg TelemetryConfig) (hook logrus.Hook, err error) {
}

// createTelemetryHook creates the Telemetry log hook, or returns nil if remote logging is not enabled
func createTelemetryHook(cfg TelemetryConfig, history *logBuffer, hookFactory hookFactory) (hook logrus.Hook, err error) {
func createTelemetryHookContext(ctx context.Context, cfg TelemetryConfig, history *logBuffer, hookFactory hookFactory) (hook logrus.Hook, err error) {
if !cfg.Enable {
return nil, fmt.Errorf("createTelemetryHook called when telemetry not enabled")
}

hook, err = hookFactory(cfg)
hook, err = hookFactory(ctx, cfg)

if err != nil {
return nil, err
Expand Down Expand Up @@ -290,7 +291,8 @@ func (hook *asyncTelemetryHook) UpdateHookURI(uri string) (err error) {
copy := tfh.telemetryConfig
copy.URI = uri
var newHook logrus.Hook
newHook, err = tfh.factory(copy)

newHook, err = tfh.factory(context.Background(), copy)

if err == nil && newHook != nil {
tfh.wrappedHook = newHook
Expand Down

0 comments on commit fd2f520

Please sign in to comment.