diff --git a/agent/agent_configuration.go b/agent/agent_configuration.go index 5381d24c4e..171a231d32 100644 --- a/agent/agent_configuration.go +++ b/agent/agent_configuration.go @@ -57,4 +57,5 @@ type AgentConfiguration struct { AcquireJob string TracingBackend string TracingServiceName string + TraceLogGroups bool } diff --git a/agent/job_runner.go b/agent/job_runner.go index 2731afcf5e..eb851edc99 100644 --- a/agent/job_runner.go +++ b/agent/job_runner.go @@ -523,6 +523,7 @@ func (r *JobRunner) createEnvironment() ([]string, error) { if r.conf.AgentConfiguration.TracingBackend != "" { env["BUILDKITE_TRACING_BACKEND"] = r.conf.AgentConfiguration.TracingBackend env["BUILDKITE_TRACING_SERVICE_NAME"] = r.conf.AgentConfiguration.TracingServiceName + env["BUILDKITE_TRACE_LOG_GROUPS"] = fmt.Sprintf("%t", r.conf.AgentConfiguration.TraceLogGroups) } // see documentation for BuildkiteMessageMax diff --git a/clicommand/agent_start.go b/clicommand/agent_start.go index 90e90bf34d..b0c14c0f4a 100644 --- a/clicommand/agent_start.go +++ b/clicommand/agent_start.go @@ -142,6 +142,7 @@ type AgentStartConfig struct { MetricsDatadogDistributions bool `cli:"metrics-datadog-distributions"` TracingBackend string `cli:"tracing-backend"` TracingServiceName string `cli:"tracing-service-name"` + TraceLogGroups bool `cli:"trace-log-groups"` // Global flags Debug bool `cli:"debug"` @@ -582,6 +583,11 @@ var AgentStartCommand = cli.Command{ EnvVar: "BUILDKITE_TRACING_SERVICE_NAME", Value: "buildkite-agent", }, + cli.BoolFlag{ + Name: "trace-log-groups", + Usage: "Automatically creates tracing spans for log groups when tracing is enabled.", + EnvVar: "BUILDKITE_TRACE_LOG_GROUPS", + }, cli.StringFlag{ Name: "job-verification-key-path", Usage: "Path to a file containing a verification key. Passing this flag enables job verification. For hmac-sha256, the raw file content is used as the shared key", @@ -800,9 +806,13 @@ var AgentStartCommand = cli.Command{ }) // Sense check supported tracing backends, we don't want bootstrapped jobs to silently have no tracing - if _, has := tracetools.ValidTracingBackends[cfg.TracingBackend]; !has { + _, tracingEnabled := tracetools.ValidTracingBackends[cfg.TracingBackend] + if !tracingEnabled { l.Fatal("The given tracing backend %q is not supported. Valid backends are: %q", cfg.TracingBackend, maps.Keys(tracetools.ValidTracingBackends)) } + if !tracingEnabled && cfg.TraceLogGroups { + l.Fatal("Log group tracing cannot be enabled without a tracing backend.") + } if experiments.IsEnabled(experiments.AgentAPI) { shutdown := runAgentAPI(ctx, l, cfg.SocketsPath) @@ -864,6 +874,7 @@ var AgentStartCommand = cli.Command{ AcquireJob: cfg.AcquireJob, TracingBackend: cfg.TracingBackend, TracingServiceName: cfg.TracingServiceName, + TraceLogGroups: cfg.TraceLogGroups, JobVerificationNoSignatureBehavior: cfg.JobVerificationNoSignatureBehavior, JobVerificationInvalidSignatureBehavior: cfg.JobVerificationInvalidSignatureBehavior, diff --git a/clicommand/bootstrap.go b/clicommand/bootstrap.go index 4f82758862..dde8a0195e 100644 --- a/clicommand/bootstrap.go +++ b/clicommand/bootstrap.go @@ -94,6 +94,7 @@ type BootstrapConfig struct { RedactedVars []string `cli:"redacted-vars" normalize:"list"` TracingBackend string `cli:"tracing-backend"` TracingServiceName string `cli:"tracing-service-name"` + TraceLogGroups bool `cli:"trace-log-groups"` } var BootstrapCommand = cli.Command{ @@ -355,6 +356,11 @@ var BootstrapCommand = cli.Command{ EnvVar: "BUILDKITE_TRACING_SERVICE_NAME", Value: "buildkite-agent", }, + cli.BoolFlag{ + Name: "trace-log-groups", + Usage: "Automatically creates tracing spans for log groups when tracing is enabled.", + EnvVar: "BUILDKITE_TRACE_LOG_GROUPS", + }, DebugFlag, LogLevelFlag, ExperimentsFlag, @@ -439,6 +445,7 @@ var BootstrapCommand = cli.Command{ Tag: cfg.Tag, TracingBackend: cfg.TracingBackend, TracingServiceName: cfg.TracingServiceName, + TraceLogGroups: cfg.TraceLogGroups, }) cctx, cancel := context.WithCancel(ctx) diff --git a/internal/job/config.go b/internal/job/config.go index ac52f9aaf6..2925479df5 100644 --- a/internal/job/config.go +++ b/internal/job/config.go @@ -164,6 +164,9 @@ type ExecutorConfig struct { // Service name to use when reporting traces. TracingServiceName string + + // Whether to auto-create tracing spans for log groups. + TraceLogGroups bool } // ReadFromEnvironment reads configuration from the Environment, returns a map diff --git a/internal/job/executor.go b/internal/job/executor.go index 2598fa0a6d..74213fd06e 100644 --- a/internal/job/executor.go +++ b/internal/job/executor.go @@ -82,6 +82,7 @@ func (e *Executor) Run(ctx context.Context) (exitCode int) { e.shell.Debug = e.ExecutorConfig.Debug e.shell.InterruptSignal = e.ExecutorConfig.CancelSignal e.shell.SignalGracePeriod = e.ExecutorConfig.SignalGracePeriod + e.shell.TraceLogGroups = e.ExecutorConfig.TraceLogGroups } if experiments.IsEnabled(experiments.KubernetesExec) { kubernetesClient := &kubernetes.Client{} diff --git a/internal/job/shell/shell.go b/internal/job/shell/shell.go index 4654c772e7..ec73e306da 100644 --- a/internal/job/shell/shell.go +++ b/internal/job/shell/shell.go @@ -72,6 +72,9 @@ type Shell struct { // Amount of time to wait between sending the InterruptSignal and SIGKILL SignalGracePeriod time.Duration + + // Whether to auto create tracing spans for log groups + TraceLogGroups bool } // New returns a new Shell @@ -532,6 +535,32 @@ func round(d time.Duration) time.Duration { } } +// spanMakerWriter is an io.Writer shim that captures logs and automatically creates trace spans for the log group. +type spanMakerWriter struct { + w io.Writer + ctx context.Context + span opentracing.Span +} + +func (s *spanMakerWriter) Write(p []byte) (n int, err error) { + if bytes.HasPrefix(p, []byte("~~~ ")) || bytes.HasPrefix(p, []byte("--- ")) || bytes.HasPrefix(p, []byte("+++ ")) { + s.FinishIfActive() + operationName, _, _ := strings.Cut(string(p[4:]), "\r\n") + // We don't store the context bc we don't need to create child spans (yet). If we stored it, every log group would + // look like a child of the previous log group, where they're all more like siblings under the same parent span, + // since Buildkite itself has no concept of log group hierarchy. + s.span, _ = opentracing.StartSpanFromContext(s.ctx, operationName) + } + return s.w.Write(p) +} + +func (s *spanMakerWriter) FinishIfActive() { + if s.span != nil { + s.span.Finish() + s.span = nil + } +} + func (s *Shell) executeCommand( ctx context.Context, cmd *command, @@ -542,6 +571,14 @@ func (s *Shell) executeCommand( tracedEnv := env.FromSlice(cmd.Env) s.injectTraceCtx(ctx, tracedEnv) cmd.Env = tracedEnv.ToSlice() + writer := w + writerCloser := func() {} + if s.TraceLogGroups { + logToSpanWriter := &spanMakerWriter{w: w, ctx: ctx, span: nil} + writer = logToSpanWriter + writerCloser = func() { logToSpanWriter.FinishIfActive() } + } + defer writerCloser() s.cmdLock.Lock() s.cmd = cmd @@ -561,11 +598,11 @@ func (s *Shell) executeCommand( // Modify process config based on execution flags if flags.PTY { cfg.PTY = true - cfg.Stdout = w + cfg.Stdout = writer } else { // Show stdout if requested or via debug if flags.Stdout { - cfg.Stdout = w + cfg.Stdout = writer } else if s.Debug { stdOutStreamer := NewLoggerStreamer(s.Logger) defer stdOutStreamer.Close() @@ -574,7 +611,7 @@ func (s *Shell) executeCommand( // Show stderr if requested or via debug if flags.Stderr { - cfg.Stderr = w + cfg.Stderr = writer } else if s.Debug { stdErrStreamer := NewLoggerStreamer(s.Logger) defer stdErrStreamer.Close()