From e617475d15def78b0940fa2480a90710c21b0ba1 Mon Sep 17 00:00:00 2001 From: Chen Xu Date: Thu, 15 Aug 2024 00:06:34 -0700 Subject: [PATCH] Standardize tracing span tags with proper error annotation --- runtime/client_http_request.go | 8 +- runtime/server_http_request.go | 9 ++- runtime/server_http_response.go | 7 +- runtime/tracing.go | 25 +++++++ runtime/tracing_test.go | 94 ++++++++++++++++++++++-- test/jaeger_test.go | 125 ++++++++++++++++++++++++++++++++ 6 files changed, 253 insertions(+), 15 deletions(-) diff --git a/runtime/client_http_request.go b/runtime/client_http_request.go index 12310d953..9a3312f2b 100644 --- a/runtime/client_http_request.go +++ b/runtime/client_http_request.go @@ -30,6 +30,7 @@ import ( "time" "github.com/opentracing/opentracing-go" + "github.com/opentracing/opentracing-go/ext" "github.com/pkg/errors" "github.com/uber/zanzibar/v2/runtime/jsonwrapper" @@ -187,9 +188,9 @@ func (req *ClientHTTPRequest) WriteBytes( // Do will send the request out. func (req *ClientHTTPRequest) Do() (*ClientHTTPResponse, error) { opName := fmt.Sprintf("%s.%s(%s)", req.ClientID, req.MethodName, req.ClientTargetEndpoint) - urlTag := opentracing.Tag{Key: "URL", Value: req.httpReq.URL} - methodTag := opentracing.Tag{Key: "Method", Value: req.httpReq.Method} - span, ctx := opentracing.StartSpanFromContext(req.ctx, opName, urlTag, methodTag) + urlTag := opentracing.Tag{Key: string(ext.HTTPUrl), Value: req.httpReq.URL} + methodTag := opentracing.Tag{Key: string(ext.HTTPMethod), Value: req.httpReq.Method} + span, ctx := opentracing.StartSpanFromContext(req.ctx, opName, urlTag, methodTag, ext.SpanKindRPCClient, tracingComponentTag) err := req.InjectSpanToHeader(span, opentracing.HTTPHeaders) if err != nil { /* coverage ignore next line */ @@ -207,6 +208,7 @@ func (req *ClientHTTPRequest) Do() (*ClientHTTPResponse, error) { res, retryCount, err = req.executeDoWithRetry(ctx) // new code for retry and timeout per ep level } + updateClientSpanWithError(span, res, err) span.Finish() AppendLogFieldsToContext(req.ctx, zap.Int64(fmt.Sprintf(logFieldClientAttempts, req.ClientID), retryCount)) diff --git a/runtime/server_http_request.go b/runtime/server_http_request.go index 1590c2e9b..da5e92345 100644 --- a/runtime/server_http_request.go +++ b/runtime/server_http_request.go @@ -191,8 +191,8 @@ func (req *ServerHTTPRequest) start() { if req.tracer != nil { opName := fmt.Sprintf("%s.%s", req.EndpointName, req.HandlerName) - urlTag := opentracing.Tag{Key: "URL", Value: req.URL} - MethodTag := opentracing.Tag{Key: "Method", Value: req.Method} + urlTag := opentracing.Tag{Key: string(ext.HTTPUrl), Value: req.URL} + MethodTag := opentracing.Tag{Key: string(ext.HTTPMethod), Value: req.Method} carrier := opentracing.HTTPHeadersCarrier(req.httpRequest.Header) spanContext, err := req.tracer.Extract(opentracing.HTTPHeaders, carrier) var span opentracing.Span @@ -201,9 +201,10 @@ func (req *ServerHTTPRequest) start() { /* coverage ignore next line */ req.contextLogger.WarnZ(req.Context(), "Error Extracting Trace Headers", zap.Error(err)) } - span = req.tracer.StartSpan(opName, urlTag, MethodTag) + span = req.tracer.StartSpan(opName, urlTag, MethodTag, tracingComponentTag, ext.SpanKindRPCServer) + ext.LogError(span, err) } else { - span = req.tracer.StartSpan(opName, urlTag, MethodTag, ext.RPCServerOption(spanContext)) + span = req.tracer.StartSpan(opName, urlTag, MethodTag, tracingComponentTag, ext.RPCServerOption(spanContext)) } req.span = span } diff --git a/runtime/server_http_response.go b/runtime/server_http_response.go index b538700cc..c32747411 100644 --- a/runtime/server_http_response.go +++ b/runtime/server_http_response.go @@ -29,6 +29,7 @@ import ( "time" "github.com/buger/jsonparser" + "github.com/opentracing/opentracing-go/ext" "github.com/pkg/errors" "github.com/uber-go/tally" "github.com/uber/zanzibar/v2/runtime/jsonwrapper" @@ -126,14 +127,18 @@ func (res *ServerHTTPResponse) finish(ctx context.Context) { tagged.Counter(endpointStatus).Inc(1) } + span := res.Request.GetSpan() logFn := res.contextLogger.Debug if !known || res.StatusCode >= 400 && res.StatusCode < 600 { tagged.Counter(endpointAppErrors).Inc(1) logFn = res.contextLogger.WarnZ + if span != nil { + ext.Error.Set(span, true) + } } - span := res.Request.GetSpan() if span != nil { + ext.HTTPStatusCode.Set(span, uint16(res.StatusCode)) span.Finish() } diff --git a/runtime/tracing.go b/runtime/tracing.go index 63dd821fa..41046189e 100644 --- a/runtime/tracing.go +++ b/runtime/tracing.go @@ -21,12 +21,19 @@ package zanzibar import ( + "net/http" + "github.com/opentracing/opentracing-go" + "github.com/opentracing/opentracing-go/ext" "github.com/uber/jaeger-client-go" "go.opentelemetry.io/otel/trace" "go.uber.org/zap" ) +var ( + tracingComponentTag = opentracing.Tag{Key: string(ext.Component), Value: "zanzibar"} +) + // The span context struct in otel is not exportable, an this is the way to access fields in span context // https://github.com/open-telemetry/opentelemetry-go/blob/v1.16.0/bridge/opentracing/README.md#extended-functionality type otelSpanContextProvider interface { @@ -55,3 +62,21 @@ func extractSpanLogFields(span opentracing.Span) []zap.Field { } return fields } + +// updateClientSpanWithError updates a client span with the error tags and logs +func updateClientSpanWithError(span opentracing.Span, res *http.Response, err error) { + if span == nil { + return + } + + if res != nil { + ext.HTTPStatusCode.Set(span, uint16(res.StatusCode)) + if res.StatusCode >= 400 { + ext.Error.Set(span, true) + } + } + if err != nil { + ext.Error.Set(span, true) + ext.LogError(span, err) + } +} diff --git a/runtime/tracing_test.go b/runtime/tracing_test.go index 0543adf2f..fadf167d3 100644 --- a/runtime/tracing_test.go +++ b/runtime/tracing_test.go @@ -21,8 +21,12 @@ package zanzibar import ( + "io" + "net/http" "testing" + "github.com/opentracing/opentracing-go" + "github.com/pkg/errors" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "github.com/uber/jaeger-client-go" @@ -36,13 +40,7 @@ func TestExtractSpanLogFields(t *testing.T) { }) t.Run("jaeger span", func(t *testing.T) { - tracer, closer, err := config.Configuration{ - ServiceName: "test", - Sampler: &config.SamplerConfig{ - Type: "const", - Param: 1, - }}.NewTracer(config.Reporter(jaeger.NewInMemoryReporter())) - require.NoError(t, err) + tracer, closer := createTestTracer(t) defer closer.Close() span := tracer.StartSpan("op") @@ -58,3 +56,85 @@ func TestExtractSpanLogFields(t *testing.T) { }, fields) }) } + +func TestUpdateClientSpanWithError(t *testing.T) { + tests := []struct { + name string + res *http.Response + err error + errTagExpected bool + statusCodeTagExpected bool + }{ + { + name: "nil response; no err", + }, + { + name: "nil response; err", + err: errors.New("some error"), + errTagExpected: true, + }, + { + name: "200 response; no err", + res: &http.Response{StatusCode: http.StatusOK}, + statusCodeTagExpected: true, + }, + { + name: "200 response; err", + res: &http.Response{StatusCode: http.StatusOK}, + err: errors.New("some error"), + errTagExpected: true, + statusCodeTagExpected: true, + }, + { + name: "400 response; no err", + res: &http.Response{StatusCode: http.StatusBadRequest}, + errTagExpected: true, + statusCodeTagExpected: true, + }, + { + name: "400 response; err", + res: &http.Response{StatusCode: http.StatusBadRequest}, + err: errors.New("some error"), + errTagExpected: true, + statusCodeTagExpected: true, + }, + } + + tracer, closer := createTestTracer(t) + defer closer.Close() + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + span := tracer.StartSpan("test") + updateClientSpanWithError(span, test.res, test.err) + jSpan, ok := span.(*jaeger.Span) + require.True(t, ok) + + _, exist := jSpan.Tags()["error"] + assert.Equal(t, test.errTagExpected, exist) + statusCode, exist := jSpan.Tags()["http.status_code"] + assert.Equal(t, test.statusCodeTagExpected, exist) + if test.statusCodeTagExpected { + assert.Equal(t, uint16(test.res.StatusCode), statusCode) + } + if test.err != nil { + assert.Len(t, jSpan.Logs(), 1) + } + }) + } +} + +func TestUpdateClientSpanWithErrorNilSpan(t *testing.T) { + +} + +func createTestTracer(t *testing.T) (opentracing.Tracer, io.Closer) { + tracer, closer, err := config.Configuration{ + ServiceName: "test", + Sampler: &config.SamplerConfig{ + Type: "const", + Param: 1, + }}.NewTracer(config.Reporter(jaeger.NewInMemoryReporter())) + require.NoError(t, err) + return tracer, closer +} diff --git a/test/jaeger_test.go b/test/jaeger_test.go index 313057c6a..24b607b97 100644 --- a/test/jaeger_test.go +++ b/test/jaeger_test.go @@ -23,11 +23,13 @@ package gateway import ( "bytes" "context" + "fmt" "net/http" "testing" "time" "github.com/opentracing/opentracing-go" + "github.com/opentracing/opentracing-go/ext" "github.com/stretchr/testify/assert" "github.com/uber/jaeger-client-go" "github.com/uber/jaeger-client-go/config" @@ -94,6 +96,17 @@ func TestHTTPEndpointToHTTPClient(t *testing.T) { assert.Equal(t, "bar.Hello(Bar::helloWorld)", clientSpan.GetOperationName()) assert.Equal(t, "bar.helloWorld", endpointSpan.GetOperationName()) assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId()) + + assertStrTag(t, clientSpan, string(ext.SpanKind), "client") + assertStrTag(t, clientSpan, string(ext.Component), "zanzibar") + assertStrTag(t, clientSpan, string(ext.HTTPMethod), "GET") + assertIntTag(t, clientSpan, string(ext.HTTPStatusCode), http.StatusOK) + + assertStrTag(t, endpointSpan, string(ext.SpanKind), "server") + assertStrTag(t, endpointSpan, string(ext.Component), "zanzibar") + assertStrTag(t, endpointSpan, string(ext.HTTPMethod), "GET") + assertStrTag(t, endpointSpan, string(ext.HTTPUrl), "/bar/hello") + assertIntTag(t, endpointSpan, string(ext.HTTPStatusCode), http.StatusOK) } func TestHTTPEndpointToHTTPClientWithUpstreamSpan(t *testing.T) { @@ -177,6 +190,84 @@ func TestHTTPEndpointToHTTPClientWithUpstreamSpan(t *testing.T) { assert.Equal(t, "bar.helloWorld", endpointSpan.GetOperationName()) assert.Equal(t, int64(upstreamSpanID), endpointSpan.GetParentSpanId()) assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId()) + + assertStrTag(t, clientSpan, string(ext.SpanKind), "client") + assertStrTag(t, clientSpan, string(ext.Component), "zanzibar") + assertStrTag(t, clientSpan, string(ext.HTTPMethod), "GET") + assertIntTag(t, clientSpan, string(ext.HTTPStatusCode), http.StatusOK) + + assertStrTag(t, endpointSpan, string(ext.SpanKind), "server") + assertStrTag(t, endpointSpan, string(ext.Component), "zanzibar") + assertStrTag(t, endpointSpan, string(ext.HTTPMethod), "GET") + assertStrTag(t, endpointSpan, string(ext.HTTPUrl), "/bar/hello") + assertIntTag(t, endpointSpan, string(ext.HTTPStatusCode), http.StatusOK) +} + +func TestHTTPEndpointToHTTPClientNon200(t *testing.T) { + gateway, err := testGateway.CreateGateway(t, map[string]interface{}{ + "clients.bar.serviceName": "barService", + }, &testGateway.Options{ + CountMetrics: true, + KnownHTTPBackends: []string{"bar"}, + TestBinary: util.DefaultMainFile("example-gateway"), + ConfigFiles: util.DefaultConfigFiles("example-gateway"), + JaegerFlushMillis: 1, + }) + if !assert.NoError(t, err, "got bootstrap err") { + return + } + defer gateway.Close() + + cg := gateway.(*testGateway.ChildProcessGateway) + gateway.HTTPBackends()["bar"].HandleFunc( + "GET", "/bar/hello", + func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + if _, err := w.Write([]byte(`"hello"`)); err != nil { + t.Fatal("can't write fake response") + } + }, + ) + + _, err = gateway.MakeRequest( + "GET", + "/bar/hello", + nil, nil, + ) + if !assert.NoError(t, err, "got http error") { + return + } + + // Wait until all spans are flushed + time.Sleep(time.Millisecond * 100) + + batches := cg.JaegerAgent.GetJaegerBatches() + + var spans []*jaegerGen.Span + for _, batch := range batches { + for _, span := range batch.Spans { + spans = append(spans, span) + } + } + + assert.Equal(t, 2, len(spans)) + endpointSpan, clientSpan := spans[1], spans[0] + assert.Equal(t, "bar.Hello(Bar::helloWorld)", clientSpan.GetOperationName()) + assert.Equal(t, "bar.helloWorld", endpointSpan.GetOperationName()) + assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId()) + + assertStrTag(t, clientSpan, string(ext.SpanKind), "client") + assertStrTag(t, clientSpan, string(ext.Component), "zanzibar") + assertStrTag(t, clientSpan, string(ext.HTTPMethod), "GET") + assertIntTag(t, clientSpan, string(ext.HTTPStatusCode), 500) + assertBoolTag(t, clientSpan, string(ext.Error), true) + + assertStrTag(t, endpointSpan, string(ext.SpanKind), "server") + assertStrTag(t, endpointSpan, string(ext.Component), "zanzibar") + assertStrTag(t, endpointSpan, string(ext.HTTPMethod), "GET") + assertStrTag(t, endpointSpan, string(ext.HTTPUrl), "/bar/hello") + assertIntTag(t, endpointSpan, string(ext.HTTPStatusCode), 500) + assertBoolTag(t, endpointSpan, string(ext.Error), true) } func TestHTTPEndpointToTChannelClient(t *testing.T) { @@ -344,6 +435,13 @@ func TestHTTPEndpointToTChannelClientWithUpstreamSpan(t *testing.T) { assert.Equal(t, "baz.call", endpointSpan.GetOperationName()) assert.Equal(t, int64(upstreamSpanID), endpointSpan.GetParentSpanId()) assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId()) + + assertStrTag(t, clientSpan, string(ext.SpanKind), "client") + assertStrTag(t, endpointSpan, string(ext.SpanKind), "server") + assertStrTag(t, endpointSpan, string(ext.Component), "zanzibar") + assertStrTag(t, endpointSpan, string(ext.HTTPMethod), "POST") + assertStrTag(t, endpointSpan, string(ext.HTTPUrl), "/baz/call") + assertIntTag(t, endpointSpan, string(ext.HTTPStatusCode), http.StatusNoContent) } func TestTChannelEndpoint(t *testing.T) { @@ -427,3 +525,30 @@ func TestTChannelEndpoint(t *testing.T) { assert.Equal(t, "SimpleService::Call", endpointSpan.GetOperationName()) assert.Equal(t, endpointSpan.GetSpanId(), clientSpan.GetParentSpanId()) } + +func assertStrTag(t *testing.T, span *jaegerGen.Span, key, val string) { + for _, tag := range span.GetTags() { + if tag.GetKey() == key && tag.VType == jaegerGen.TagType_STRING && tag.GetVStr() == val { + return + } + } + assert.Fail(t, fmt.Sprintf("tag {key: %s, value: %s} not found", key, val)) +} + +func assertIntTag(t *testing.T, span *jaegerGen.Span, key string, val int64) { + for _, tag := range span.GetTags() { + if tag.GetKey() == key && tag.VType == jaegerGen.TagType_LONG && tag.GetVLong() == val { + return + } + } + assert.Fail(t, fmt.Sprintf("tag {key: %s, value: %d} not found", key, val)) +} + +func assertBoolTag(t *testing.T, span *jaegerGen.Span, key string, val bool) { + for _, tag := range span.GetTags() { + if tag.GetKey() == key && tag.VType == jaegerGen.TagType_BOOL && tag.GetVBool() == val { + return + } + } + assert.Fail(t, fmt.Sprintf("tag {key: %s, value: %t} not found", key, val)) +}