diff --git a/README.md b/README.md index 4c0ae842747..a624acdf49c 100644 --- a/README.md +++ b/README.md @@ -105,11 +105,11 @@ Flags: unsybolized location --metastore="badgerinmemory" Which metastore implementation to use - --debug-infod-upstream-servers=https://debuginfod.systemtap.org,... - Upstream private/public servers for debuginfod - files. Defaults to - https://debuginfod.systemtap.org. It is an - ordered list of servers to try. + --debug-infod-upstream-servers=https://debuginfod.elfutils.org,... + Upstream debuginfod servers. Defaults to + https://debuginfod.elfutils.org. It is an + ordered list of servers to try. Learn more at + https://sourceware.org/elfutils/Debuginfod.html --debug-infod-http-request-timeout=5m Timeout duration for HTTP request to upstream debuginfod server. Defaults to 5m diff --git a/deploy/dev.jsonnet b/deploy/dev.jsonnet index 64a8a724218..b400a78b159 100644 --- a/deploy/dev.jsonnet +++ b/deploy/dev.jsonnet @@ -16,7 +16,7 @@ function(agentVersion='v0.4.1') logLevel: 'debug', configPath: '/parca.yaml', corsAllowedOrigins: '*', - debugInfodUpstreamServers: ['https://debuginfod.systemtap.org'], + debugInfodUpstreamServers: ["https://debuginfod.elfutils.org"], // debugInfodHTTPRequestTimeout: '5m', }); diff --git a/deploy/tilt/parca-server-deployment.yaml b/deploy/tilt/parca-server-deployment.yaml index 13a37fbe5e7..de186b509ec 100644 --- a/deploy/tilt/parca-server-deployment.yaml +++ b/deploy/tilt/parca-server-deployment.yaml @@ -29,7 +29,7 @@ spec: - --config-path=/parca.yaml - --log-level=debug - --cors-allowed-origins=* - - --debug-infod-upstream-servers=https://debuginfod.systemtap.org + - --debug-infod-upstream-servers=https://debuginfod.elfutils.org - --debug-infod-http-request-timeout=5m image: parca.io/parca/parca:dev livenessProbe: diff --git a/pkg/debuginfo/debuginfod.go b/pkg/debuginfo/debuginfod.go index 7e0023bd31b..bba351fd10c 100644 --- a/pkg/debuginfo/debuginfod.go +++ b/pkg/debuginfo/debuginfod.go @@ -53,6 +53,7 @@ type DebugInfodClientObjectStorageCache struct { bucket objstore.Bucket } +// NewHTTPDebugInfodClient returns a new HTTP debug info client. func NewHTTPDebugInfodClient(logger log.Logger, serverURLs []string, timeoutDuration time.Duration) (*HTTPDebugInfodClient, error) { logger = log.With(logger, "component", "debuginfod") parsedURLs := make([]*url.URL, 0, len(serverURLs)) @@ -73,6 +74,7 @@ func NewHTTPDebugInfodClient(logger log.Logger, serverURLs []string, timeoutDura }, nil } +// NewDebugInfodClientWithObjectStorageCache creates a new DebugInfodClient that caches the debug information in the object storage. func NewDebugInfodClientWithObjectStorageCache(logger log.Logger, config *Config, h DebugInfodClient) (DebugInfodClient, error) { logger = log.With(logger, "component", "debuginfod") cfg, err := yaml.Marshal(config.Bucket) @@ -101,11 +103,12 @@ type readCloser struct { closer } +// GetDebugInfo returns debug info for given buildid while caching it in object storage. func (c *DebugInfodClientObjectStorageCache) GetDebugInfo(ctx context.Context, buildID string) (io.ReadCloser, error) { logger := log.With(c.logger, "buildid", buildID) debugInfo, err := c.client.GetDebugInfo(ctx, buildID) if err != nil { - return nil, errDebugInfoNotFound + return nil, err } r, w := io.Pipe() @@ -131,41 +134,66 @@ func (c *DebugInfodClientObjectStorageCache) GetDebugInfo(ctx context.Context, b }, nil } +// GetDebugInfo returns debug information file for given buildID by downloading it from upstream servers. func (c *HTTPDebugInfodClient) GetDebugInfo(ctx context.Context, buildID string) (io.ReadCloser, error) { logger := log.With(c.logger, "buildid", buildID) + + // e.g: + //"https://debuginfod.elfutils.org/" + //"https://debuginfod.systemtap.org/" + //"https://debuginfod.opensuse.org/" + //"https://debuginfod.s.voidlinux.org/" + //"https://debuginfod.debian.net/" + //"https://debuginfod.fedoraproject.org/" + //"https://debuginfod.altlinux.org/" + //"https://debuginfod.archlinux.org/" + //"https://debuginfod.centos.org/" for _, u := range c.UpstreamServers { + ctx, cancel := context.WithTimeout(ctx, c.timeoutDuration) + defer cancel() + serverURL := *u rc, err := c.request(ctx, serverURL, buildID) - if err == nil { - return rc, nil + if err != nil { + level.Warn(logger).Log( + "msg", "failed to download debug info file from upstream debuginfod server, trying next one (if exists)", + "server", serverURL, "err", err, + ) + continue } - level.Warn(logger).Log( - "msg", "failed to get debuginfo from upstream server, trying next one (if exists)", - "server", serverURL, "err", err, - ) + return rc, nil } return nil, errDebugInfoNotFound } -func (c *HTTPDebugInfodClient) request(ctx context.Context, serverURL url.URL, buildID string) (io.ReadCloser, error) { - logger := log.With(c.logger, "buildid", buildID) - - serverURL.Path = path.Join(serverURL.Path, buildID, "debuginfo") - - ctx, cancel := context.WithTimeout(ctx, c.timeoutDuration) - defer cancel() +func (c *HTTPDebugInfodClient) request(ctx context.Context, u url.URL, buildID string) (io.ReadCloser, error) { + // https://www.mankier.com/8/debuginfod#Webapi + // Endpoint: /buildid/BUILDID/debuginfo + // If the given buildid is known to the server, + // this request will result in a binary object that contains the customary .*debug_* sections. + u.Path = path.Join(u.Path, "buildid", buildID, "debuginfo") - req, err := http.NewRequestWithContext(ctx, "GET", serverURL.String(), nil) + req, err := http.NewRequestWithContext(ctx, "GET", u.String(), nil) if err != nil { - level.Debug(logger).Log("msg", "failed to create new HTTP request", "err", err) - return nil, err + return nil, fmt.Errorf("create request: %w", err) } resp, err := http.DefaultClient.Do(req) if err != nil { - level.Debug(logger).Log("msg", "object not found in public server", "object", buildID, "err", err) - return nil, errDebugInfoNotFound + return nil, fmt.Errorf("request failed: %w", err) } - return resp.Body, nil + switch resp.StatusCode / 100 { + case 2: + return resp.Body, nil + case 4: + if resp.StatusCode == http.StatusNotFound { + return nil, errDebugInfoNotFound + } + return nil, fmt.Errorf("client error: %s", resp.Status) + case 5: + return nil, fmt.Errorf("server error: %s", resp.Status) + default: + return nil, fmt.Errorf("unexpected status code: %s", resp.Status) + } } diff --git a/pkg/debuginfo/debuginfod_test.go b/pkg/debuginfo/debuginfod_test.go new file mode 100644 index 00000000000..7c9597b6553 --- /dev/null +++ b/pkg/debuginfo/debuginfod_test.go @@ -0,0 +1,100 @@ +// Copyright (c) 2022 The Parca Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package debuginfo + +import ( + "io" + "io/ioutil" + "net/url" + "os" + "os/exec" + "strings" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/stretchr/testify/require" + "golang.org/x/net/context" +) + +func TestHTTPDebugInfodClient_request(t *testing.T) { + type fields struct { + UpstreamServers []*url.URL + timeoutDuration time.Duration + } + type args struct { + u url.URL + buildID string + } + tests := []struct { + name string + fields fields + args args + want string + wantErr bool + }{ + { + name: "success", + fields: fields{ + timeoutDuration: time.Minute, + }, + args: args{ + u: url.URL{ + Scheme: "http", + Host: "debuginfod.elfutils.org", + }, + buildID: "d278249792061c6b74d1693ca59513be1def13f2", + }, + want: `ELF 64-bit LSB shared object, x86-64, version 1 (GNU/Linux), dynamically linked, interpreter \011, BuildID[sha1]=d278249792061c6b74d1693ca59513be1def13f2, for GNU/Linux 3.2.0, with debug_info, not stripped`, + wantErr: false, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + c := &HTTPDebugInfodClient{ + logger: log.NewNopLogger(), + timeoutDuration: tt.fields.timeoutDuration, + } + r, err := c.request(context.TODO(), tt.args.u, tt.args.buildID) + if (err != nil) != tt.wantErr { + t.Errorf("request() error = %v, wantErr %v", err, tt.wantErr) + return + } + t.Cleanup(func() { + r.Close() + }) + + tmpfile, err := ioutil.TempFile("", "debuginfod-download-*") + require.NoError(t, err) + + t.Cleanup(func() { + os.Remove(tmpfile.Name()) + }) + + _, err = io.Copy(tmpfile, r) + require.NoError(t, err) + + require.NoError(t, tmpfile.Close()) + + cmd := exec.Command("file", tmpfile.Name()) + + stdout, err := cmd.Output() + require.NoError(t, err) + + got := strings.TrimSpace(strings.Split(string(stdout), ":")[1]) + require.Equal(t, tt.want, got) + }) + } +} diff --git a/pkg/debuginfo/store.go b/pkg/debuginfo/store.go index 00aed3c8c0d..7d5e92cdfa0 100644 --- a/pkg/debuginfo/store.go +++ b/pkg/debuginfo/store.go @@ -271,28 +271,31 @@ func (s *Store) Symbolize(ctx context.Context, m *pb.Mapping, locations ...*meta objFile, err := s.fetchObjectFile(ctx, buildID) if err != nil { // It's ok if we don't have the symbols for given BuildID, it happens too often. - level.Debug(logger).Log("msg", "failed to fetch object", "err", err) - if errors.Is(err, errDebugInfoNotFound) { - level.Debug(logger).Log("msg", "attempting to download from debuginfod servers") - // Try downloading the debuginfo file from the debuginfod server. - r, err := s.debuginfodClient.GetDebugInfo(ctx, buildID) - if err != nil { - level.Debug(logger).Log("msg", "failed to download debuginfo from debuginfod", "err", err) - return nil, nil - } - defer r.Close() - level.Info(logger).Log("msg", "debug info downloaded from debuginfod server") + level.Warn(logger).Log("msg", "failed to fetch object", "err", err) - // Cache the file locally. - if err := cache(objFile, r); err != nil { - level.Debug(logger).Log("msg", "failed to cache debuginfo", "err", err) - return nil, nil - } + // Let's try to find a debug file from debuginfod servers. + objFile, err = s.fetchDebuginfodFile(ctx, buildID) + if err != nil { + return nil, fmt.Errorf("failed to fetch: %w", err) } + } - return nil, fmt.Errorf("failed to fetch: %w", err) + // Let's make sure we have the best version of the debug file. + hasDWARF, err := elfutils.HasDWARF(objFile) + if err != nil { + return nil, fmt.Errorf("failed to check for DWARF: %w", err) + } + if !hasDWARF { + dbgFile, err := s.fetchDebuginfodFile(ctx, buildID) + if err != nil { + level.Warn(logger).Log("msg", "failed to fetch debuginfod file", "err", err) + } else { + objFile = dbgFile + } } + // At this point we have the best version of the debug information file that we could find. + // Let's symbolize it. locationLines, err := s.symbolizer.Symbolize(ctx, m, locations, objFile) if err != nil { if errors.Is(err, symbol.ErrLinerCreationFailedBefore) { @@ -308,7 +311,7 @@ func (s *Store) Symbolize(ctx context.Context, m *pb.Mapping, locations ...*meta func (s *Store) fetchObjectFile(ctx context.Context, buildID string) (string, error) { logger := log.With(s.logger, "buildid", buildID) - objFile := path.Join(s.cacheDir, buildID, "debuginfo") + objFile := s.localCachePath(buildID) // Check if it's already cached locally; if not download. if _, err := os.Stat(objFile); os.IsNotExist(err) { // Download the debuginfo file from the bucket. @@ -330,6 +333,33 @@ func (s *Store) fetchObjectFile(ctx context.Context, buildID string) (string, er return objFile, nil } +func (s *Store) fetchDebuginfodFile(ctx context.Context, buildID string) (string, error) { + logger := log.With(s.logger, "buildid", buildID) + level.Debug(logger).Log("msg", "attempting to download from debuginfod servers") + + objFile := s.localCachePath(buildID) + // Try downloading the debuginfo file from the debuginfod server. + r, err := s.debuginfodClient.GetDebugInfo(ctx, buildID) + if err != nil { + level.Debug(logger).Log("msg", "failed to download debuginfo from debuginfod", "err", err) + return "", fmt.Errorf("failed to fetch from debuginfod: %w", err) + } + defer r.Close() + level.Info(logger).Log("msg", "debug info downloaded from debuginfod server") + + // Cache the file locally. + if err := cache(objFile, r); err != nil { + level.Debug(logger).Log("msg", "failed to cache debuginfo", "err", err) + return "", fmt.Errorf("failed to fetch from debuginfod: %w", err) + } + + return objFile, nil +} + +func (s *Store) localCachePath(buildID string) string { + return path.Join(s.cacheDir, buildID, "debuginfo") +} + func cache(localPath string, r io.ReadCloser) error { tmpfile, err := ioutil.TempFile("", "symbol-download-*") if err != nil { diff --git a/pkg/parca/parca.go b/pkg/parca/parca.go index d04018c70a6..1171d70fd80 100644 --- a/pkg/parca/parca.go +++ b/pkg/parca/parca.go @@ -83,7 +83,7 @@ type Flags struct { Metastore string `default:"badgerinmemory" help:"Which metastore implementation to use" enum:"badgerinmemory"` - DebugInfodUpstreamServers []string `default:"https://debuginfod.systemtap.org" help:"Upstream private/public servers for debuginfod files. Defaults to https://debuginfod.systemtap.org. It is an ordered list of servers to try."` + DebugInfodUpstreamServers []string `default:"https://debuginfod.elfutils.org" help:"Upstream debuginfod servers. Defaults to https://debuginfod.elfutils.org. It is an ordered list of servers to try. Learn more at https://sourceware.org/elfutils/Debuginfod.html"` DebugInfodHTTPRequestTimeout time.Duration `default:"5m" help:"Timeout duration for HTTP request to upstream debuginfod server. Defaults to 5m"` StoreAddress string `kong:"help='gRPC address to send profiles and symbols to.'"`