From 8cc0e11823cf14e94c35ab56b85f8170950f5980 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 19 Jan 2025 16:45:34 +0100 Subject: [PATCH 1/8] libnetwork: un-export Controller.DiagnosticServer It's only accessed internally, so doesn't have to be exported. Signed-off-by: Sebastiaan van Stijn --- libnetwork/agent.go | 2 +- libnetwork/controller.go | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/libnetwork/agent.go b/libnetwork/agent.go index 440f82cd39969..b68b15ac2d70e 100644 --- a/libnetwork/agent.go +++ b/libnetwork/agent.go @@ -332,7 +332,7 @@ func (c *Controller) agentInit(listenAddr, bindAddrOrInterface, advertiseAddr, d } // Register the diagnostic handlers - nDB.RegisterDiagnosticHandlers(c.DiagnosticServer) + nDB.RegisterDiagnosticHandlers(c.diagnosticServer) var cancelList []func() ch, cancel := nDB.Watch(libnetworkEPTable, "") diff --git a/libnetwork/controller.go b/libnetwork/controller.go index 89134aee85959..4031dbaf90f66 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -97,7 +97,7 @@ type Controller struct { agentInitDone chan struct{} agentStopDone chan struct{} keys []*types.EncryptionKey - DiagnosticServer *diagnostic.Server + diagnosticServer *diagnostic.Server mu sync.Mutex // FIXME(thaJeztah): defOsSbox is always nil on non-Linux: move these fields to Linux-only files. @@ -122,7 +122,7 @@ func New(cfgOptions ...config.Option) (*Controller, error) { serviceBindings: make(map[serviceKey]*service), agentInitDone: make(chan struct{}), networkLocker: locker.New(), - DiagnosticServer: diagnostic.New(), + diagnosticServer: diagnostic.New(), } c.drvRegistry.Notify = c @@ -1094,8 +1094,8 @@ func (c *Controller) Stop() { // StartDiagnostic starts the network diagnostic server listening on port. func (c *Controller) StartDiagnostic(port int) { c.mu.Lock() - if !c.DiagnosticServer.IsDiagnosticEnabled() { - c.DiagnosticServer.EnableDiagnostic("127.0.0.1", port) + if !c.diagnosticServer.IsDiagnosticEnabled() { + c.diagnosticServer.EnableDiagnostic("127.0.0.1", port) } c.mu.Unlock() } @@ -1103,8 +1103,8 @@ func (c *Controller) StartDiagnostic(port int) { // StopDiagnostic stops the network diagnostic server. func (c *Controller) StopDiagnostic() { c.mu.Lock() - if c.DiagnosticServer.IsDiagnosticEnabled() { - c.DiagnosticServer.DisableDiagnostic() + if c.diagnosticServer.IsDiagnosticEnabled() { + c.diagnosticServer.DisableDiagnostic() } c.mu.Unlock() } @@ -1113,5 +1113,5 @@ func (c *Controller) StopDiagnostic() { func (c *Controller) IsDiagnosticEnabled() bool { c.mu.Lock() defer c.mu.Unlock() - return c.DiagnosticServer.IsDiagnosticEnabled() + return c.diagnosticServer.IsDiagnosticEnabled() } From 1e6449dfc7d6ab4c62ce69526dea770f64a1ccd4 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 19 Jan 2025 17:21:35 +0100 Subject: [PATCH 2/8] libnetwork/diagnostic: print newline after stackdump log path The response would not have a trailing newline, which made it difficult to copy the path. While updating, also include the path of the stackdump in the daemon log that's produced. Before this: root@fa87ff1bcd00:/go/src/github.com/docker/docker# curl -s http://127.0.0.1:123/stackdump OK goroutine stacks written to /tmp/goroutine-stacks-2025-01-19T160337Z.logroot@fa87ff1bcd00:/go/src/github.com/docker/docker# After this: root@fa87ff1bcd00:/go/src/github.com/docker/docker# curl -s http://127.0.0.1:123/stackdump OK goroutine stacks written to /tmp/goroutine-stacks-2025-01-19T160922Z.log root@fa87ff1bcd00:/go/src/github.com/docker/docker# Signed-off-by: Sebastiaan van Stijn --- libnetwork/diagnostic/server.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libnetwork/diagnostic/server.go b/libnetwork/diagnostic/server.go index 3614243aeee10..cdd097c3c0428 100644 --- a/libnetwork/diagnostic/server.go +++ b/libnetwork/diagnostic/server.go @@ -176,15 +176,16 @@ func stackTrace(w http.ResponseWriter, r *http.Request) { // audit logs logger := log.G(context.TODO()).WithFields(log.Fields{"component": "diagnostic", "remoteIP": r.RemoteAddr, "method": caller.Name(0), "url": r.URL.String()}) - logger.Info("stack trace") + logger.Info("collecting stack trace") + // FIXME(thaJeztah): make path configurable, or use same location as used by daemon.setupDumpStackTrap path, err := stack.DumpToFile("/tmp/") if err != nil { - logger.WithError(err).Error("failed to write goroutines dump") + logger.WithError(err).Error("failed to write stack trace to file") _, _ = HTTPReply(w, FailCommand(err), jsonOutput) } else { - logger.Info("stack trace done") - _, _ = HTTPReply(w, CommandSucceed(&StringCmd{Info: "goroutine stacks written to " + path}), jsonOutput) + logger.WithField("file", path).Info("wrote stack trace to file") + _, _ = HTTPReply(w, CommandSucceed(&StringCmd{Info: "goroutine stacks written to " + path + "\n"}), jsonOutput) } } From e899092b254d2516efe533228ebff4dd6dca8a9a Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 19 Jan 2025 17:24:06 +0100 Subject: [PATCH 3/8] libnetwork/diagnostic: make DisableDiagnostic idempotent Handle situations where the server is already stopped internally, instead of requiring the caller to do this. Signed-off-by: Sebastiaan van Stijn --- libnetwork/diagnostic/server.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libnetwork/diagnostic/server.go b/libnetwork/diagnostic/server.go index cdd097c3c0428..f95ae6f3d4812 100644 --- a/libnetwork/diagnostic/server.go +++ b/libnetwork/diagnostic/server.go @@ -105,6 +105,9 @@ func (s *Server) EnableDiagnostic(ip string, port int) { func (s *Server) DisableDiagnostic() { s.mu.Lock() defer s.mu.Unlock() + if !s.enable { + return + } s.srv.Shutdown(context.Background()) //nolint:errcheck s.srv = nil From e4abcad7ac5406fd9a90932d9e19e65e99e55aca Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 19 Jan 2025 17:32:21 +0100 Subject: [PATCH 4/8] libnetwork/diagnostic: make EnableDiagnostic, DisableDiagnostic idempotent Handle situations where the server is already started or stopped internally, instead of requiring the caller to do this. Signed-off-by: Sebastiaan van Stijn --- libnetwork/controller.go | 8 ++------ libnetwork/diagnostic/server.go | 8 +++++--- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/libnetwork/controller.go b/libnetwork/controller.go index 4031dbaf90f66..36dfdae067721 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -1094,18 +1094,14 @@ func (c *Controller) Stop() { // StartDiagnostic starts the network diagnostic server listening on port. func (c *Controller) StartDiagnostic(port int) { c.mu.Lock() - if !c.diagnosticServer.IsDiagnosticEnabled() { - c.diagnosticServer.EnableDiagnostic("127.0.0.1", port) - } + c.diagnosticServer.EnableDiagnostic("127.0.0.1", port) c.mu.Unlock() } // StopDiagnostic stops the network diagnostic server. func (c *Controller) StopDiagnostic() { c.mu.Lock() - if c.diagnosticServer.IsDiagnosticEnabled() { - c.diagnosticServer.DisableDiagnostic() - } + c.diagnosticServer.DisableDiagnostic() c.mu.Unlock() } diff --git a/libnetwork/diagnostic/server.go b/libnetwork/diagnostic/server.go index f95ae6f3d4812..57b72cda7c0e2 100644 --- a/libnetwork/diagnostic/server.go +++ b/libnetwork/diagnostic/server.go @@ -77,6 +77,7 @@ func (s *Server) EnableDiagnostic(ip string, port int) { s.port = port + // FIXME(thaJeztah): this check won't allow re-configuring the port on reload. if s.enable { log.G(context.TODO()).Info("The server is already up and running") return @@ -108,9 +109,10 @@ func (s *Server) DisableDiagnostic() { if !s.enable { return } - - s.srv.Shutdown(context.Background()) //nolint:errcheck - s.srv = nil + if s.srv != nil { + s.srv.Shutdown(context.Background()) //nolint:errcheck + s.srv = nil + } s.enable = false log.G(context.TODO()).Info("Disabling the diagnostic server") } From 8f1a49fa8ccced74b2428b3ddfe3903f0a7ac8e4 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 19 Jan 2025 17:36:27 +0100 Subject: [PATCH 5/8] libnetwork: Controller: remove redundant mutex for diagnosticServer diagnosticServer is only written to during controller.New, and the diagnostic server itself already has a mutex on EnableDiagnostic, DisableDiagnostic, and IsDiagnosticEnabled, which should prevent issues trying to concurrently change its state. Signed-off-by: Sebastiaan van Stijn --- libnetwork/controller.go | 6 ------ 1 file changed, 6 deletions(-) diff --git a/libnetwork/controller.go b/libnetwork/controller.go index 36dfdae067721..9c627344b87b4 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -1093,21 +1093,15 @@ func (c *Controller) Stop() { // StartDiagnostic starts the network diagnostic server listening on port. func (c *Controller) StartDiagnostic(port int) { - c.mu.Lock() c.diagnosticServer.EnableDiagnostic("127.0.0.1", port) - c.mu.Unlock() } // StopDiagnostic stops the network diagnostic server. func (c *Controller) StopDiagnostic() { - c.mu.Lock() c.diagnosticServer.DisableDiagnostic() - c.mu.Unlock() } // IsDiagnosticEnabled returns true if the diagnostic server is running. func (c *Controller) IsDiagnosticEnabled() bool { - c.mu.Lock() - defer c.mu.Unlock() return c.diagnosticServer.IsDiagnosticEnabled() } From 16cc0be0e1cb7ed4e9d182ee1964c0c746b53ced Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 19 Jan 2025 18:18:32 +0100 Subject: [PATCH 6/8] libnetwork/diagnostic: move and improve logs for starting/stoping Signed-off-by: Sebastiaan van Stijn --- daemon/reload.go | 1 - libnetwork/diagnostic/server.go | 14 +++++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/daemon/reload.go b/daemon/reload.go index 689e74074911c..0ef40294b4918 100644 --- a/daemon/reload.go +++ b/daemon/reload.go @@ -275,7 +275,6 @@ func (daemon *Daemon) reloadNetworkDiagnosticPort(txn *reloadTxn, newCfg *config return nil } // Enable the network diagnostic if the flag is set with a valid port within the range - log.G(context.TODO()).WithFields(log.Fields{"port": conf.NetworkDiagnosticPort, "ip": "127.0.0.1"}).Warn("Starting network diagnostic server") daemon.netController.StartDiagnostic(conf.NetworkDiagnosticPort) return nil }) diff --git a/libnetwork/diagnostic/server.go b/libnetwork/diagnostic/server.go index 57b72cda7c0e2..05c93e5ff9854 100644 --- a/libnetwork/diagnostic/server.go +++ b/libnetwork/diagnostic/server.go @@ -76,16 +76,18 @@ func (s *Server) EnableDiagnostic(ip string, port int) { defer s.mu.Unlock() s.port = port + log.G(context.TODO()).WithFields(log.Fields{"port": s.port, "ip": ip}).Warn("Starting network diagnostic server") // FIXME(thaJeztah): this check won't allow re-configuring the port on reload. if s.enable { - log.G(context.TODO()).Info("The server is already up and running") + log.G(context.TODO()).WithFields(log.Fields{"port": s.port, "ip": ip}).Info("Network diagnostic server is already up and running") return } - log.G(context.TODO()).Infof("Starting the diagnostic server listening on %d for commands", port) + addr := net.JoinHostPort(ip, strconv.Itoa(s.port)) + log.G(context.TODO()).WithFields(log.Fields{"port": s.port, "ip": ip}).Infof("Starting network diagnostic server listening on %s for commands", addr) srv := &http.Server{ - Addr: net.JoinHostPort(ip, strconv.Itoa(port)), + Addr: addr, Handler: s, ReadHeaderTimeout: 5 * time.Minute, // "G112: Potential Slowloris Attack (gosec)"; not a real concern for our use, so setting a long timeout. } @@ -110,11 +112,13 @@ func (s *Server) DisableDiagnostic() { return } if s.srv != nil { - s.srv.Shutdown(context.Background()) //nolint:errcheck + if err := s.srv.Shutdown(context.Background()); err != nil { + log.G(context.TODO()).WithError(err).Warn("Error during network diagnostic server shutdown") + } s.srv = nil } s.enable = false - log.G(context.TODO()).Info("Disabling the diagnostic server") + log.G(context.TODO()).Info("Network diagnostic server shutdown complete") } // IsDiagnosticEnabled returns true when the debug is enabled From 370c7a30e225e5998dc7339842e45765faebf4ff Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 19 Jan 2025 18:29:50 +0100 Subject: [PATCH 7/8] libnetwork/diagnostic: rename methods - EnableDiagnostic -> Enable - DisableDiagnostic -> Shutdown - IsDiagnosticEnabled -> Enabled Signed-off-by: Sebastiaan van Stijn --- libnetwork/cmd/networkdb-test/dbserver/ndbServer.go | 2 +- libnetwork/controller.go | 6 +++--- libnetwork/diagnostic/server.go | 12 ++++++------ 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libnetwork/cmd/networkdb-test/dbserver/ndbServer.go b/libnetwork/cmd/networkdb-test/dbserver/ndbServer.go index 83ef8931c4668..5d1449a4e94ae 100644 --- a/libnetwork/cmd/networkdb-test/dbserver/ndbServer.go +++ b/libnetwork/cmd/networkdb-test/dbserver/ndbServer.go @@ -62,7 +62,7 @@ func Server(args []string) { nDB.RegisterDiagnosticHandlers(server) server.HandleFunc("/myip", ipaddress) dummyclient.RegisterDiagnosticHandlers(server, nDB) - server.EnableDiagnostic("", port) + server.Enable("", port) // block here select {} } diff --git a/libnetwork/controller.go b/libnetwork/controller.go index 9c627344b87b4..751acb84b471a 100644 --- a/libnetwork/controller.go +++ b/libnetwork/controller.go @@ -1093,15 +1093,15 @@ func (c *Controller) Stop() { // StartDiagnostic starts the network diagnostic server listening on port. func (c *Controller) StartDiagnostic(port int) { - c.diagnosticServer.EnableDiagnostic("127.0.0.1", port) + c.diagnosticServer.Enable("127.0.0.1", port) } // StopDiagnostic stops the network diagnostic server. func (c *Controller) StopDiagnostic() { - c.diagnosticServer.DisableDiagnostic() + c.diagnosticServer.Shutdown() } // IsDiagnosticEnabled returns true if the diagnostic server is running. func (c *Controller) IsDiagnosticEnabled() bool { - return c.diagnosticServer.IsDiagnosticEnabled() + return c.diagnosticServer.Enabled() } diff --git a/libnetwork/diagnostic/server.go b/libnetwork/diagnostic/server.go index 05c93e5ff9854..abf94081cbd70 100644 --- a/libnetwork/diagnostic/server.go +++ b/libnetwork/diagnostic/server.go @@ -70,8 +70,8 @@ func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { s.mux.ServeHTTP(w, r) } -// EnableDiagnostic opens a TCP socket to debug the passed network DB -func (s *Server) EnableDiagnostic(ip string, port int) { +// Enable opens a TCP socket to debug the passed network DB +func (s *Server) Enable(ip string, port int) { s.mu.Lock() defer s.mu.Unlock() @@ -104,8 +104,8 @@ func (s *Server) EnableDiagnostic(ip string, port int) { }(s) } -// DisableDiagnostic stop the debug and closes the tcp socket -func (s *Server) DisableDiagnostic() { +// Shutdown stop the debug and closes the tcp socket +func (s *Server) Shutdown() { s.mu.Lock() defer s.mu.Unlock() if !s.enable { @@ -121,8 +121,8 @@ func (s *Server) DisableDiagnostic() { log.G(context.TODO()).Info("Network diagnostic server shutdown complete") } -// IsDiagnosticEnabled returns true when the debug is enabled -func (s *Server) IsDiagnosticEnabled() bool { +// Enabled returns true when the debug is enabled +func (s *Server) Enabled() bool { s.mu.Lock() defer s.mu.Unlock() return s.enable From 6b14bdb7c7a10a2194e73ee2caea5e18ff608dc4 Mon Sep 17 00:00:00 2001 From: Sebastiaan van Stijn Date: Sun, 19 Jan 2025 19:15:15 +0100 Subject: [PATCH 8/8] daemon/config: validate network-diagnostic-port with this patch: dockerd --network-diagnostic-port -1 --validate unable to configure the Docker daemon with file /etc/docker/daemon.json: merged configuration validation from file and command line flags failed: invalid network-diagnostic-port (-1): value must be between 0 and 65535 Signed-off-by: Sebastiaan van Stijn --- daemon/config/config.go | 3 +++ daemon/config/config_test.go | 18 ++++++++++++++++++ daemon/reload.go | 3 +-- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/daemon/config/config.go b/daemon/config/config.go index 2bafd74659e09..2f6e7f61511ef 100644 --- a/daemon/config/config.go +++ b/daemon/config/config.go @@ -734,6 +734,9 @@ func Validate(config *Config) error { if config.MaxDownloadAttempts < 0 { return errors.Errorf("invalid max download attempts: %d", config.MaxDownloadAttempts) } + if config.NetworkDiagnosticPort < 0 || config.NetworkDiagnosticPort > 65535 { + return errors.Errorf("invalid network-diagnostic-port (%d): value must be between 0 and 65535", config.NetworkDiagnosticPort) + } if _, err := ParseGenericResources(config.NodeGenericResources); err != nil { return err diff --git a/daemon/config/config_test.go b/daemon/config/config_test.go index 6b3909aa9e9a6..2a9a662470f35 100644 --- a/daemon/config/config_test.go +++ b/daemon/config/config_test.go @@ -318,6 +318,24 @@ func TestValidateConfigurationErrors(t *testing.T) { expectedErr: "invalid max download attempts: 0", }, */ + { + name: "negative network-diagnostic-port", + config: &Config{ + CommonConfig: CommonConfig{ + NetworkDiagnosticPort: -1, + }, + }, + expectedErr: "invalid network-diagnostic-port (-1): value must be between 0 and 65535", + }, + { + name: "network-diagnostic-port out of range", + config: &Config{ + CommonConfig: CommonConfig{ + NetworkDiagnosticPort: 65536, + }, + }, + expectedErr: "invalid network-diagnostic-port (65536): value must be between 0 and 65535", + }, { name: "generic resource without =", config: &Config{ diff --git a/daemon/reload.go b/daemon/reload.go index 0ef40294b4918..14fcd6fec4f81 100644 --- a/daemon/reload.go +++ b/daemon/reload.go @@ -266,8 +266,7 @@ func (daemon *Daemon) reloadLiveRestore(txn *reloadTxn, newCfg *configStore, con // reloadNetworkDiagnosticPort updates the network controller starting the diagnostic if the config is valid func (daemon *Daemon) reloadNetworkDiagnosticPort(txn *reloadTxn, newCfg *configStore, conf *config.Config, attributes map[string]string) error { txn.OnCommit(func() error { - if conf == nil || daemon.netController == nil || !conf.IsValueSet("network-diagnostic-port") || - conf.NetworkDiagnosticPort < 1 || conf.NetworkDiagnosticPort > 65535 { + if conf == nil || daemon.netController == nil || !conf.IsValueSet("network-diagnostic-port") || conf.NetworkDiagnosticPort == 0 { // If there is no config make sure that the diagnostic is off if daemon.netController != nil { daemon.netController.StopDiagnostic()