From a0a7284a31179f003406604c86c4c3ee867be1c1 Mon Sep 17 00:00:00 2001 From: Luiz Pegoraro Date: Thu, 1 Feb 2024 12:13:32 -0300 Subject: [PATCH 1/5] fix(agent): log a warning instead of returning an error when the policy to remove was not found. --- agent/backend/pktvisor/policy.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/agent/backend/pktvisor/policy.go b/agent/backend/pktvisor/policy.go index 5e4a3dd6a..d0ce5ece9 100644 --- a/agent/backend/pktvisor/policy.go +++ b/agent/backend/pktvisor/policy.go @@ -8,6 +8,7 @@ import ( "bytes" "fmt" "net/http" + "strings" "github.com/orb-community/orb/agent/policies" "go.uber.org/zap" @@ -60,8 +61,11 @@ func (p *pktvisorBackend) RemovePolicy(data policies.PolicyData) error { } else { name = data.Name } - err := p.request(fmt.Sprintf("policies/%s", name), &resp, http.MethodDelete, http.NoBody, "application/json", RemovePolicyTimeout) - if err != nil { + if err := p.request(fmt.Sprintf("policies/%s", name), &resp, http.MethodDelete, http.NoBody, "application/json", RemovePolicyTimeout); err != nil { + if strings.Contains(err.Error(), "404") { + p.logger.Warn("ignoring error from removing a policy which was not found", zap.String("policy_id", data.ID), zap.String("policy_name", name)) + return nil + } return err } return nil From e98140b001a1397c5f3decebf5ec839a12361029 Mon Sep 17 00:00:00 2001 From: Luiz Pegoraro Date: Thu, 1 Feb 2024 15:16:00 -0300 Subject: [PATCH 2/5] feat(agent): if the mqtt client reports a bug, stop the agent to fail fast. --- agent/comms.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/agent/comms.go b/agent/comms.go index 448c19527..b5cddd110 100644 --- a/agent/comms.go +++ b/agent/comms.go @@ -8,6 +8,7 @@ import ( "context" "crypto/tls" "fmt" + "strings" "time" mqtt "github.com/eclipse/paho.mqtt.golang" @@ -27,8 +28,12 @@ func (a *orbAgent) connect(ctx context.Context, config config.MQTTConfig) (mqtt. 
}) opts.SetConnectionLostHandler(func(client mqtt.Client, err error) { a.logger.Error("connection to mqtt lost", zap.Error(err)) + // If it is a bug on the mqttclient, stop the agent + if strings.Contains(err.Error(), "BUG") { + a.Stop(ctx) + } a.logger.Info("reconnecting....") - client.Connect() + a.requestReconnection(ctx, a.client, config) }) opts.SetPingTimeout(5 * time.Second) opts.SetAutoReconnect(false) From ff54966ebbf7e44d7d9a3aaf968044aa9f51326f Mon Sep 17 00:00:00 2001 From: Luiz Pegoraro Date: Thu, 1 Feb 2024 15:37:02 -0300 Subject: [PATCH 3/5] feat(agent): try to fix panic on pktvisor connection lost. --- agent/otel/otlpmqttexporter/factory.go | 7 ++++--- agent/otel/otlpmqttexporter/otlp.go | 7 +++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/agent/otel/otlpmqttexporter/factory.go b/agent/otel/otlpmqttexporter/factory.go index 57f609c78..5c460c4fc 100644 --- a/agent/otel/otlpmqttexporter/factory.go +++ b/agent/otel/otlpmqttexporter/factory.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "go.opentelemetry.io/otel/sdk/metric" + "go.opentelemetry.io/otel/trace/noop" "github.com/orb-community/orb/agent/otel" "go.uber.org/zap" @@ -13,7 +14,6 @@ import ( "go.opentelemetry.io/collector/consumer" "go.opentelemetry.io/collector/exporter" "go.opentelemetry.io/collector/exporter/exporterhelper" - "go.opentelemetry.io/otel/trace" ) const ( @@ -55,7 +55,7 @@ func CreateDefaultSettings(logger *zap.Logger) exporter.CreateSettings { return exporter.CreateSettings{ TelemetrySettings: component.TelemetrySettings{ Logger: logger, - TracerProvider: trace.NewNoopTracerProvider(), + TracerProvider: noop.NewTracerProvider(), MeterProvider: metric.NewMeterProvider(), }, BuildInfo: component.NewDefaultBuildInfo(), @@ -134,7 +134,8 @@ func CreateMetricsExporter( // explicitly disable since we rely on http.Client timeout logic. 
exporterhelper.WithTimeout(exporterhelper.TimeoutSettings{Timeout: 0}), exporterhelper.WithRetry(oCfg.RetrySettings), - exporterhelper.WithQueue(oCfg.QueueSettings)) + exporterhelper.WithQueue(oCfg.QueueSettings), + exporterhelper.WithShutdown(oce.shutdown)) } func CreateLogsExporter( diff --git a/agent/otel/otlpmqttexporter/otlp.go b/agent/otel/otlpmqttexporter/otlp.go index f2f0cb9f0..d19f77e02 100644 --- a/agent/otel/otlpmqttexporter/otlp.go +++ b/agent/otel/otlpmqttexporter/otlp.go @@ -196,6 +196,13 @@ func (e *baseExporter) injectScopeLogsAttribute(logsScope plog.ScopeLogs, attrib return logsScope } +func (e *baseExporter) shutdown(_ context.Context) error { + if e.config.Client == nil || !(*e.config.Client).IsConnected() { + (*e.config.Client).Disconnect(0) + } + return nil +} + func (e *baseExporter) pushLogs(ctx context.Context, ld plog.Logs) error { tr := plogotlp.NewExportRequest() ref := tr.Logs().ResourceLogs().AppendEmpty() From e541515a5f417a7b50c0cb93254544e11fecdc58 Mon Sep 17 00:00:00 2001 From: Luiz Pegoraro Date: Thu, 1 Feb 2024 15:49:00 -0300 Subject: [PATCH 4/5] feat(agent): reword warning. 
--- agent/backend/pktvisor/policy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agent/backend/pktvisor/policy.go b/agent/backend/pktvisor/policy.go index d0ce5ece9..7e7eef0fc 100644 --- a/agent/backend/pktvisor/policy.go +++ b/agent/backend/pktvisor/policy.go @@ -63,7 +63,7 @@ func (p *pktvisorBackend) RemovePolicy(data policies.PolicyData) error { } if err := p.request(fmt.Sprintf("policies/%s", name), &resp, http.MethodDelete, http.NoBody, "application/json", RemovePolicyTimeout); err != nil { if strings.Contains(err.Error(), "404") { - p.logger.Warn("ignoring error from removing a policy which was not found", zap.String("policy_id", data.ID), zap.String("policy_name", name)) + p.logger.Warn("ignoring error from removing a policy not found", zap.String("policy_id", data.ID), zap.String("policy_name", name)) return nil } return err From 3ea651830fc92f9d14593a85d274b063629bb9b1 Mon Sep 17 00:00:00 2001 From: Luiz Henrique Pegoraro Date: Tue, 6 Feb 2024 12:53:48 -0300 Subject: [PATCH 5/5] fix(agent): invert logic and make the termination explicit when stopping the agent. --- agent/comms.go | 1 + agent/otel/otlpmqttexporter/otlp.go | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/agent/comms.go b/agent/comms.go index b5cddd110..e2d2aecd3 100644 --- a/agent/comms.go +++ b/agent/comms.go @@ -31,6 +31,7 @@ func (a *orbAgent) connect(ctx context.Context, config config.MQTTConfig) (mqtt. 
// If it is a bug on the mqttclient, stop the agent if strings.Contains(err.Error(), "BUG") { a.Stop(ctx) + return } a.logger.Info("reconnecting....") a.requestReconnection(ctx, a.client, config) diff --git a/agent/otel/otlpmqttexporter/otlp.go b/agent/otel/otlpmqttexporter/otlp.go index d19f77e02..beedd373b 100644 --- a/agent/otel/otlpmqttexporter/otlp.go +++ b/agent/otel/otlpmqttexporter/otlp.go @@ -197,7 +197,7 @@ func (e *baseExporter) injectScopeLogsAttribute(logsScope plog.ScopeLogs, attrib } func (e *baseExporter) shutdown(_ context.Context) error { - if e.config.Client == nil || !(*e.config.Client).IsConnected() { + if e.config.Client != nil && (*e.config.Client).IsConnected() { (*e.config.Client).Disconnect(0) } return nil