From 0034a4041760449df7cb82fa4d076df9af62d6a8 Mon Sep 17 00:00:00 2001 From: Jake Schuurmans <143427381+jakeschuurmans@users.noreply.github.com> Date: Tue, 19 Mar 2024 11:38:56 -0400 Subject: [PATCH] FS-1259; Add proc collector and custom metrics. Remove references to serverservice where possible. Small refactor of inventory task. (#40) --- cmd/inventory.go | 10 ++-- internal/app/config.go | 8 ++-- internal/client/clients.go | 2 +- internal/client/tasks.go | 73 ++++++++++++++++-------------- internal/metrics/custom_metrics.go | 49 ++++++++++++++++++++ internal/version/version.go | 52 ++++++++++----------- 6 files changed, 125 insertions(+), 69 deletions(-) create mode 100644 internal/metrics/custom_metrics.go diff --git a/cmd/inventory.go b/cmd/inventory.go index c26a117..196e6fb 100644 --- a/cmd/inventory.go +++ b/cmd/inventory.go @@ -49,6 +49,8 @@ func inventory(ctx context.Context) error { metricsPusher := metrics.NewPusher(logger, "inventory") metricsPusher.AddCollector(collectors.NewGoCollector()) + metricsPusher.AddCollector(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) + metrics.AddCustomMetrics(metricsPusher) err = metricsPusher.Start() if err != nil { return err @@ -62,10 +64,10 @@ func inventory(ctx context.Context) error { v := version.Current() logger.WithFields(logrus.Fields{ - "GitCommit": v.GitCommit, - "AppVersion": v.AppVersion, - "ServerServiceVersion:": v.ServerserviceVersion, // TODO; Swap out with fleetdb once migrated to fleetdb - "ConditionOrcVersion:": v.ConditionorcVersion, + "GitCommit": v.GitCommit, + "AppVersion": v.AppVersion, + "FleetDBVersion:": v.FleetDBVersion, + "ConditionOrcVersion:": v.ConditionorcVersion, }).Info("running task: inventory") newClient, err := client.New(otelCtxWithCancel, cfg, logger) diff --git a/internal/app/config.go b/internal/app/config.go index 93a4ed0..e50aeb1 100644 --- a/internal/app/config.go +++ b/internal/app/config.go @@ -13,7 +13,7 @@ import ( const ( appName string = "fleet_scheduler" - defaultFleetDBClientID = "fleetscheduler-serverservice-api" + defaultFleetDBClientID = "fleetscheduler-serverservice-api" // FleetDB still uses the ServerService Client ID defaultConditionOrcClientID = "fleetscheduler-condition-api" configEnvVariableName = "FLEET_SCHEDULER_CONFIG" @@ -27,7 +27,7 @@ type Configuration struct { // FacilityCode limits this fleet scheduler to events in a facility. FacilityCode string `mapstructure:"facility_code"` - // Defines the fleetdb (serverservice) client configuration parameters + // Defines the fleetdb client configuration parameters FdbCfg *ConfigOIDC `mapstructure:"fleetdb_api"` // Defines the condition orchestrator client configuration parameters CoCfg *ConfigOIDC `mapstructure:"conditionorc_api"` @@ -37,7 +37,7 @@ type ConfigOIDC struct { // Skips OAuth setup if true DisableOAuth bool `mapstructure:"disable_oauth"` - // ServerService OAuth2 parameters + // OAuth2 parameters Endpoint string `mapstructure:"endpoint"` ClientID string `mapstructure:"oidc_client_id"` IssuerEndpoint string `mapstructure:"oidc_issuer_endpoint"` @@ -97,7 +97,7 @@ func validateClientParams(cfg *Configuration) error { cfg.LogLevel = "debug" } - // FleetDB (serverservice) Configuration + // FleetDB Configuration if cfg.FdbCfg == nil { return errors.Wrap(ErrInvalidConfig, "fleetdb_api entry doesnt exist") } diff --git a/internal/client/clients.go b/internal/client/clients.go index 0efe6cd..62b4c02 100644 --- a/internal/client/clients.go +++ b/internal/client/clients.go @@ -37,7 +37,7 @@ func New(ctx context.Context, cfg *app.Configuration, log *logrus.Logger) (*Clie err := client.newFleetDBClient() if err != nil { - return nil, errors.Wrap(err, "Failed to initialize FleetDB Client (Serverservice)") + return nil, errors.Wrap(err, "Failed to initialize FleetDB Client") } err = client.newConditionOrcClient() diff --git a/internal/client/tasks.go b/internal/client/tasks.go index 5c29693..43d7596 100644 --- a/internal/client/tasks.go +++ b/internal/client/tasks.go @@ -1,61 +1,66 @@ package client import ( + "github.com/metal-toolbox/fleet-scheduler/internal/metrics" + fleetdbapi "github.com/metal-toolbox/fleetdb/pkg/api/v1" + "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" ) func (c *Client) CreateConditionInventoryForAllServers(pageSize int) error { // First page, use the response from it to figure out how many pages we have to loop through - // Dont change page size - servers, response, err := c.getServerPage(pageSize, 1) + response, err := c.getServerPageAndCreateInventory(1, pageSize, 0) if err != nil { - c.log.WithFields(logrus.Fields{ - "pageSize": pageSize, - "pageIndex": 1, - }).Logger.Errorf("Failed to get list of servers: %s", err.Error()) return err } totalPages := response.TotalPages - // send first page of servers to the channel - for i := range servers { - err = c.CreateConditionInventory(servers[i].UUID) + // Now that we know how many pages to expect, lets loop through the rest of the pages + for i := 2; i <= totalPages; i++ { + _, err := c.getServerPageAndCreateInventory(i, pageSize, totalPages) if err != nil { return err } } + return nil +} + +func (c *Client) getServerPageAndCreateInventory(pageIndex, pageSize, totalPages int) (*fleetdbapi.ServerResponse, error) { + servers, response, err := c.getServerPage(pageSize, pageIndex) + if err != nil { + c.log.WithFields(logrus.Fields{ + "pageIndex": pageIndex, + "pageSize": pageSize, + "totalPages": totalPages, + }).Logger.Errorf("Failed to get page of servers, attempting to continue: %s", err.Error()) + + metrics.FleetdbErrorCounter.With( + prometheus.Labels{"errors": err.Error()}, + ).Inc() + + return response, err + } + c.log.WithFields(logrus.Fields{ - "index": 1, - "iterations": totalPages, - "got": len(servers), + "pageIndex": pageIndex, + "pageSize": pageSize, + "totalPages": totalPages, }).Trace("Got server page") - // Start the second page, and loop through rest the pages - for i := 2; i <= totalPages; i++ { - servers, _, err = c.getServerPage(pageSize, i) + for i := range servers { + err = c.CreateConditionInventory(servers[i].UUID) if err != nil { - c.log.WithFields(logrus.Fields{ - "pageSize": pageSize, - "pageIndex": i, - }).Logger.Errorf("Failed to get page of servers, attempting to continue: %s", err.Error()) - - continue + metrics.ConditionorcErrorCounter.With( + prometheus.Labels{"errors": err.Error()}, + ).Inc() + return response, err } - c.log.WithFields(logrus.Fields{ - "index": i, - "iterations": totalPages, - "got": len(servers), - }).Trace("Got server page") - - for i := range servers { - err = c.CreateConditionInventory(servers[i].UUID) - if err != nil { - return err - } - } + metrics.InventoryCounter.With( + prometheus.Labels{}, + ).Inc() } - return nil + return response, nil } diff --git a/internal/metrics/custom_metrics.go b/internal/metrics/custom_metrics.go new file mode 100644 index 0000000..a93d856 --- /dev/null +++ b/internal/metrics/custom_metrics.go @@ -0,0 +1,49 @@ +package metrics + +import "github.com/prometheus/client_golang/prometheus" + +var ( + ConditionorcErrorCounter *prometheus.CounterVec + FleetdbErrorCounter *prometheus.CounterVec + + InventoryCounter *prometheus.CounterVec +) + +func init() { + ConditionorcErrorCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "fleet-scheduler", + Subsystem: "conditionorc", + Name: "errors_total", + Help: "a count of all errors attempting to reach conditionorc", + }, []string{ + "errors", + }, + ) + + FleetdbErrorCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "fleet-scheduler", + Subsystem: "fleetdb", + Name: "errors_total", + Help: "a count of all errors attempting to reach fleetdb", + }, []string{ + "errors", + }, + ) + + InventoryCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "fleet-scheduler", + Subsystem: "core", + Name: "errors_total", + Help: "a count of all errors attempting to reach fleet-scheduler dependencies", + }, []string{}, + ) +} + +func AddCustomMetrics(pusher *Pusher) { + pusher.AddCollector(ConditionorcErrorCounter) + pusher.AddCollector(FleetdbErrorCounter) + pusher.AddCollector(InventoryCounter) +} diff --git a/internal/version/version.go b/internal/version/version.go index 93d1d0a..1e5334a 100644 --- a/internal/version/version.go +++ b/internal/version/version.go @@ -8,37 +8,37 @@ import ( ) var ( - GitCommit string - GitBranch string - GitSummary string - BuildDate string - AppVersion string - ConditionorcVersion = conditionorcVersion() - ServerserviceVersion = serverserviceVersion() - GoVersion = runtime.Version() + GitCommit string + GitBranch string + GitSummary string + BuildDate string + AppVersion string + ConditionorcVersion = conditionorcVersion() + FleetDBVersion = fleetdbVersion() + GoVersion = runtime.Version() ) type Version struct { - GitCommit string `json:"git_commit"` - GitBranch string `json:"git_branch"` - GitSummary string `json:"git_summary"` - BuildDate string `json:"build_date"` - AppVersion string `json:"app_version"` - GoVersion string `json:"go_version"` - ServerserviceVersion string `json:"serverservice_version"` - ConditionorcVersion string `json:"conditionorc_version"` + GitCommit string `json:"git_commit"` + GitBranch string `json:"git_branch"` + GitSummary string `json:"git_summary"` + BuildDate string `json:"build_date"` + AppVersion string `json:"app_version"` + GoVersion string `json:"go_version"` + FleetDBVersion string `json:"fleetdb_version"` + ConditionorcVersion string `json:"conditionorc_version"` } func Current() *Version { return &Version{ - GitBranch: GitBranch, - GitCommit: GitCommit, - GitSummary: GitSummary, - BuildDate: BuildDate, - AppVersion: AppVersion, - GoVersion: GoVersion, - ConditionorcVersion: ConditionorcVersion, - ServerserviceVersion: ServerserviceVersion, + GitBranch: GitBranch, + GitCommit: GitCommit, + GitSummary: GitSummary, + BuildDate: BuildDate, + AppVersion: AppVersion, + GoVersion: GoVersion, + ConditionorcVersion: ConditionorcVersion, + FleetDBVersion: FleetDBVersion, } } @@ -46,14 +46,14 @@ func (v *Version) String() string { return fmt.Sprintf("version=%s ref=%s branch=%s built=%s", v.AppVersion, v.GitCommit, v.GitBranch, v.BuildDate) } -func serverserviceVersion() string { +func fleetdbVersion() string { buildInfo, ok := rdebug.ReadBuildInfo() if !ok { return "" } for _, d := range buildInfo.Deps { - if strings.Contains(d.Path, "serverservice") { + if strings.Contains(d.Path, "fleetdb") { return d.Version } }