From 877b5a5083c0baa3587fc900908c0b995fd2b59b Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser Date: Wed, 7 Feb 2024 16:15:14 -0500 Subject: [PATCH 01/59] Initial commit for addition of HPE DL380 --- hpe/dl380/memory.go | 37 +++++++++++++++++ hpe/dl380/network_adapter.go | 65 ++++++++++++++++++++++++++++++ hpe/dl380/power.go | 77 ++++++++++++++++++++++++++++++++++++ hpe/dl380/thermal.go | 54 +++++++++++++++++++++++++ 4 files changed, 233 insertions(+) create mode 100644 hpe/dl380/memory.go create mode 100644 hpe/dl380/network_adapter.go create mode 100644 hpe/dl380/power.go create mode 100644 hpe/dl380/thermal.go diff --git a/hpe/dl380/memory.go b/hpe/dl380/memory.go new file mode 100644 index 0000000..7cd6401 --- /dev/null +++ b/hpe/dl380/memory.go @@ -0,0 +1,37 @@ +/* + * Copyright 2023 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package dl380 + +// /redfish/v1/systems/1/ + +// MemoryMetrics is the top level json object for DL360 Memory metadata +type MemoryMetrics struct { + ID string `json:"Id"` + MemorySummary MemorySummary `json:"MemorySummary"` +} + +// MemorySummary is the json object for DL360 MemorySummary metadata +type MemorySummary struct { + Status StatusMemory `json:"Status"` + TotalSystemMemoryGiB int `json:"TotalSystemMemoryGiB"` + TotalSystemPersistentMemoryGiB int `json:"TotalSystemPersistentMemoryGiB"` +} + +// StatusMemory is the variable to determine if the memory is OK or not +type StatusMemory struct { + HealthRollup string `json:"HealthRollup"` +} diff --git a/hpe/dl380/network_adapter.go b/hpe/dl380/network_adapter.go new file mode 100644 index 0000000..b91622d --- /dev/null +++ b/hpe/dl380/network_adapter.go @@ -0,0 +1,65 @@ +/* + * Copyright 2023 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package dl380 + +// /redfish/v1/Systems/1/BaseNetworkAdapters + +// NetworkAdapter is the top level json object for DL380 Network Adapter metadata +type NetworkAdapter struct { + ID string `json:"Id"` + Firmware Firmware `json:"Firmware"` + Name string `json:"Name"` + PartNumber string `json:"PartNumber"` + PhysicalPorts []PhysicalPorts `json:"PhysicalPorts"` + SerialNumber string `json:"SerialNumber"` + StructuredName string `json:"StructuredName"` + Status Status `json:"Status"` + UEFIDevicePath string `json:"UEFIDevicePath"` +} + +// Firmware is the top level json object for DL380 Network Adapter metadata +type Firmware struct { + Current FirmwareCurrent `json:"Current"` +} + +// FirmwareCurrent contains the version in string format +type FirmwareCurrent struct { + Version string `json:"VersionString"` +} + +// PhysicalPorts contains the metadata for the Chassis NICs +type PhysicalPorts struct { + FullDuplex bool `json:"FullDuplex"` + IPv4Addresses []Addr `json:"IPv4Addresses"` + IPv6Addresses []Addr `json:"IPv6Addresses"` + LinkStatus string `json:"LinkStatus"` + MacAddress string `json:"MacAddress"` + Name string `json:"Name"` + SpeedMbps int `json:"SpeedMbps"` + Status Status `json:"Status"` +} + +// Addr contains the IPv4 or IPv6 Address in string format +type Addr struct { + Address string `json:"Address"` +} + +// Status contains metadata for the health of a particular component/module +type Status struct { + Health string `json:"Health"` + State string `json:"State,omitempty"` +} diff --git a/hpe/dl380/power.go b/hpe/dl380/power.go new file mode 100644 index 0000000..17cc23c --- /dev/null +++ b/hpe/dl380/power.go @@ -0,0 +1,77 @@ +/* + * Copyright 2023 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dl380 + +// /redfish/v1/Chassis/1/Power/ + +// PowerMetrics is the top level json object for Power metadata +type PowerMetrics struct { + ID string `json:"Id"` + Name string `json:"Name"` + PowerControl []PowerControl `json:"PowerControl"` + PowerSupplies []PowerSupply `json:"PowerSupplies"` +} + +// PowerControl is the top level json object for metadata on power supply consumption +type PowerControl struct { + MemberID string `json:"MemberId"` + PowerCapacityWatts int `json:"PowerCapacityWatts"` + PowerConsumedWatts int `json:"PowerConsumedWatts"` + PowerMetrics PowerMetric `json:"PowerMetrics"` +} + +// PowerMetric contains avg/min/max power metadata +type PowerMetric struct { + AverageConsumedWatts int `json:"AverageConsumedWatts"` + IntervalInMin int `json:"IntervalInMin"` + MaxConsumedWatts int `json:"MaxConsumedWatts"` + MinConsumedWatts int `json:"MinConsumedWatts"` +} + +// PowerSupply is the top level json object for metadata on power supply product info +type PowerSupply struct { + FirmwareVersion string `json:"FirmwareVersion"` + LastPowerOutputWatts int `json:"LastPowerOutputWatts"` + LineInputVoltage int `json:"LineInputVoltage"` + LineInputVoltageType string `json:"LineInputVoltageType"` + Manufacturer string `json:"Manufacturer"` + MemberID string `json:"MemberId"` + Model string `json:"Model"` + Name string `json:"Name"` + Oem OemPower `json:"Oem"` + PowerCapacityWatts int `json:"PowerCapacityWatts"` + PowerSupplyType string `json:"PowerSupplyType"` + SerialNumber string `json:"SerialNumber"` + SparePartNumber string 
`json:"SparePartNumber"` + Status Status `json:"Status"` +} + +// OemPower is the top level json object for historical data for wattage +type OemPower struct { + Hpe Hpe `json:"Hpe"` +} + +// Hpe contains metadata on power supply product info +type Hpe struct { + AveragePowerOutputWatts int `json:"AveragePowerOutputWatts"` + BayNumber int `json:"BayNumber"` + HotplugCapable bool `json:"HotplugCapable"` + MaxPowerOutputWatts int `json:"MaxPowerOutputWatts"` + Mismatched bool `json:"Mismatched"` + PowerSupplyStatus Status `json:"PowerSupplyStatus"` + IPDUCapable bool `json:"iPDUCapable"` +} diff --git a/hpe/dl380/thermal.go b/hpe/dl380/thermal.go new file mode 100644 index 0000000..a31a50e --- /dev/null +++ b/hpe/dl380/thermal.go @@ -0,0 +1,54 @@ +/* + * Copyright 2023 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package dl380 + +// /redfish/v1/Chassis/1/Thermal/ + +// ThermalMetrics is the top level json object for DL380 Thermal metadata +type ThermalMetrics struct { + ID string `json:"Id"` + Fans []Fan `json:"Fans"` + Name string `json:"Name"` + Temperatures []Temperature `json:"Temperatures"` +} + +// Fan is the json object for a DL380 fan module +type Fan struct { + MemberID string `json:"MemberId"` + Name string `json:"Name"` + Reading int `json:"Reading"` + ReadingUnits string `json:"ReadingUnits"` + Status StatusThermal `json:"Status"` +} + +// StatusThermal is the variable to determine if a fan or temperature sensor module is OK or not +type StatusThermal struct { + Health string `json:"Health"` + State string `json:"State"` +} + +// Temperature is the json object for a DL380 temperature sensor module +type Temperature struct { + MemberID string `json:"MemberId"` + Name string `json:"Name"` + PhysicalContext string `json:"PhysicalContext"` + ReadingCelsius int `json:"ReadingCelsius"` + SensorNumber int `json:"SensorNumber"` + Status StatusThermal `json:"Status"` + UpperThresholdCritical int `json:"UpperThresholdCritical"` + UpperThresholdFatal int `json:"UpperThresholdFatal"` +} From 90e93ceb8c452b42532b8060bd611ca72c8715b2 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser Date: Wed, 7 Feb 2024 16:17:00 -0500 Subject: [PATCH 02/59] Update copyright info --- hpe/dl380/memory.go | 6 +++--- hpe/dl380/network_adapter.go | 2 +- hpe/dl380/power.go | 2 +- hpe/dl380/thermal.go | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/hpe/dl380/memory.go b/hpe/dl380/memory.go index 7cd6401..ce443cf 100644 --- a/hpe/dl380/memory.go +++ b/hpe/dl380/memory.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,13 +18,13 @@ package dl380 // /redfish/v1/systems/1/ -// MemoryMetrics is the top level json object for DL360 Memory metadata +// MemoryMetrics is the top level json object for DL380 Memory metadata type MemoryMetrics struct { ID string `json:"Id"` MemorySummary MemorySummary `json:"MemorySummary"` } -// MemorySummary is the json object for DL360 MemorySummary metadata +// MemorySummary is the json object for DL380 MemorySummary metadata type MemorySummary struct { Status StatusMemory `json:"Status"` TotalSystemMemoryGiB int `json:"TotalSystemMemoryGiB"` diff --git a/hpe/dl380/network_adapter.go b/hpe/dl380/network_adapter.go index b91622d..ea778ac 100644 --- a/hpe/dl380/network_adapter.go +++ b/hpe/dl380/network_adapter.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/hpe/dl380/power.go b/hpe/dl380/power.go index 17cc23c..3d6b343 100644 --- a/hpe/dl380/power.go +++ b/hpe/dl380/power.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/hpe/dl380/thermal.go b/hpe/dl380/thermal.go index a31a50e..ff1eb86 100644 --- a/hpe/dl380/thermal.go +++ b/hpe/dl380/thermal.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From 09bec39ed0d36f8f3391b3cbf5e4e7ca5aaca7a9 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser Date: Wed, 7 Feb 2024 18:04:40 -0500 Subject: [PATCH 03/59] Initialize drive, exporter, and metrics.go files --- hpe/dl380/drive.go | 1 + hpe/dl380/exporter.go | 376 ++++++++++++++++++++++++++++++++++++++++++ hpe/dl380/metrics.go | 72 ++++++++ 3 files changed, 449 insertions(+) create mode 100644 hpe/dl380/drive.go create mode 100644 hpe/dl380/exporter.go create mode 100644 hpe/dl380/metrics.go diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go new file mode 100644 index 0000000..0ffdd02 --- /dev/null +++ b/hpe/dl380/drive.go @@ -0,0 +1 @@ +// TODO \ No newline at end of file diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go new file mode 100644 index 0000000..4933e25 --- /dev/null +++ b/hpe/dl380/exporter.go @@ -0,0 +1,376 @@ +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + package dl380 + + import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "net" + "net/http" + "net/url" + "strconv" + "strings" + "sync" + "time" + + "github.com/comcast/fishymetrics/common" + "github.com/comcast/fishymetrics/config" + "github.com/comcast/fishymetrics/pool" + "go.uber.org/zap" + + "github.com/hashicorp/go-retryablehttp" + "github.com/prometheus/client_golang/prometheus" + ) + + const ( + // DL380 is a HPE Hardware Device we scrape + DL380 = "DL380" + // THERMAL represents the thermal metric endpoint + THERMAL = "ThermalMetrics" + // POWER represents the power metric endpoint + POWER = "PowerMetrics" + // DRIVE represents the logical drive metric endpoints + DRIVE = "DriveMetrics" + // MEMORY represents the memory metric endpoints + MEMORY = "MemoryMetrics" + // OK is a string representation of the float 1.0 for device status + OK = 1.0 + // BAD is a string representation of the float 0.0 for device status + BAD = 0.0 + // DISABLED is a string representation of the float -1.0 for device status + DISABLED = -1.0 + ) + + var ( + log *zap.Logger + ) + + // Exporter collects chassis manager stats from the given URI and exports them using + // the prometheus metrics package. + type Exporter struct { + ctx context.Context + mutex sync.RWMutex + pool *pool.Pool + host string + + up prometheus.Gauge + deviceMetrics *map[string]*metrics + } + + // NewExporter returns an initialized Exporter for HPE DL380 device. 
+ func NewExporter(ctx context.Context, target, uri string) *Exporter { + var fqdn *url.URL + var tasks []*pool.Task + + log = zap.L() + + tr := &http.Transport{ + Dial: (&net.Dialer{ + Timeout: 3 * time.Second, + }).Dial, + MaxIdleConns: 1, + MaxConnsPerHost: 1, + MaxIdleConnsPerHost: 1, + IdleConnTimeout: 90 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + }, + TLSHandshakeTimeout: 10 * time.Second, + } + + retryClient := retryablehttp.NewClient() + retryClient.CheckRetry = retryablehttp.ErrorPropagatedRetryPolicy + retryClient.HTTPClient.Transport = tr + retryClient.HTTPClient.Timeout = 30 * time.Second + retryClient.Logger = nil + retryClient.RetryWaitMin = 2 * time.Second + retryClient.RetryWaitMax = 2 * time.Second + retryClient.RetryMax = 2 + retryClient.RequestLogHook = func(l retryablehttp.Logger, r *http.Request, i int) { + retryCount := i + if retryCount > 0 { + log.Error("api call "+r.URL.String()+" failed, retry #"+strconv.Itoa(retryCount), zap.Any("trace_id", ctx.Value("traceID"))) + } + } + + // Check that the target passed in has http:// or https:// prefixed + fqdn, err := url.ParseRequestURI(target) + if err != nil { + fqdn = &url.URL{ + Scheme: config.GetConfig().OOBScheme, + Host: target, + } + } + + // TODO: Change targets below + tasks = append(tasks, + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power", POWER, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1", DRIVE, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) + + p := pool.NewPool(tasks, 1) + + // Create new map[string]*metrics for each new Exporter + metrx := NewDeviceMetrics() + + return &Exporter{ + ctx: ctx, + pool: p, + host: fqdn.Host, + up: 
prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "up", + Help: "Was the last scrape of chassis monitor successful.", + }), + deviceMetrics: metrx, + } + } + + // Describe describes all the metrics ever exported by the fishymetrics exporter. It + // implements prometheus.Collector. + func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { + for _, m := range *e.deviceMetrics { + for _, n := range *m { + n.Describe(ch) + } + } + ch <- e.up.Desc() + } + + // Collect fetches the stats from configured fishymetrics location and delivers them + // as Prometheus metrics. It implements prometheus.Collector. + func (e *Exporter) Collect(ch chan<- prometheus.Metric) { + e.mutex.Lock() // To protect metrics from concurrent collects. + defer e.mutex.Unlock() + + e.resetMetrics() + + // perform scrape if target is not on ignored list + if _, ok := common.IgnoredDevices[e.host]; !ok { + e.scrape() + } else { + e.up.Set(float64(2)) + } + + ch <- e.up + e.collectMetrics(ch) + } + + func (e *Exporter) resetMetrics() { + for _, m := range *e.deviceMetrics { + for _, n := range *m { + n.Reset() + } + } + } + + func (e *Exporter) collectMetrics(metrics chan<- prometheus.Metric) { + for _, m := range *e.deviceMetrics { + for _, n := range *m { + n.Collect(metrics) + } + } + } + + func (e *Exporter) scrape() { + + var result uint8 + state := uint8(1) + scrapes := len(e.pool.Tasks) + scrapeChan := make(chan uint8, scrapes) + + // Concurrently call the endpoints to help prevent reaching the maxiumum number of 4 simultaneous sessions + e.pool.Run() + for _, task := range e.pool.Tasks { + var err error + if task.Err != nil { + deviceState := uint8(0) + // If credentials are incorrect we will add host to be ignored until manual intervention + if strings.Contains(task.Err.Error(), "401") { + common.IgnoredDevices[e.host] = common.IgnoredDevice{ + Name: e.host, + Endpoint: "https://" + e.host + "/redfish/v1/Chassis", + Module: DL380, + } + log.Info("added host "+e.host+" to ignored list", 
zap.Any("trace_id", e.ctx.Value("traceID"))) + deviceState = 2 + } else { + deviceState = 0 + } + e.up.Set(float64(deviceState)) + log.Error("error from "+DL380, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) + return + } + + // TODO: Make sure these align with above metric types + switch task.MetricType { + case THERMAL: + err = e.exportThermalMetrics(task.Body) + case POWER: + err = e.exportPowerMetrics(task.Body) + case DRIVE: + err = e.exportDriveMetrics(task.Body) + case MEMORY: + err = e.exportMemoryMetrics(task.Body) + } + + if err != nil { + log.Error("error exporting metrics - from "+DL380, zap.Error(err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) + continue + } + scrapeChan <- 1 + } + + // Get scrape results from goroutine(s) and perform bitwise AND, any failures should + // result in a scrape failure + for i := 0; i < scrapes; i++ { + result = <-scrapeChan + state &= result + } + + e.up.Set(float64(state)) + + } + + // TODO: Modify the below PowerMetrics to fit the DL380 data + // exportPowerMetrics collects the DL380's power metrics in json format and sets the prometheus gauges + func (e *Exporter) exportPowerMetrics(body []byte) error { + + var state float64 + var pm PowerMetrics + var dlPower = (*e.deviceMetrics)["powerMetrics"] + err := json.Unmarshal(body, &pm) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 PowerMetrics - " + err.Error()) + } + + for _, pc := range pm.PowerControl { + (*dlPower)["supplyTotalConsumed"].WithLabelValues(pc.MemberID).Set(float64(pc.PowerConsumedWatts)) + (*dlPower)["supplyTotalCapacity"].WithLabelValues(pc.MemberID).Set(float64(pc.PowerCapacityWatts)) + } + + for _, ps := range pm.PowerSupplies { + if ps.Status.State == "Enabled" { + (*dlPower)["supplyOutput"].WithLabelValues(ps.MemberID, ps.SparePartNumber).Set(float64(ps.LastPowerOutputWatts)) + if ps.Status.Health == "OK" { + state = OK + } else { + state = 
BAD + } + (*dlPower)["supplyStatus"].WithLabelValues(ps.MemberID, ps.SparePartNumber).Set(state) + } + } + + return nil + } + + // TODO: Modify the below ThermalMetrics to fit the DL380 data + // exportThermalMetrics collects the DL380's thermal and fan metrics in json format and sets the prometheus gauges + func (e *Exporter) exportThermalMetrics(body []byte) error { + + var state float64 + var tm ThermalMetrics + var dlThermal = (*e.deviceMetrics)["thermalMetrics"] + err := json.Unmarshal(body, &tm) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 ThermalMetrics - " + err.Error()) + } + + // Iterate through fans + for _, fan := range tm.Fans { + // Check fan status and convert string to numeric values + if fan.Status.State == "Enabled" { + (*dlThermal)["fanSpeed"].WithLabelValues(fan.Name).Set(float64(fan.Reading)) + if fan.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + (*dlThermal)["fanStatus"].WithLabelValues(fan.Name).Set(state) + } + } + + // Iterate through sensors + for _, sensor := range tm.Temperatures { + // Check sensor status and convert string to numeric values + if sensor.Status.State == "Enabled" { + (*dlThermal)["sensorTemperature"].WithLabelValues(strings.TrimRight(sensor.Name, " ")).Set(float64(sensor.ReadingCelsius)) + if sensor.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + (*dlThermal)["sensorStatus"].WithLabelValues(strings.TrimRight(sensor.Name, " ")).Set(state) + } + } + + return nil + } + + // TODO: Modify the below DriveMetrics to fit the DL380 data + // exportDriveMetrics collects the DL380 drive metrics in json format and sets the prometheus gauges + func (e *Exporter) exportDriveMetrics(body []byte) error { + + var state float64 + var dld DriveMetrics + var dlDrive = (*e.deviceMetrics)["driveMetrics"] + err := json.Unmarshal(body, &dld) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 DriveMetrics - " + err.Error()) + } + // Check logical drive is enabled then 
check status and convert string to numeric values + if dld.Status.State == "Enabled" { + if dld.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + + (*dlDrive)["logicalDriveStatus"].WithLabelValues(dld.Name, strconv.Itoa(dld.LogicalDriveNumber), dld.Raid).Set(state) + + return nil + } + + // TODO: Modify the below MemoryMetrics to fit the DL380 data + // exportMemoryMetrics collects the DL380 drive metrics in json format and sets the prometheus gauges + func (e *Exporter) exportMemoryMetrics(body []byte) error { + + var state float64 + var dlm MemoryMetrics + var dlMemory = (*e.deviceMetrics)["memoryMetrics"] + err := json.Unmarshal(body, &dlm) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 MemoryMetrics - " + err.Error()) + } + // Check memory status and convert string to numeric values + if dlm.MemorySummary.Status.HealthRollup == "OK" { + state = OK + } else { + state = BAD + } + + (*dlMemory)["memoryStatus"].WithLabelValues(strconv.Itoa(dlm.MemorySummary.TotalSystemMemoryGiB)).Set(state) + + return nil + } + \ No newline at end of file diff --git a/hpe/dl380/metrics.go b/hpe/dl380/metrics.go new file mode 100644 index 0000000..5fbff6b --- /dev/null +++ b/hpe/dl380/metrics.go @@ -0,0 +1,72 @@ +// TODO: Make sure all metric names align with what's in the exporter. + +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + package dl380 + + import ( + "github.com/prometheus/client_golang/prometheus" + ) + + type metrics map[string]*prometheus.GaugeVec + + func newServerMetric(metricName string, docString string, constLabels prometheus.Labels, labelNames []string) *prometheus.GaugeVec { + return prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: metricName, + Help: docString, + ConstLabels: constLabels, + }, + labelNames, + ) + } + + func NewDeviceMetrics() *map[string]*metrics { + var ( + ThermalMetrics = &metrics{ + "fanSpeed": newServerMetric("dl380_thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), + "fanStatus": newServerMetric("dl380_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), + "sensorTemperature": newServerMetric("dl380_thermal_sensor_temperature", "Current sensor temperature reading in Celsius", nil, []string{"name"}), + "sensorStatus": newServerMetric("dl380_thermal_sensor_status", "Current sensor status 1 = OK, 0 = BAD", nil, []string{"name"}), + } + + PowerMetrics = &metrics{ + "supplyOutput": newServerMetric("dl380_power_supply_output", "Power supply output in watts", nil, []string{"memberId", "sparePartNumber"}), + "supplyStatus": newServerMetric("dl380_power_supply_status", "Current power supply status 1 = OK, 0 = BAD", nil, []string{"memberId", "sparePartNumber"}), + "supplyTotalConsumed": newServerMetric("dl380_power_supply_total_consumed", "Total output of all power supplies in watts", nil, []string{"memberId"}), + "supplyTotalCapacity": newServerMetric("dl380_power_supply_total_capacity", "Total output capacity of all the power supplies", nil, []string{"memberId"}), + } + + DriveMetrics = &metrics{ + "logicalDriveStatus": newServerMetric("dl380_logical_drive_status", "Current logical drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicalDriveNumber", "raid"}), + } + + MemoryMetrics = &metrics{ + "memoryStatus": 
newServerMetric("dl380_memory_status", "Current memory status 1 = OK, 0 = BAD", nil, []string{"totalSystemMemoryGiB"}), + } + + Metrics = &map[string]*metrics{ + "thermalMetrics": ThermalMetrics, + "powerMetrics": PowerMetrics, + "driveMetrics": DriveMetrics, + "memoryMetrics": MemoryMetrics, + } + ) + + return Metrics + } + \ No newline at end of file From 2e80b17663c0fe9869799ec441eaa579e976dc16 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser Date: Wed, 7 Feb 2024 18:10:49 -0500 Subject: [PATCH 04/59] Add HPE DL380 to cmd/fishymetrics/main.go --- cmd/fishymetrics/main.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cmd/fishymetrics/main.go b/cmd/fishymetrics/main.go index c3dc4aa..bc46be9 100644 --- a/cmd/fishymetrics/main.go +++ b/cmd/fishymetrics/main.go @@ -138,6 +138,8 @@ func handler(ctx context.Context, w http.ResponseWriter, r *http.Request) { switch moduleName { case "moonshot": exporter = moonshot.NewExporter(r.Context(), target, uri) + case "dl380": + exporter = dl380.NewExporter(r.Context(), target, uri) case "dl360": exporter = dl360.NewExporter(r.Context(), target, uri) case "dl20": @@ -150,7 +152,7 @@ func handler(ctx context.Context, w http.ResponseWriter, r *http.Request) { exporter, err = s3260m5.NewExporter(r.Context(), target, uri) default: log.Error("'module' parameter does not match available options", zap.String("module", moduleName), zap.String("target", target), zap.Any("trace_id", r.Context().Value("traceID"))) - http.Error(w, "'module' parameter does not match available options: [moonshot, dl360, dl20, c220, s3260m4, s3260m5]", http.StatusBadRequest) + http.Error(w, "'module' parameter does not match available options: [moonshot, dl360, dl380, dl20, c220, s3260m4, s3260m5]", http.StatusBadRequest) return } From edbc0906c812382803c4977cbe6d9fd864d26d97 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Thu, 8 Feb 2024 18:24:39 -0500 Subject: [PATCH 05/59] Add 
DL380 metrics mapping to drive.go --- hpe/dl380/drive.go | 94 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 93 insertions(+), 1 deletion(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index 0ffdd02..aa01a94 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -1 +1,93 @@ -// TODO \ No newline at end of file +/* + * Copyright 2024 Comcast Cable Communications Management, LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package dl380 + +// NVME's +// /redfish/v1/chassis/1/ + +// NVMeMetrics is the top level json object for DL380 NVMe Metrics Metadata +// TODO: Ensure Physical Location maps to the ServiceLabel string within PartLocation +// TODO: Ensure Status maps to the Health string within StatusNVMe +type NVMeMetrics struct { + ID string `json:"Id"` + Model string `json:"Model"` + MediaType string `json:"MediaType"` + PhysicalLocation PartLocation `json:"PhysicalLocation"` + Protocol string `json:"Protocol"` + Status StatusNVMe `json:"Status"` + FailurePredicted bool `json:"FailurePredicted"` + CapacityBytes int `json:"CapacityBytes"` +} + +// PartLocation is a variable that determines the Box and the Bay location of a part +type PartLocation struct { + ServiceLabel string `json:"ServiceLabel"` +} + +// Status/Health for the NVMe drive +type StatusNVMe struct { + Health string `json:"Health"` +} + +// Smart Array Drives +// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ +// Loop through "Members" [] for 
each Array controller +// Example Member: "@odata.id": "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/" +// Check at the ArrayController Member level to see if there is anything in "Links":"LogicalDrives" -> If so, this is where the LOGICAL DRIVE info can be found, else, continue on to "DiskDrives" +// If a member is present in /redfish/v1/Systems/1/SmartStorage/ArrayControllers//LogicalDrives/ , LOOP through the "Members" list and follow that data link +// Example Memeber: "@odata.id": "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/3/LogicalDrives/1/" + +// (only iterate through this if the member count of is > 0.) +// Logical Drives +// TODO: Make sure Status maps to Health string in LogicalDriveStatus +type LogicalDriveMetrics struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + LogicalDriveName string `json:"LogicalDriveName"` + LogicalDriveNumber int `json:"LogicalDriveNumber"` + Name string `json:"Name"` + Raid string `json:"Raid"` + Status LogicalDriveStatus `json:"Status"` + StripeSizebytes int `json:"StripeSizebytes"` +} + +// Logical Drive Status +type LogicalDriveStatus struct { + Health string `json:"Health"` + State string `json:"Enabled"` +} + +// (Always iterate through this /DiskDrives) +// Disk Drives +// TODO: Make sure Status maps to Health string in DiskDriveStatus +type DiskDriveMetrics struct { + Id string `json:"Id"` + CapacityMiB int `json:"CapacityMiB"` + Description string `json:"Description"` + InterfaceType string `json:"InterfaceType"` + Name string `json:"Name"` + Model string `json:"Model"` + Status DiskDriveStatus `json:"Status"` +} + +// Disk Drive Status +type DiskDriveStatus struct { + Health string `json:"Health"` + State string `json:"State"` +} From 79546b7fa23ebf1e88dd3f62d7e53ddac8a29cee Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 9 Feb 
2024 18:29:34 -0500 Subject: [PATCH 06/59] Update exporter.go to include drive logic handling, added todo items --- hpe/dl380/exporter.go | 804 +++++++++++++++++++++++------------------- 1 file changed, 444 insertions(+), 360 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 4933e25..13df655 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -14,363 +14,447 @@ * limitations under the License. */ - package dl380 - - import ( - "context" - "crypto/tls" - "encoding/json" - "fmt" - "net" - "net/http" - "net/url" - "strconv" - "strings" - "sync" - "time" - - "github.com/comcast/fishymetrics/common" - "github.com/comcast/fishymetrics/config" - "github.com/comcast/fishymetrics/pool" - "go.uber.org/zap" - - "github.com/hashicorp/go-retryablehttp" - "github.com/prometheus/client_golang/prometheus" - ) - - const ( - // DL380 is a HPE Hardware Device we scrape - DL380 = "DL380" - // THERMAL represents the thermal metric endpoint - THERMAL = "ThermalMetrics" - // POWER represents the power metric endpoint - POWER = "PowerMetrics" - // DRIVE represents the logical drive metric endpoints - DRIVE = "DriveMetrics" - // MEMORY represents the memory metric endpoints - MEMORY = "MemoryMetrics" - // OK is a string representation of the float 1.0 for device status - OK = 1.0 - // BAD is a string representation of the float 0.0 for device status - BAD = 0.0 - // DISABLED is a string representation of the float -1.0 for device status - DISABLED = -1.0 - ) - - var ( - log *zap.Logger - ) - - // Exporter collects chassis manager stats from the given URI and exports them using - // the prometheus metrics package. - type Exporter struct { - ctx context.Context - mutex sync.RWMutex - pool *pool.Pool - host string - - up prometheus.Gauge - deviceMetrics *map[string]*metrics - } - - // NewExporter returns an initialized Exporter for HPE DL380 device. 
- func NewExporter(ctx context.Context, target, uri string) *Exporter { - var fqdn *url.URL - var tasks []*pool.Task - - log = zap.L() - - tr := &http.Transport{ - Dial: (&net.Dialer{ - Timeout: 3 * time.Second, - }).Dial, - MaxIdleConns: 1, - MaxConnsPerHost: 1, - MaxIdleConnsPerHost: 1, - IdleConnTimeout: 90 * time.Second, - ExpectContinueTimeout: 1 * time.Second, - TLSClientConfig: &tls.Config{ - InsecureSkipVerify: true, - }, - TLSHandshakeTimeout: 10 * time.Second, - } - - retryClient := retryablehttp.NewClient() - retryClient.CheckRetry = retryablehttp.ErrorPropagatedRetryPolicy - retryClient.HTTPClient.Transport = tr - retryClient.HTTPClient.Timeout = 30 * time.Second - retryClient.Logger = nil - retryClient.RetryWaitMin = 2 * time.Second - retryClient.RetryWaitMax = 2 * time.Second - retryClient.RetryMax = 2 - retryClient.RequestLogHook = func(l retryablehttp.Logger, r *http.Request, i int) { - retryCount := i - if retryCount > 0 { - log.Error("api call "+r.URL.String()+" failed, retry #"+strconv.Itoa(retryCount), zap.Any("trace_id", ctx.Value("traceID"))) - } - } - - // Check that the target passed in has http:// or https:// prefixed - fqdn, err := url.ParseRequestURI(target) - if err != nil { - fqdn = &url.URL{ - Scheme: config.GetConfig().OOBScheme, - Host: target, - } - } - - // TODO: Change targets below - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power", POWER, target, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1", DRIVE, target, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) - - p := pool.NewPool(tasks, 1) - - // Create new map[string]*metrics for each new Exporter - metrx := NewDeviceMetrics() - - return &Exporter{ - ctx: ctx, - pool: p, - host: fqdn.Host, - up: 
prometheus.NewGauge(prometheus.GaugeOpts{ - Name: "up", - Help: "Was the last scrape of chassis monitor successful.", - }), - deviceMetrics: metrx, - } - } - - // Describe describes all the metrics ever exported by the fishymetrics exporter. It - // implements prometheus.Collector. - func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { - for _, m := range *e.deviceMetrics { - for _, n := range *m { - n.Describe(ch) - } - } - ch <- e.up.Desc() - } - - // Collect fetches the stats from configured fishymetrics location and delivers them - // as Prometheus metrics. It implements prometheus.Collector. - func (e *Exporter) Collect(ch chan<- prometheus.Metric) { - e.mutex.Lock() // To protect metrics from concurrent collects. - defer e.mutex.Unlock() - - e.resetMetrics() - - // perform scrape if target is not on ignored list - if _, ok := common.IgnoredDevices[e.host]; !ok { - e.scrape() - } else { - e.up.Set(float64(2)) - } - - ch <- e.up - e.collectMetrics(ch) - } - - func (e *Exporter) resetMetrics() { - for _, m := range *e.deviceMetrics { - for _, n := range *m { - n.Reset() - } - } - } - - func (e *Exporter) collectMetrics(metrics chan<- prometheus.Metric) { - for _, m := range *e.deviceMetrics { - for _, n := range *m { - n.Collect(metrics) - } - } - } - - func (e *Exporter) scrape() { - - var result uint8 - state := uint8(1) - scrapes := len(e.pool.Tasks) - scrapeChan := make(chan uint8, scrapes) - - // Concurrently call the endpoints to help prevent reaching the maxiumum number of 4 simultaneous sessions - e.pool.Run() - for _, task := range e.pool.Tasks { - var err error - if task.Err != nil { - deviceState := uint8(0) - // If credentials are incorrect we will add host to be ignored until manual intervention - if strings.Contains(task.Err.Error(), "401") { - common.IgnoredDevices[e.host] = common.IgnoredDevice{ - Name: e.host, - Endpoint: "https://" + e.host + "/redfish/v1/Chassis", - Module: DL380, - } - log.Info("added host "+e.host+" to ignored list", 
zap.Any("trace_id", e.ctx.Value("traceID"))) - deviceState = 2 - } else { - deviceState = 0 - } - e.up.Set(float64(deviceState)) - log.Error("error from "+DL380, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) - return - } - - // TODO: Make sure these align with above metric types - switch task.MetricType { - case THERMAL: - err = e.exportThermalMetrics(task.Body) - case POWER: - err = e.exportPowerMetrics(task.Body) - case DRIVE: - err = e.exportDriveMetrics(task.Body) - case MEMORY: - err = e.exportMemoryMetrics(task.Body) - } - - if err != nil { - log.Error("error exporting metrics - from "+DL380, zap.Error(err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) - continue - } - scrapeChan <- 1 - } - - // Get scrape results from goroutine(s) and perform bitwise AND, any failures should - // result in a scrape failure - for i := 0; i < scrapes; i++ { - result = <-scrapeChan - state &= result - } - - e.up.Set(float64(state)) - - } - - // TODO: Modify the below PowerMetrics to fit the DL380 data - // exportPowerMetrics collects the DL380's power metrics in json format and sets the prometheus gauges - func (e *Exporter) exportPowerMetrics(body []byte) error { - - var state float64 - var pm PowerMetrics - var dlPower = (*e.deviceMetrics)["powerMetrics"] - err := json.Unmarshal(body, &pm) - if err != nil { - return fmt.Errorf("Error Unmarshalling DL380 PowerMetrics - " + err.Error()) - } - - for _, pc := range pm.PowerControl { - (*dlPower)["supplyTotalConsumed"].WithLabelValues(pc.MemberID).Set(float64(pc.PowerConsumedWatts)) - (*dlPower)["supplyTotalCapacity"].WithLabelValues(pc.MemberID).Set(float64(pc.PowerCapacityWatts)) - } - - for _, ps := range pm.PowerSupplies { - if ps.Status.State == "Enabled" { - (*dlPower)["supplyOutput"].WithLabelValues(ps.MemberID, ps.SparePartNumber).Set(float64(ps.LastPowerOutputWatts)) - if ps.Status.Health == "OK" { - state = OK - } else { - state = 
BAD - } - (*dlPower)["supplyStatus"].WithLabelValues(ps.MemberID, ps.SparePartNumber).Set(state) - } - } - - return nil - } - - // TODO: Modify the below ThermalMetrics to fit the DL380 data - // exportThermalMetrics collects the DL380's thermal and fan metrics in json format and sets the prometheus gauges - func (e *Exporter) exportThermalMetrics(body []byte) error { - - var state float64 - var tm ThermalMetrics - var dlThermal = (*e.deviceMetrics)["thermalMetrics"] - err := json.Unmarshal(body, &tm) - if err != nil { - return fmt.Errorf("Error Unmarshalling DL380 ThermalMetrics - " + err.Error()) - } - - // Iterate through fans - for _, fan := range tm.Fans { - // Check fan status and convert string to numeric values - if fan.Status.State == "Enabled" { - (*dlThermal)["fanSpeed"].WithLabelValues(fan.Name).Set(float64(fan.Reading)) - if fan.Status.Health == "OK" { - state = OK - } else { - state = BAD - } - (*dlThermal)["fanStatus"].WithLabelValues(fan.Name).Set(state) - } - } - - // Iterate through sensors - for _, sensor := range tm.Temperatures { - // Check sensor status and convert string to numeric values - if sensor.Status.State == "Enabled" { - (*dlThermal)["sensorTemperature"].WithLabelValues(strings.TrimRight(sensor.Name, " ")).Set(float64(sensor.ReadingCelsius)) - if sensor.Status.Health == "OK" { - state = OK - } else { - state = BAD - } - (*dlThermal)["sensorStatus"].WithLabelValues(strings.TrimRight(sensor.Name, " ")).Set(state) - } - } - - return nil - } - - // TODO: Modify the below DriveMetrics to fit the DL380 data - // exportDriveMetrics collects the DL380 drive metrics in json format and sets the prometheus gauges - func (e *Exporter) exportDriveMetrics(body []byte) error { - - var state float64 - var dld DriveMetrics - var dlDrive = (*e.deviceMetrics)["driveMetrics"] - err := json.Unmarshal(body, &dld) - if err != nil { - return fmt.Errorf("Error Unmarshalling DL380 DriveMetrics - " + err.Error()) - } - // Check logical drive is enabled then 
check status and convert string to numeric values - if dld.Status.State == "Enabled" { - if dld.Status.Health == "OK" { - state = OK - } else { - state = BAD - } - } else { - state = DISABLED - } - - (*dlDrive)["logicalDriveStatus"].WithLabelValues(dld.Name, strconv.Itoa(dld.LogicalDriveNumber), dld.Raid).Set(state) - - return nil - } - - // TODO: Modify the below MemoryMetrics to fit the DL380 data - // exportMemoryMetrics collects the DL380 drive metrics in json format and sets the prometheus gauges - func (e *Exporter) exportMemoryMetrics(body []byte) error { - - var state float64 - var dlm MemoryMetrics - var dlMemory = (*e.deviceMetrics)["memoryMetrics"] - err := json.Unmarshal(body, &dlm) - if err != nil { - return fmt.Errorf("Error Unmarshalling DL380 MemoryMetrics - " + err.Error()) - } - // Check memory status and convert string to numeric values - if dlm.MemorySummary.Status.HealthRollup == "OK" { - state = OK - } else { - state = BAD - } - - (*dlMemory)["memoryStatus"].WithLabelValues(strconv.Itoa(dlm.MemorySummary.TotalSystemMemoryGiB)).Set(state) - - return nil - } - \ No newline at end of file +package dl380 + +import ( + "context" + "crypto/tls" + "encoding/json" + "fmt" + "net" + "net/http" + "net/url" + "strconv" + "strings" + "sync" + "time" + + "github.com/comcast/fishymetrics/common" + "github.com/comcast/fishymetrics/config" + "github.com/comcast/fishymetrics/pool" + "go.uber.org/zap" + + "github.com/hashicorp/go-retryablehttp" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + // DL380 is a HPE Hardware Device we scrape + DL380 = "DL380" + // THERMAL represents the thermal metric endpoint + THERMAL = "ThermalMetrics" + // POWER represents the power metric endpoint + POWER = "PowerMetrics" + // NVME represents the NVMe drive metric endpoint + NVME = "NVMeDriveMetrics" + // DISKDRIVE represents the Disk Drive metric endpoints + DISKDRIVE = "DiskDriveMetrics" + // LOGICALDRIVE represents the Logical drive metric endpoint + 
LOGICALDRIVE = "LogicalDriveMetrics" + //// DRIVE represents the logical drive metric endpoints + //DRIVE = "DriveMetrics" + // MEMORY represents the memory metric endpoints + MEMORY = "MemoryMetrics" + // OK is a string representation of the float 1.0 for device status + OK = 1.0 + // BAD is a string representation of the float 0.0 for device status + BAD = 0.0 + // DISABLED is a string representation of the float -1.0 for device status + DISABLED = -1.0 +) + +var ( + log *zap.Logger +) + +// Exporter collects chassis manager stats from the given URI and exports them using +// the prometheus metrics package. +type Exporter struct { + ctx context.Context + mutex sync.RWMutex + pool *pool.Pool + host string + + up prometheus.Gauge + deviceMetrics *map[string]*metrics +} + +// NewExporter returns an initialized Exporter for HPE DL380 device. +func NewExporter(ctx context.Context, target, uri string) *Exporter { + var fqdn *url.URL + var tasks []*pool.Task + + log = zap.L() + + tr := &http.Transport{ + Dial: (&net.Dialer{ + Timeout: 3 * time.Second, + }).Dial, + MaxIdleConns: 1, + MaxConnsPerHost: 1, + MaxIdleConnsPerHost: 1, + IdleConnTimeout: 90 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + }, + TLSHandshakeTimeout: 10 * time.Second, + } + + retryClient := retryablehttp.NewClient() + retryClient.CheckRetry = retryablehttp.ErrorPropagatedRetryPolicy + retryClient.HTTPClient.Transport = tr + retryClient.HTTPClient.Timeout = 30 * time.Second + retryClient.Logger = nil + retryClient.RetryWaitMin = 2 * time.Second + retryClient.RetryWaitMax = 2 * time.Second + retryClient.RetryMax = 2 + retryClient.RequestLogHook = func(l retryablehttp.Logger, r *http.Request, i int) { + retryCount := i + if retryCount > 0 { + log.Error("api call "+r.URL.String()+" failed, retry #"+strconv.Itoa(retryCount), zap.Any("trace_id", ctx.Value("traceID"))) + } + } + + // Check that the target passed in has http:// or 
https:// prefixed + fqdn, err := url.ParseRequestURI(target) + if err != nil { + fqdn = &url.URL{ + Scheme: config.GetConfig().OOBScheme, + Host: target, + } + } + // TODO: iterate through ArrayControllers: + // List of everything passed to each common.Fetch: e.g.: (fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient) + // For each item in list, parse with new common.FetchIterate, returning a list of every endpoint needed + // then iterate through the list, creating pool.NewTask for each, putting in list newTasks + // then end with (?) tasks = append(tasks, newTasks) + // all logic of finding "links" and parsing whether LogicalDrives or DiskDrives has members will be handled here + + tasks = append(tasks, + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power", POWER, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1", NVME, target, retryClient)), + // if a disk drive, it will be in ArrayControllers/x/ -> Links -> DiskDrives/x/ + // example: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/0/ + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", DISKDRIVE, target, retryClient)), + // if a logical drive, it will be in ArrayControllers/x/ -> Links -> LogicalDrives/x/ + // example: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/3/LogicalDrives/1/ + // pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1", DRIVE, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) + + // tasks need to be refactored, so that tasks will need to be iterated over + // TODO: + + p := pool.NewPool(tasks, 1) + + // Create new map[string]*metrics
for each new Exporter + metrx := NewDeviceMetrics() + + return &Exporter{ + ctx: ctx, + pool: p, + host: fqdn.Host, + up: prometheus.NewGauge(prometheus.GaugeOpts{ + Name: "up", + Help: "Was the last scrape of chassis monitor successful.", + }), + deviceMetrics: metrx, + } +} + +// Describe describes all the metrics ever exported by the fishymetrics exporter. It +// implements prometheus.Collector. +func (e *Exporter) Describe(ch chan<- *prometheus.Desc) { + for _, m := range *e.deviceMetrics { + for _, n := range *m { + n.Describe(ch) + } + } + ch <- e.up.Desc() +} + +// Collect fetches the stats from configured fishymetrics location and delivers them +// as Prometheus metrics. It implements prometheus.Collector. +func (e *Exporter) Collect(ch chan<- prometheus.Metric) { + e.mutex.Lock() // To protect metrics from concurrent collects. + defer e.mutex.Unlock() + + e.resetMetrics() + + // perform scrape if target is not on ignored list + if _, ok := common.IgnoredDevices[e.host]; !ok { + e.scrape() + } else { + e.up.Set(float64(2)) + } + + ch <- e.up + e.collectMetrics(ch) +} + +func (e *Exporter) resetMetrics() { + for _, m := range *e.deviceMetrics { + for _, n := range *m { + n.Reset() + } + } +} + +func (e *Exporter) collectMetrics(metrics chan<- prometheus.Metric) { + for _, m := range *e.deviceMetrics { + for _, n := range *m { + n.Collect(metrics) + } + } +} + +func (e *Exporter) scrape() { + + var result uint8 + state := uint8(1) + scrapes := len(e.pool.Tasks) + scrapeChan := make(chan uint8, scrapes) + + // Concurrently call the endpoints to help prevent reaching the maxiumum number of 4 simultaneous sessions + e.pool.Run() + for _, task := range e.pool.Tasks { + var err error + if task.Err != nil { + deviceState := uint8(0) + // If credentials are incorrect we will add host to be ignored until manual intervention + if strings.Contains(task.Err.Error(), "401") { + common.IgnoredDevices[e.host] = common.IgnoredDevice{ + Name: e.host, + Endpoint: "https://" + 
e.host + "/redfish/v1/Chassis", + Module: DL380, + } + log.Info("added host "+e.host+" to ignored list", zap.Any("trace_id", e.ctx.Value("traceID"))) + deviceState = 2 + } else { + deviceState = 0 + } + e.up.Set(float64(deviceState)) + log.Error("error from "+DL380, zap.Error(task.Err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) + return + } + + switch task.MetricType { + case THERMAL: + err = e.exportThermalMetrics(task.Body) + case POWER: + err = e.exportPowerMetrics(task.Body) + // TODO: does the DRIVE case need to be split into NVME, DDRIVE, LDRIVE? + // case DRIVE: + // err = e.exportDriveMetrics(task.Body) + case NVME: + err = e.exportNVMeDriveMetrics(task.Body) + case DISKDRIVE: + err = e.exportDiskDriveMetrics(task.Body) + case LOGICALDRIVE: + err = e.exportLogicalDriveMetrics(task.Body) + case MEMORY: + err = e.exportMemoryMetrics(task.Body) + } + + if err != nil { + log.Error("error exporting metrics - from "+DL380, zap.Error(err), zap.String("api", task.MetricType), zap.Any("trace_id", e.ctx.Value("traceID"))) + state = 0 // record the failure but still fall through: the drain loop below receives exactly one value per task and would block forever otherwise + } + scrapeChan <- 1 + } + + // Get scrape results from goroutine(s) and perform bitwise AND, any failures should + // result in a scrape failure + for i := 0; i < scrapes; i++ { + result = <-scrapeChan + state &= result + } + + e.up.Set(float64(state)) + +} + +// TODO: Modify the below PowerMetrics to fit the DL380 data +// exportPowerMetrics collects the DL380's power metrics in json format and sets the prometheus gauges +func (e *Exporter) exportPowerMetrics(body []byte) error { + + var state float64 + var pm PowerMetrics + var dlPower = (*e.deviceMetrics)["powerMetrics"] + err := json.Unmarshal(body, &pm) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 PowerMetrics - " + err.Error()) + } + + for _, pc := range pm.PowerControl { + (*dlPower)["supplyTotalConsumed"].WithLabelValues(pc.MemberID).Set(float64(pc.PowerConsumedWatts)) +
(*dlPower)["supplyTotalCapacity"].WithLabelValues(pc.MemberID).Set(float64(pc.PowerCapacityWatts)) + } + + for _, ps := range pm.PowerSupplies { + if ps.Status.State == "Enabled" { + (*dlPower)["supplyOutput"].WithLabelValues(ps.MemberID, ps.SparePartNumber).Set(float64(ps.LastPowerOutputWatts)) + if ps.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + (*dlPower)["supplyStatus"].WithLabelValues(ps.MemberID, ps.SparePartNumber).Set(state) + } + } + + return nil +} + +// exportThermalMetrics collects the DL380's thermal and fan metrics in json format and sets the prometheus gauges +func (e *Exporter) exportThermalMetrics(body []byte) error { + + var state float64 + var tm ThermalMetrics + var dlThermal = (*e.deviceMetrics)["thermalMetrics"] + err := json.Unmarshal(body, &tm) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 ThermalMetrics - " + err.Error()) + } + + // Iterate through fans + for _, fan := range tm.Fans { + // Check fan status and convert string to numeric values + if fan.Status.State == "Enabled" { + (*dlThermal)["fanSpeed"].WithLabelValues(fan.Name).Set(float64(fan.Reading)) + if fan.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + (*dlThermal)["fanStatus"].WithLabelValues(fan.Name).Set(state) + } + } + + // Iterate through sensors + for _, sensor := range tm.Temperatures { + // Check sensor status and convert string to numeric values + if sensor.Status.State == "Enabled" { + (*dlThermal)["sensorTemperature"].WithLabelValues(strings.TrimRight(sensor.Name, " ")).Set(float64(sensor.ReadingCelsius)) + if sensor.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + (*dlThermal)["sensorStatus"].WithLabelValues(strings.TrimRight(sensor.Name, " ")).Set(state) + } + } + + return nil +} + +// exportNVMeDriveMetrics collects the DL380 NVME drive metrics in json format and sets the prometheus gauges +func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { + + var state float64 + var dlnvme 
NVMeDriveMetrics + var dlnvmedrive = (*e.deviceMetrics)["nvmeDriveMetrics"] + err := json.Unmarshal(body, &dlnvme) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 NVMeDriveMetrics - " + err.Error()) + } + // Check NVMe drive is enabled then check status and convert string to numeric values + if dlnvme.Status.State == "Enabled" { + if dlnvme.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + + (*dlnvmedrive)["nvmeDriveStatus"].WithLabelValues(dlnvme.Protocol, dlnvme.ID, dlnvme.Name).Set(state) + + return nil +} + +// exportDiskDriveMetrics collects the DL380 disk drive metrics in json format and sets the prometheus gauges +func (e *Exporter) exportDiskDriveMetrics(body []byte) error { + + var state float64 + var dd DiskDriveMetrics + var dDrive = (*e.deviceMetrics)["diskDriveMetrics"] + err := json.Unmarshal(body, &dd) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 DiskDriveMetrics - " + err.Error()) + } + + // Check disk drive is enabled then check status and convert string to numeric values + if dd.Status.State == "Enabled" { + if dd.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + + // Check whether the "disk drive" is actually a logical drive (a non-empty LogicalDriveName marks a logical drive) + if dd.LogicalDriveName == "" { + // if drive is actually a logical drive, then skip it, otherwise, add metrics.
+ (*dDrive)["diskDriveStatus"].WithLabelValues(dd.Name, strconv.Itoa(dd.CapacityMiB)).Set(state) + } + + return nil +} + +// exportLogicalDriveMetrics collects the DL380 logical drive metrics in json format and sets the prometheus gauges +func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { + + var state float64 + var dld LogicalDriveMetrics + var dlDrive = (*e.deviceMetrics)["logicalDriveMetrics"] + err := json.Unmarshal(body, &dld) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 LogicalDriveMetrics - " + err.Error()) + } + // Check logical drive is enabled then check status and convert string to numeric values + if dld.Status.State == "Enabled" { + if dld.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + // Check if drive is actually a logical drive + if dld.LogicalDriveName != "" { + // if drive is actually a logical drive, then add metrics. + (*dlDrive)["logicalDriveStatus"].WithLabelValues(dld.Name, strconv.Itoa(dld.LogicalDriveNumber), dld.Raid).Set(state) + } + + return nil +} + +// TODO: Modify the below MemoryMetrics to fit the DL380 data +// exportMemoryMetrics collects the DL380 memory metrics in json format and sets the prometheus gauges +func (e *Exporter) exportMemoryMetrics(body []byte) error { + + var state float64 + var dlm MemoryMetrics + var dlMemory = (*e.deviceMetrics)["memoryMetrics"] + err := json.Unmarshal(body, &dlm) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 MemoryMetrics - " + err.Error()) + } + // Check memory status and convert string to numeric values + if dlm.MemorySummary.Status.HealthRollup == "OK" { + state = OK + } else { + state = BAD + } + + (*dlMemory)["memoryStatus"].WithLabelValues(strconv.Itoa(dlm.MemorySummary.TotalSystemMemoryGiB)).Set(state) + + return nil +} From 7e9fc41118f99eff91011806f38b35a1634de40c Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 9 Feb 2024 18:30:20
-0500 Subject: [PATCH 07/59] Update util.go to include todo tasks to add FetchIterate function --- common/util.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/common/util.go b/common/util.go index cea987d..07cc57e 100644 --- a/common/util.go +++ b/common/util.go @@ -70,6 +70,13 @@ type AaaLogoutPayload struct { InCookie string `xml:"inCookie,attr"` } +// TODO: Create a FetchIterate function to further iterate through to get drive endpoints: +// if no sublinks, return list of length 1 +// fetch_iterate would import Fetch +// input is everything that would go to fetch +// output is a [] of fetch returns +// + func Fetch(uri, metricType, host string, client *retryablehttp.Client) func() ([]byte, string, error) { var resp *http.Response var credential *Credential From 51f1bc1bc448e947548c6629183ca5b7e7b4d42e Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 9 Feb 2024 18:31:06 -0500 Subject: [PATCH 08/59] Update drive.go to include additional metrics --- hpe/dl380/drive.go | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index aa01a94..7405bba 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -22,25 +22,39 @@ package dl380 // NVMeMetrics is the top level json object for DL380 NVMe Metrics Metadata // TODO: Ensure Physical Location maps to the ServiceLabel string within PartLocation // TODO: Ensure Status maps to the Health string within StatusNVMe -type NVMeMetrics struct { - ID string `json:"Id"` - Model string `json:"Model"` - MediaType string `json:"MediaType"` - PhysicalLocation PartLocation `json:"PhysicalLocation"` - Protocol string `json:"Protocol"` - Status StatusNVMe `json:"Status"` - FailurePredicted bool `json:"FailurePredicted"` - CapacityBytes int `json:"CapacityBytes"` +type NVMeDriveMetrics struct { + ID string `json:"Id"` + Model string `json:"Model"` + Name string 
`json:"Name"` + MediaType string `json:"MediaType"` + PhysicalLocation PartLocation `json:"PhysicalLocation"` + Protocol string `json:"Protocol"` + Status nvmeDriveStatus `json:"Status"` + FailurePredicted bool `json:"FailurePredicted"` + CapacityBytes int `json:"CapacityBytes"` } -// PartLocation is a variable that determines the Box and the Bay location of a part +// PartLocation is a variable that determines the Box and the Bay location of the NVMe drive type PartLocation struct { ServiceLabel string `json:"ServiceLabel"` } +// Contents of Oem +type Oem struct { + Hpe HpeCont `json:"Hpe"` + NVMeID string `json:"NVMeId"` +} + +// Contents of Hpe +type HpeCont struct { + CurrentTemperatureCelsius int `json:"CurrentTemperatureCelsius"` + DriveStatus nvmeDriveStatus `json:"nvmeDriveStatus"` +} + // Status/Health for the NVMe drive -type StatusNVMe struct { +type nvmeDriveStatus struct { Health string `json:"Health"` + State string `json:"State"` } // Smart Array Drives @@ -84,6 +98,8 @@ type DiskDriveMetrics struct { Name string `json:"Name"` Model string `json:"Model"` Status DiskDriveStatus `json:"Status"` + // Check for logical drive, if disk drive, should return nothing. + LogicalDriveName string `json:"LogicalDriveName,omitempty"` } // Disk Drive Status From cdb95b1b65c1ddebfb67e78fcffbf387bf140cc2 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 9 Feb 2024 18:31:48 -0500 Subject: [PATCH 09/59] Update metrics.go to handle various drive metrics --- hpe/dl380/metrics.go | 121 ++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 53 deletions(-) diff --git a/hpe/dl380/metrics.go b/hpe/dl380/metrics.go index 5fbff6b..8726b60 100644 --- a/hpe/dl380/metrics.go +++ b/hpe/dl380/metrics.go @@ -16,57 +16,72 @@ * limitations under the License. 
*/ - package dl380 +package dl380 - import ( - "github.com/prometheus/client_golang/prometheus" - ) - - type metrics map[string]*prometheus.GaugeVec - - func newServerMetric(metricName string, docString string, constLabels prometheus.Labels, labelNames []string) *prometheus.GaugeVec { - return prometheus.NewGaugeVec( - prometheus.GaugeOpts{ - Name: metricName, - Help: docString, - ConstLabels: constLabels, - }, - labelNames, - ) - } - - func NewDeviceMetrics() *map[string]*metrics { - var ( - ThermalMetrics = &metrics{ - "fanSpeed": newServerMetric("dl380_thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), - "fanStatus": newServerMetric("dl380_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), - "sensorTemperature": newServerMetric("dl380_thermal_sensor_temperature", "Current sensor temperature reading in Celsius", nil, []string{"name"}), - "sensorStatus": newServerMetric("dl380_thermal_sensor_status", "Current sensor status 1 = OK, 0 = BAD", nil, []string{"name"}), - } - - PowerMetrics = &metrics{ - "supplyOutput": newServerMetric("dl380_power_supply_output", "Power supply output in watts", nil, []string{"memberId", "sparePartNumber"}), - "supplyStatus": newServerMetric("dl380_power_supply_status", "Current power supply status 1 = OK, 0 = BAD", nil, []string{"memberId", "sparePartNumber"}), - "supplyTotalConsumed": newServerMetric("dl380_power_supply_total_consumed", "Total output of all power supplies in watts", nil, []string{"memberId"}), - "supplyTotalCapacity": newServerMetric("dl380_power_supply_total_capacity", "Total output capacity of all the power supplies", nil, []string{"memberId"}), - } - - DriveMetrics = &metrics{ - "logicalDriveStatus": newServerMetric("dl380_logical_drive_status", "Current logical drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicalDriveNumber", "raid"}), - } - - MemoryMetrics = &metrics{ - "memoryStatus": 
newServerMetric("dl380_memory_status", "Current memory status 1 = OK, 0 = BAD", nil, []string{"totalSystemMemoryGiB"}), - } - - Metrics = &map[string]*metrics{ - "thermalMetrics": ThermalMetrics, - "powerMetrics": PowerMetrics, - "driveMetrics": DriveMetrics, - "memoryMetrics": MemoryMetrics, - } - ) - - return Metrics - } - \ No newline at end of file +import ( + "github.com/prometheus/client_golang/prometheus" +) + +type metrics map[string]*prometheus.GaugeVec + +func newServerMetric(metricName string, docString string, constLabels prometheus.Labels, labelNames []string) *prometheus.GaugeVec { + return prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: metricName, + Help: docString, + ConstLabels: constLabels, + }, + labelNames, + ) +} + +func NewDeviceMetrics() *map[string]*metrics { + var ( + ThermalMetrics = &metrics{ + "fanSpeed": newServerMetric("dl380_thermal_fan_speed", "Current fan speed in the unit of percentage, possible values are 0 - 100", nil, []string{"name"}), + "fanStatus": newServerMetric("dl380_thermal_fan_status", "Current fan status 1 = OK, 0 = BAD", nil, []string{"name"}), + "sensorTemperature": newServerMetric("dl380_thermal_sensor_temperature", "Current sensor temperature reading in Celsius", nil, []string{"name"}), + "sensorStatus": newServerMetric("dl380_thermal_sensor_status", "Current sensor status 1 = OK, 0 = BAD", nil, []string{"name"}), + } + + PowerMetrics = &metrics{ + "supplyOutput": newServerMetric("dl380_power_supply_output", "Power supply output in watts", nil, []string{"memberId", "sparePartNumber"}), + "supplyStatus": newServerMetric("dl380_power_supply_status", "Current power supply status 1 = OK, 0 = BAD", nil, []string{"memberId", "sparePartNumber"}), + "supplyTotalConsumed": newServerMetric("dl380_power_supply_total_consumed", "Total output of all power supplies in watts", nil, []string{"memberId"}), + "supplyTotalCapacity": newServerMetric("dl380_power_supply_total_capacity", "Total output capacity of all the power 
supplies", nil, []string{"memberId"}), + } + //TODO: Ensure casing of vars passed in the string are valid + // Splitting out the three different types of drives to gather metrics on each (NVMe, Disk Drive, and Logical Drive) + NVMeDriveMetrics = &metrics{ + "nvmeDriveStatus": newServerMetric("dl380_nvme_drive_status", "Current NVME status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"nvmeDriveStatus"}), // StatusNVMe values? + } + //TODO: Ensure casing of vars passed in the string are valid + DiskDriveMetrics = &metrics{ + "diskDriveStatus": newServerMetric("dl380_disk_drive_status", "Current Disk Drive status 1 = OK, 0 = BAD", nil, []string{"diskDriveStatus"}), // DiskDriveStatus values? + } + //TODO: Ensure casing of vars passed in the string are valid + LogicalDriveMetrics = &metrics{ + "logicalDriveStatus": newServerMetric("dl380_logical_drive_status", "Current Logical Drive Status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicalDriveNumber", "raid"}), // LogicalDriveMetrics values? 
+ } + + // DriveMetrics = &metrics{ + // "logicalDriveStatus": newServerMetric("dl380_logical_drive_status", "Current logical drive status 1 = OK, 0 = BAD, -1 = DISABLED", nil, []string{"name", "logicalDriveNumber", "raid"}), + // } + + MemoryMetrics = &metrics{ + "memoryStatus": newServerMetric("dl380_memory_status", "Current memory status 1 = OK, 0 = BAD", nil, []string{"totalSystemMemoryGiB"}), + } + + Metrics = &map[string]*metrics{ + "thermalMetrics": ThermalMetrics, + "powerMetrics": PowerMetrics, + "nvmeMetrics": NVMeDriveMetrics, + "diskDriveMetrics": DiskDriveMetrics, + "logicalDriveMetrics": LogicalDriveMetrics, + //"driveMetrics": DriveMetrics, + "memoryMetrics": MemoryMetrics, + } + ) + + return Metrics +} From 45ae7777e06a613dce8e8113ac8acd98939cb0ca Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Sun, 11 Feb 2024 11:44:33 -0500 Subject: [PATCH 10/59] Update main.go to include DL380 import --- cmd/fishymetrics/main.go | 1 + 1 file changed, 1 insertion(+) diff --git a/cmd/fishymetrics/main.go b/cmd/fishymetrics/main.go index bc46be9..3a6b5ee 100644 --- a/cmd/fishymetrics/main.go +++ b/cmd/fishymetrics/main.go @@ -40,6 +40,7 @@ import ( "github.com/comcast/fishymetrics/config" "github.com/comcast/fishymetrics/hpe/dl20" "github.com/comcast/fishymetrics/hpe/dl360" + "github.com/comcast/fishymetrics/hpe/dl380" "github.com/comcast/fishymetrics/hpe/moonshot" "github.com/comcast/fishymetrics/logger" "github.com/comcast/fishymetrics/middleware/muxprom" From 999db6056824ffdeafbab186d6d417b278cec3f7 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Sun, 11 Feb 2024 12:15:04 -0500 Subject: [PATCH 11/59] Update util.go to include FetchIterate logic and todo --- common/util.go | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/common/util.go b/common/util.go index 07cc57e..d105572 100644 --- 
a/common/util.go +++ b/common/util.go @@ -77,6 +77,92 @@ type AaaLogoutPayload struct { // output is a [] of fetch returns // +func FetchIterate(uri, metricType, host string, client *retryablehttp.Client) func() ([]byte, string, error) { + var resp *http.Response + var credential *Credential + var err error + retryCount := 0 + endpoints := []string{uri} // Store the initial endpoint + output := [][]byte{} // Output list of fetch returns + + return func() ([]byte, string, error) { + // Add a 100 milliseconds delay in between requests because cisco devices respond in a non idiomatic manner + time.Sleep(100 * time.Millisecond) + + // Check if there are any endpoints left to iterate through + if len(endpoints) == 0 { + if len(output) == 0 { + return nil, metricType, fmt.Errorf("no more endpoints to iterate through") + } + return output[0], metricType, nil + } + + // Get the next endpoint from the list + nextEndpoint := endpoints[0] + endpoints = endpoints[1:] + + req := BuildRequest(nextEndpoint, host) + resp, err = DoRequest(client, req) + if err != nil { + return nil, metricType, err + } + defer resp.Body.Close() + if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { + if resp.StatusCode == http.StatusNotFound { + for retryCount < 3 && resp.StatusCode == http.StatusNotFound { + time.Sleep(client.RetryWaitMin) + resp, err = DoRequest(client, req) + retryCount = retryCount + 1 + } + if err != nil { + return nil, metricType, err + } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { + return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) + } + } else if resp.StatusCode == http.StatusUnauthorized { + // Credentials may have rotated, go to vault and get the latest + credential, err = ChassisCreds.GetCredentials(context.Background(), host) + if err != nil { + return nil, metricType, fmt.Errorf("issue retrieving credentials from vault using target: %s", host) + } + 
ChassisCreds.Set(host, credential) + + // build new request with updated credentials + req = BuildRequest(nextEndpoint, host) + + time.Sleep(client.RetryWaitMin) + resp, err = DoRequest(client, req) + if err != nil { + return nil, metricType, fmt.Errorf("Retry DoRequest failed - " + err.Error()) + } + if resp.StatusCode == http.StatusUnauthorized { + return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) + } + } else { + return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) + } + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, metricType, fmt.Errorf("Error reading Response Body - " + err.Error()) + } + + // Parse the body to extract any additional endpoints + var endpointsFromResponse []string + // TODO: Parse the body to extract the endpoints and store them in the endpointsFromResponse slice + + // Append the new endpoints to the existing list + endpoints = append(endpoints, endpointsFromResponse...) + + // Add the body to the output list + output = append(output, body) + + // Recursively call the FetchIterate function to iterate through the remaining endpoints + return FetchIterate(endpoints[0], metricType, host, client)() + } +} + func Fetch(uri, metricType, host string, client *retryablehttp.Client) func() ([]byte, string, error) { var resp *http.Response var credential *Credential From 423c672541b85f523de73a95a5d4347f4eeccb89 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 12 Feb 2024 10:24:24 -0500 Subject: [PATCH 12/59] Update drive.go to include Collection struct for urls within the arraycontrollers endpoint --- hpe/dl380/drive.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index 7405bba..5360686 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -107,3 +107,11 @@ type DiskDriveStatus struct { Health string `json:"Health"` State string `json:"State"` } + +// Collection returns 
an array of the endpoints from the /ArrayControllers endpoint +type Collection struct { + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members"` + MembersCount int `json:"Members@odata.count"` +} From f1ced8d745cd240fd2935e28d74d277bffbd7d02 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 12 Feb 2024 10:36:32 -0500 Subject: [PATCH 13/59] Update exporter.go to include getArrayControllerEndpoint func --- hpe/dl380/exporter.go | 54 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 52 insertions(+), 2 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 13df655..527eaeb 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -21,6 +21,7 @@ import ( "crypto/tls" "encoding/json" "fmt" + "io/ioutil" "net" "net/http" "net/url" @@ -135,15 +136,22 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power", POWER, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"Chassis/1", NVME, target, retryClient)), + // // if a logical drive, it will be in ArrayControllers/x/ -> Links -> DiskDrives/x/ // example: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/0/ pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", DISKDRIVE, target, retryClient)), // if a logical drive, it will be in ArrayControllers/x/ -> Links -> LogicalDrives/x/ - // example: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/3/LogicalDrives/1/ - // pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/0/LogicalDrives/1", DRIVE, target, retryClient)), + // example: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/3/DiskDrives/1/ + // 
pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) + // Loop through Members in ArrayControllers. Further loop through each of those to find anything in the /LogicalDrives or /DiskDrives enpoints + for _, ac := range arrayControllers.Members { + tasks = append(tasks, + pool.NewTask(common.Fetch(fqdn.String()+uri+"/", DISKDRIVE, target, retryClient))) + } + // tasks need to be refactored, so that tasks will need to be iterated over // TODO: @@ -458,3 +466,45 @@ func (e *Exporter) exportMemoryMetrics(body []byte) error { return nil } + +func getArrayControllerEndpoint(url, host string, client *retryablehttp.Client) (Collection, error) { + var ac Collection + var resp *http.Response + var err error + retryCount := 0 + req := common.BuildRequest(url, host) + + resp, err = common.DoRequest(client, req) + if err != nil { + return ac, err + } + defer resp.Body.Close() + if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { + if resp.StatusCode == http.StatusNotFound { + for retryCount < 3 && resp.StatusCode == http.StatusNotFound { + time.Sleep(client.RetryWaitMin) + resp, err = common.DoRequest(client, req) + retryCount = retryCount + 1 + } + if err != nil { + return ac, err + } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { + return ac, fmt.Errorf("Http status %d", resp.StatusCode) + } + } else { + return ac, fmt.Errorf("Http status %d", resp.StatusCode) + } + } + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return ac, fmt.Errorf("Error reading Response Body - ", + err.Error()) + } + + err = json.Unmarshal(body, &ac) + if err != nil { + return ac, fmt.Errorf("Error Unmarshalling HPE DL380 ArrayController struct - " + err.Error()) + } + + return ac, nil + +} From 1a14e486e0b3aba03d67c4e103402e857d85e255 Mon Sep 17 
00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 12 Feb 2024 13:38:08 -0500 Subject: [PATCH 14/59] Add arrayControllerIterate function to DL380 exporter.go --- hpe/dl380/exporter.go | 59 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 527eaeb..939b349 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -21,7 +21,7 @@ import ( "crypto/tls" "encoding/json" "fmt" - "io/ioutil" + "io" "net" "net/http" "net/url" @@ -84,6 +84,8 @@ type Exporter struct { func NewExporter(ctx context.Context, target, uri string) *Exporter { var fqdn *url.URL var tasks []*pool.Task + // controller is used when looping through ArrayControllers endpoint and appended to the endpoint for further looping + var controller string log = zap.L() @@ -125,6 +127,18 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { Host: target, } } + + // ArrayControllers endpoint used for DiskDrives and LogicalDrives scrapes + arrayControllersEndpoints, err := getArrayControllerEndpoint(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", target, retryClient) + if err != nil { + log.Error("error when getting array controllers endpoint from "+DL380, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } + + if len(arrayControllersEndpoints.Members) > 0 { + controller = arrayControllersEndpoints.Links. 
+ } + // TODO: iterate through ArrayControllers: // List of everything passed to each common.Fetch: e.g.: (fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient) // For each item in list, parse with new common.FetchIterate, returning a list of every endpoint needed @@ -147,7 +161,7 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) // Loop through Members in ArrayControllers. Further loop through each of those to find anything in the /LogicalDrives or /DiskDrives enpoints - for _, ac := range arrayControllers.Members { + for _, controller := range ac.Members { tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+uri+"/", DISKDRIVE, target, retryClient))) } @@ -253,9 +267,6 @@ func (e *Exporter) scrape() { err = e.exportThermalMetrics(task.Body) case POWER: err = e.exportPowerMetrics(task.Body) - // TODO: does the DRIVE case need to be split into NVME, DDRIVE, LDRIVE? 
- // case DRIVE: - // err = e.exportDriveMetrics(task.Body) case NVME: err = e.exportNVMeDriveMetrics(task.Body) case DISKDRIVE: @@ -423,7 +434,7 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { var dlDrive = (*e.deviceMetrics)["logicalDriveMetrics"] err := json.Unmarshal(body, &dld) if err != nil { - return fmt.Errorf("Error Unmarshalling DL380 DriveMetrics - " + err.Error()) + return fmt.Errorf("Error Unmarshalling DL380 logicalDriveMetrics - " + err.Error()) } // Check logical drive is enabled then check status and convert string to numeric values if dld.Status.State == "Enabled" { @@ -467,6 +478,7 @@ func (e *Exporter) exportMemoryMetrics(body []byte) error { return nil } +// getArrayControllerEndpoint collects the DL380 ArrayController members and adds them to the Collection for further looping func getArrayControllerEndpoint(url, host string, client *retryablehttp.Client) (Collection, error) { var ac Collection var resp *http.Response @@ -495,9 +507,9 @@ func getArrayControllerEndpoint(url, host string, client *retryablehttp.Client) return ac, fmt.Errorf("Http status %d", resp.StatusCode) } } - body, err := ioutil.ReadAll(resp.Body) + body, err := io.ReadAll(resp.Body) if err != nil { - return ac, fmt.Errorf("Error reading Response Body - ", + err.Error()) + return ac, fmt.Errorf("Error reading Response Body - " + err.Error()) } err = json.Unmarshal(body, &ac) @@ -508,3 +520,34 @@ func getArrayControllerEndpoint(url, host string, client *retryablehttp.Client) return ac, nil } + +// arrayControllerIterate loops through members in the arrayController, iterates through the "Links" of each, +// and exports the metrics if found in LogicalDrives or PhysicalDrives endpoints. 
+ +func (e *Exporter) arrayControllerIterate(ac Collection) { + for _, member := range ac.Members { + for _, link := range member.Links { + if link.Rel == "LogicalDrives" { + logicalDrives, err := getLogicalDrivesEndpoint(link.Href, e.host, e.pool.Client) + if err != nil { + log.Error("Error getting LogicalDrives endpoint", zap.Error(err)) + continue + } + e.exportLogicalDriveMetrics(logicalDrives) + + } else if link.Rel == "PhysicalDrives" { + physicalDrives, err := getPhysicalDrivesEndpoint(link.Href, e.host, e.pool.Client) + if err != nil { + log.Error("Error getting PhysicalDrives endpoint", zap.Error(err)) + continue + } + e.exportPhysicalDriveMetrics(physicalDrives) + } + } + } +} + +// TODO write getLogicalDrivesEndpoint function + + +// TODO write getPhysicalDrivesEndpoint function From bd50cff22f86cebc8774d3ea037d25e90ccba173 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Tue, 13 Feb 2024 10:51:58 -0500 Subject: [PATCH 15/59] Update drive.go to include ArrayControllerObject and Links structures --- hpe/dl380/drive.go | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index 5360686..c4c0d8c 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -115,3 +115,20 @@ type Collection struct { } `json:"Members"` MembersCount int `json:"Members@odata.count"` } + +// Main ArrayController JSON object + +type ArrayControllerObject struct { + Links Links `json:"Links"` + Model string `json:"Model"` +} + +// Main ArrayController JSON object Links +type Links struct { + LogicalDrives struct { + URL string `json:"@odata.id"` + } + PhysicalDrives struct { + URL string `json:"@odata.id"` + } +} From 1ddabddc0046ac97886a67172e4aad84d2f00f85 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Tue, 13 Feb 2024 16:56:23 -0500 Subject: [PATCH 16/59] Update drive.go to include nested array controller 
structs --- hpe/dl380/drive.go | 72 ++++++++++++++++++++++++++++++---------------- 1 file changed, 47 insertions(+), 25 deletions(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index c4c0d8c..299c508 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -57,15 +57,6 @@ type nvmeDriveStatus struct { State string `json:"State"` } -// Smart Array Drives -// /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ -// Loop through "Members" [] for each Array controller -// Example Member: "@odata.id": "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/0/" -// Check at the ArrayController Member level to see if there is anything in "Links":"LogicalDrives" -> If so, this is where the LOGICAL DRIVE info can be found, else, continue on to "DiskDrives" -// If a member is present in /redfish/v1/Systems/1/SmartStorage/ArrayControllers//LogicalDrives/ , LOOP through the "Members" list and follow that data link -// Example Memeber: "@odata.id": "/redfish/v1/Systems/1/SmartStorage/ArrayControllers/3/LogicalDrives/1/" - -// (only iterate through this if the member count of is > 0.) 
// Logical Drives // TODO: Make sure Status maps to Health string in LogicalDriveStatus type LogicalDriveMetrics struct { @@ -108,27 +99,58 @@ type DiskDriveStatus struct { State string `json:"State"` } -// Collection returns an array of the endpoints from the /ArrayControllers endpoint -type Collection struct { - Members []struct { - URL string `json:"@odata.id"` - } `json:"Members"` - MembersCount int `json:"Members@odata.count"` +// ArrayController +type ArrayController struct { + Members Members `json:"Members"` + MembersCount int `json:"@odata.count"` } -// Main ArrayController JSON object +// ArrayController Members +type Members struct { + URL string `json:"@odata.id"` +} + +type Controller struct { + Links Links `json:"Links"` +} -type ArrayControllerObject struct { - Links Links `json:"Links"` - Model string `json:"Model"` +// ArrayController LinksInMembers +type LinksInMembers struct { + Links Links `json:"Links"` } -// Main ArrayController JSON object Links +// ArrayController Links type Links struct { - LogicalDrives struct { - URL string `json:"@odata.id"` - } - PhysicalDrives struct { + LogicalDrives driveURL `json:"LogicalDrives"` + DiskDrives driveURL `json:"DiskDrives"` +} + +// URL string from within Logical Drives or Disk Drives +type driveURL struct { + URL string `json:"@odata.id"` +} + +// // Main ArrayController JSON object + +// type ArrayControllerObject struct { +// Links Links `json:"Links"` +// Model string `json:"Model"` +// } + +// // Main ArrayController JSON object Links +// type Links struct { +// LogicalDrives struct { +// URL string `json:"@odata.id"` +// } +// PhysicalDrives struct { +// URL string `json:"@odata.id"` +// } +// } + +// Collection returns an array of the endpoints from the /ArrayControllers endpoint +type Collection struct { + Members []struct { URL string `json:"@odata.id"` - } + } `json:"Members"` + MembersCount int `json:"Members@odata.count"` } From f31d17cb6aef4a5d2c6207a003e398897008d64c Mon Sep 17 00:00:00 
2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Tue, 13 Feb 2024 18:13:07 -0500 Subject: [PATCH 17/59] Update exporter.go to include looping logic for ArrayController drives --- hpe/dl380/exporter.go | 129 ++++++++++++++++++++++++------------------ 1 file changed, 75 insertions(+), 54 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 939b349..49a8f0d 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -52,8 +52,8 @@ const ( DISKDRIVE = "DiskDriveMetrics" // LOGICALDRIVE represents the Logical drive metric endpoint LOGICALDRIVE = "LogicalDriveMetrics" - //// DRIVE represents the logical drive metric endpoints - //DRIVE = "DriveMetrics" + // ARRAYCONTROLLER represents the Array Controller metric endpoint + ARRAYCONTROLLER = "ArrayControllerMetrics" // MEMORY represents the memory metric endpoints MEMORY = "MemoryMetrics" // OK is a string representation of the float 1.0 for device status @@ -85,7 +85,6 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { var fqdn *url.URL var tasks []*pool.Task // controller is used when looping through ArrayControllers endpoint and appended to the endpoint for further looping - var controller string log = zap.L() @@ -128,47 +127,29 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { } } - // ArrayControllers endpoint used for DiskDrives and LogicalDrives scrapes - arrayControllersEndpoints, err := getArrayControllerEndpoint(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", target, retryClient) + // ArrayControllers endpoint array for disk discovery + ac, err := getArrayControllerEndpoint(fqdn.String()+uri+"/SmartStorage/ArrayControllers", target, retryClient) if err != nil { - log.Error("error when getting array controllers endpoint from "+DL380, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil, err + log.Error("error when getting ArrayControllers endpoint from "+DL380, 
zap.Error(err), zap.Any("trace_id", ctx.Value("TraceID"))) } - if len(arrayControllersEndpoints.Members) > 0 { - controller = arrayControllersEndpoints.Links. - } - - // TODO: iterate through ArrayControllers: - // List of everything passed to each common.Fetch: e.g.: (fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient) - // For each item in list, parse with new common.FetchIterate, returning a list of every endpoint needed - // then iterate through the list, creating pool.NewTask for each, putting in list newTasks - // then end with (?) tasks = append(tasks, newTasks) - // all logic of finding "links" and parsing whether LogicalDrives or DiskDrives has members will be handled here - + // Tasks for pool to perform tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power", POWER, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"Chassis/1", NVME, target, retryClient)), - // - // if a logical drive, it will be in ArrayControllers/x/ -> Links -> DiskDrives/x/ - // example: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/2/DiskDrives/0/ pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", DISKDRIVE, target, retryClient)), - // if a logical drive, it will be in ArrayControllers/x/ -> Links -> LogicalDrives/x/ - // example: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/3/DiskDrives/1/ - // pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) - // Loop through Members in ArrayControllers. 
Further loop through each of those to find anything in the /LogicalDrives or /DiskDrives enpoints + // DRIVES + // Loop through Members in ArrayControllers using the URL from the ArrayControllers endpoint for _, controller := range ac.Members { tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+uri+"/", DISKDRIVE, target, retryClient))) + pool.NewTask(common.Fetch(fqdn.String()+controller.URL, ARRAYCONTROLLER, target, retryClient))) } - // tasks need to be refactored, so that tasks will need to be iterated over - // TODO: - + // Prepare the pool of tasks p := pool.NewPool(tasks, 1) // Create new map[string]*metrics for each new Exporter @@ -395,6 +376,7 @@ func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { return nil } +// TODO: Make this work with new logic // exportDriveMetrics collects the DL380 drive metrics in json format and sets the prometheus gauges func (e *Exporter) exportDiskDriveMetrics(body []byte) error { @@ -523,31 +505,70 @@ func getArrayControllerEndpoint(url, host string, client *retryablehttp.Client) // arrayControllerIterate loops through members in the arrayController, iterates through the "Links" of each, // and exports the metrics if found in LogicalDrives or PhysicalDrives endpoints. 
- -func (e *Exporter) arrayControllerIterate(ac Collection) { - for _, member := range ac.Members { - for _, link := range member.Links { - if link.Rel == "LogicalDrives" { - logicalDrives, err := getLogicalDrivesEndpoint(link.Href, e.host, e.pool.Client) - if err != nil { - log.Error("Error getting LogicalDrives endpoint", zap.Error(err)) - continue - } - e.exportLogicalDriveMetrics(logicalDrives) - - } else if link.Rel == "PhysicalDrives" { - physicalDrives, err := getPhysicalDrivesEndpoint(link.Href, e.host, e.pool.Client) - if err != nil { - log.Error("Error getting PhysicalDrives endpoint", zap.Error(err)) - continue - } - e.exportPhysicalDriveMetrics(physicalDrives) - } - } - } +// func (e *Exporter) arrayControllerIterate(body []byte) error { +// var ac Collection +// var driveMemberData ArrayControllerObject +// var dlLogicalDrive = (*e.deviceMetrics)["logicalDriveMetrics"] +// var dlDiskDrive = (*e.deviceMetrics)["diskDriveMetrics"] +// for _, member := range ac.Members { +// // make an additional call and get the json from the URL for the member +// driveMemberData, err := e.getDriveMemberData(member.URL) +// if err != nil { +// log.Error("Error getting member data", zap.Error(err)) +// continue +// } +// // make an additional call and get the json from the URL listed in Links/LogicalDrives for the individual member. +// for _, member := range driveMemberData { +// url := member.driveMemberData.Links.LogicalDrives.URL +// // should the syntax be this? 
+// //link := member.Links.LogicalDrives.URL + +// // Get the link to the Logical Drive +// logicalDrivesData, err := e.getLogicalDrives(url) // TODO fix this too many arguments + +// // Process the Logical Drive Data into the LogicalDriveMetrics struct +// for _, drive := range logicalDrivesData { + +// dlLogicalDrive, err := e.processLogicalDriveData() +// } + +// if err != nil { +// log.Error("Error getting LogicalDrives endpoint", zap.Error(err)) +// continue +// } + +// } +// // make an additional call and get the json from the URL listed in Links/PhysicalDrives +// for _, member := range driveMemberData { +// link := member.driveMemberData.Links.PhysicalDrives.URL +// // should the syntax be this? +// //link := member.Links.PhysicalDrives.URL +// physicalDrivesData, err := e.getPhysicalDrives(link, e.host, e.pool.Client) // TODO fix this too many arguments +// if err != nil { +// log.Error("Error getting PhysicalDrives endpoint", zap.Error(err)) +// continue +// } +// } + +// } +// } + +// TODO process the logicalDrivesData +// if "Members@odata.count" == 0: no logical drives, check next endpoint. +// if "Members@odata.count" > 0: logical drives exist. +// loop through Members, and grab the URL from @odata.id +// from here, you can grab all the metrics found in LogicalDriveMetrics struct. + +func processLogicalDriveData() ([]byte, error) { + // TODO process the logicalDrivesData } -// TODO write getLogicalDrivesEndpoint function - +// TODO process the physicalDrivesData +// if "Members@odata.count" == 0: no physical drives, check next endpoint. +// if "Members@odata.count" > 0: physical Disk Drives exist. +// loop through Members, and grab the URL from @odata.id +// from here, you can grab all the metrics found in DiskDriveMetrics struct. 
-// TODO write getPhysicalDrivesEndpoint function +func processPhysicalDriveData() { + // TODO process the physicalDrivesData +} From cf12aff85b8831d8c46d285fe7e2c900fbe27414 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 16 Feb 2024 11:17:09 -0500 Subject: [PATCH 18/59] Remove fetchIterate function from util.go --- common/util.go | 93 -------------------------------------------------- 1 file changed, 93 deletions(-) diff --git a/common/util.go b/common/util.go index d105572..cea987d 100644 --- a/common/util.go +++ b/common/util.go @@ -70,99 +70,6 @@ type AaaLogoutPayload struct { InCookie string `xml:"inCookie,attr"` } -// TODO: Create a FetchIterate function to further iterate through to get drive endpoints: -// if no sublinks, return list of length 1 -// fetch_iterate would import Fetch -// input is everything that would go to fetch -// output is a [] of fetch returns -// - -func FetchIterate(uri, metricType, host string, client *retryablehttp.Client) func() ([]byte, string, error) { - var resp *http.Response - var credential *Credential - var err error - retryCount := 0 - endpoints := []string{uri} // Store the initial endpoint - output := [][]byte{} // Output list of fetch returns - - return func() ([]byte, string, error) { - // Add a 100 milliseconds delay in between requests because cisco devices respond in a non idiomatic manner - time.Sleep(100 * time.Millisecond) - - // Check if there are any endpoints left to iterate through - if len(endpoints) == 0 { - if len(output) == 0 { - return nil, metricType, fmt.Errorf("no more endpoints to iterate through") - } - return output[0], metricType, nil - } - - // Get the next endpoint from the list - nextEndpoint := endpoints[0] - endpoints = endpoints[1:] - - req := BuildRequest(nextEndpoint, host) - resp, err = DoRequest(client, req) - if err != nil { - return nil, metricType, err - } - defer resp.Body.Close() - if !(resp.StatusCode >= http.StatusOK && 
resp.StatusCode < http.StatusMultipleChoices) { - if resp.StatusCode == http.StatusNotFound { - for retryCount < 3 && resp.StatusCode == http.StatusNotFound { - time.Sleep(client.RetryWaitMin) - resp, err = DoRequest(client, req) - retryCount = retryCount + 1 - } - if err != nil { - return nil, metricType, err - } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) - } - } else if resp.StatusCode == http.StatusUnauthorized { - // Credentials may have rotated, go to vault and get the latest - credential, err = ChassisCreds.GetCredentials(context.Background(), host) - if err != nil { - return nil, metricType, fmt.Errorf("issue retrieving credentials from vault using target: %s", host) - } - ChassisCreds.Set(host, credential) - - // build new request with updated credentials - req = BuildRequest(nextEndpoint, host) - - time.Sleep(client.RetryWaitMin) - resp, err = DoRequest(client, req) - if err != nil { - return nil, metricType, fmt.Errorf("Retry DoRequest failed - " + err.Error()) - } - if resp.StatusCode == http.StatusUnauthorized { - return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) - } - } else { - return nil, metricType, fmt.Errorf("HTTP status %d", resp.StatusCode) - } - } - - body, err := io.ReadAll(resp.Body) - if err != nil { - return nil, metricType, fmt.Errorf("Error reading Response Body - " + err.Error()) - } - - // Parse the body to extract any additional endpoints - var endpointsFromResponse []string - // TODO: Parse the body to extract the endpoints and store them in the endpointsFromResponse slice - - // Append the new endpoints to the existing list - endpoints = append(endpoints, endpointsFromResponse...) 
- - // Add the body to the output list - output = append(output, body) - - // Recursively call the FetchIterate function to iterate through the remaining endpoints - return FetchIterate(endpoints[0], metricType, host, client)() - } -} - func Fetch(uri, metricType, host string, client *retryablehttp.Client) func() ([]byte, string, error) { var resp *http.Response var credential *Credential From 008060c537a16cc49adb2b723df9b6575a9145ca Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 16 Feb 2024 14:53:50 -0500 Subject: [PATCH 19/59] Update drive.go --- hpe/dl380/drive.go | 38 +++++++++----------------------------- 1 file changed, 9 insertions(+), 29 deletions(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index 299c508..a41869e 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -58,7 +58,6 @@ type nvmeDriveStatus struct { } // Logical Drives -// TODO: Make sure Status maps to Health string in LogicalDriveStatus type LogicalDriveMetrics struct { Id string `json:"Id"` CapacityMiB int `json:"CapacityMiB"` @@ -78,9 +77,7 @@ type LogicalDriveStatus struct { State string `json:"Enabled"` } -// (Always iterate through this /DiskDrives) // Disk Drives -// TODO: Make sure Status maps to Health string in DiskDriveStatus type DiskDriveMetrics struct { Id string `json:"Id"` CapacityMiB int `json:"CapacityMiB"` @@ -99,13 +96,13 @@ type DiskDriveStatus struct { State string `json:"State"` } -// ArrayController +// ArrayController: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ (1) type ArrayController struct { Members Members `json:"Members"` MembersCount int `json:"@odata.count"` } -// ArrayController Members +// ArrayController Members (2) type Members struct { URL string `json:"@odata.id"` } @@ -114,12 +111,12 @@ type Controller struct { Links Links `json:"Links"` } -// ArrayController LinksInMembers +// ArrayController LinksInMembers (3) 
/redfish/v1/Systems/1/SmartStorage/ArrayControllers/{member}/ type LinksInMembers struct { Links Links `json:"Links"` } -// ArrayController Links +// ArrayController Links (4) type Links struct { LogicalDrives driveURL `json:"LogicalDrives"` DiskDrives driveURL `json:"DiskDrives"` @@ -130,27 +127,10 @@ type driveURL struct { URL string `json:"@odata.id"` } -// // Main ArrayController JSON object - -// type ArrayControllerObject struct { -// Links Links `json:"Links"` -// Model string `json:"Model"` -// } - -// // Main ArrayController JSON object Links -// type Links struct { -// LogicalDrives struct { +// // Collection returns an array of the endpoints from the /ArrayControllers endpoint +// type Collection struct { +// Members []struct { // URL string `json:"@odata.id"` -// } -// PhysicalDrives struct { -// URL string `json:"@odata.id"` -// } +// } `json:"Members"` +// MembersCount int `json:"Members@odata.count"` // } - -// Collection returns an array of the endpoints from the /ArrayControllers endpoint -type Collection struct { - Members []struct { - URL string `json:"@odata.id"` - } `json:"Members"` - MembersCount int `json:"Members@odata.count"` -} From e7a45dd6ece8028cde20682c9874e5ba0cb13afd Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 16 Feb 2024 14:54:16 -0500 Subject: [PATCH 20/59] Update exporter.go prep for new logic --- hpe/dl380/exporter.go | 213 +++++------------------------------------- 1 file changed, 25 insertions(+), 188 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 49a8f0d..d3f27d9 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -21,7 +21,6 @@ import ( "crypto/tls" "encoding/json" "fmt" - "io" "net" "net/http" "net/url" @@ -128,10 +127,12 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { } // ArrayControllers endpoint array for disk discovery - ac, err := 
getArrayControllerEndpoint(fqdn.String()+uri+"/SmartStorage/ArrayControllers", target, retryClient) - if err != nil { - log.Error("error when getting ArrayControllers endpoint from "+DL380, zap.Error(err), zap.Any("trace_id", ctx.Value("TraceID"))) - } + // ac, err := getArrayControllerEndpoint(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", target, retryClient) + // if err != nil { + // log.Error("error when getting ArrayControllers endpoint from "+DL380, zap.Error(err), zap.Any("trace_id", ctx.Value("TraceID"))) + // } + + // TODO: Add getArrayMetricsEndpoint func here to recursively call drive endpoints before task pool executes // Tasks for pool to perform tasks = append(tasks, @@ -142,12 +143,12 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) - // DRIVES - // Loop through Members in ArrayControllers using the URL from the ArrayControllers endpoint - for _, controller := range ac.Members { - tasks = append(tasks, - pool.NewTask(common.Fetch(fqdn.String()+controller.URL, ARRAYCONTROLLER, target, retryClient))) - } + // // DRIVES + // // Loop through Members in ArrayControllers using the URL from the ArrayControllers endpoint + // for _, controller := range ac.Members { + // tasks = append(tasks, + // pool.NewTask(common.Fetch(fqdn.String()+controller.URL, ARRAYCONTROLLER, target, retryClient))) + // } // Prepare the pool of tasks p := pool.NewPool(tasks, 1) @@ -251,7 +252,7 @@ func (e *Exporter) scrape() { case NVME: err = e.exportNVMeDriveMetrics(task.Body) case DISKDRIVE: - err = e.exportDiskDriveMetrics(task.Body) + err = e.exportPhysicalDriveMetrics(task.Body) case LOGICALDRIVE: err = e.exportLogicalDriveMetrics(task.Body) case MEMORY: @@ -276,7 +277,18 @@ func (e *Exporter) scrape() { } -// TODO: Modify the 
below PowerMetrics to fit the DL380 data +// TODO: Fill out this export function +// exportPhysicalDriveMetrics collects the DL380's physical drive metrics in json format and sets the prometheus gauges +func (e *Exporter) exportPhysicalDriveMetrics(body []byte) error { + return nil +} + +// TODO: Fill out this export function +// exportPhysicalDriveMetrics collects the DL380's physical drive metrics in json format and sets the prometheus gauges +func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { + return nil +} + // exportPowerMetrics collects the DL380's power metrics in json format and sets the prometheus gauges func (e *Exporter) exportPowerMetrics(body []byte) error { @@ -376,68 +388,6 @@ func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { return nil } -// TODO: Make this work with new logic -// exportDriveMetrics collects the DL380 drive metrics in json format and sets the prometheus gauges -func (e *Exporter) exportDiskDriveMetrics(body []byte) error { - - var state float64 - var dd DiskDriveMetrics - var dDrive = (*e.deviceMetrics)["diskDriveMetrics"] - err := json.Unmarshal(body, &dd) - if err != nil { - return fmt.Errorf("Error Unmarshalling DL380 DiskDriveMetrics - " + err.Error()) - } - - // Check disk drive is enabled then check status and convert string to numeric values - if dd.Status.State == "Enabled" { - if dd.Status.Health == "OK" { - state = OK - } else { - state = BAD - } - } else { - state = DISABLED - } - - // Check "disk drive" if it is actually a logical drive - if dd.LogicalDriveName == "" { - // if drive is actually a logical drive, then skip it, otherwise, add metrics. 
- (*dDrive)["diskDriveStatus"].WithLabelValues(dd.Name, strconv.Itoa(dd.CapacityMiB)).Set(state) - } - - return nil -} - -// exportDriveMetrics collects the DL380 logicaldrive metrics in json format and sets the prometheus gauges -func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { - - var state float64 - var dld LogicalDriveMetrics - var dlDrive = (*e.deviceMetrics)["logicalDriveMetrics"] - err := json.Unmarshal(body, &dld) - if err != nil { - return fmt.Errorf("Error Unmarshalling DL380 logicalDriveMetrics - " + err.Error()) - } - // Check logical drive is enabled then check status and convert string to numeric values - if dld.Status.State == "Enabled" { - if dld.Status.Health == "OK" { - state = OK - } else { - state = BAD - } - } else { - state = DISABLED - } - // Check if drive is actually a logical drive - if dld.LogicalDriveName != "" { - // if drive is actually a logical drive, then add metrics. - (*dlDrive)["logicalDriveStatus"].WithLabelValues(dld.Name, strconv.Itoa(dld.LogicalDriveNumber), dld.Raid).Set(state) - } - - return nil -} - -// TODO: Modify the below MemoryMetrics to fit the DL380 data // exportMemoryMetrics collects the DL380 drive metrics in json format and sets the prometheus gauges func (e *Exporter) exportMemoryMetrics(body []byte) error { @@ -459,116 +409,3 @@ func (e *Exporter) exportMemoryMetrics(body []byte) error { return nil } - -// getArrayControllerEndpoint collects the DL380 ArrayController members and adds them to the Collection for further looping -func getArrayControllerEndpoint(url, host string, client *retryablehttp.Client) (Collection, error) { - var ac Collection - var resp *http.Response - var err error - retryCount := 0 - req := common.BuildRequest(url, host) - - resp, err = common.DoRequest(client, req) - if err != nil { - return ac, err - } - defer resp.Body.Close() - if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - if resp.StatusCode == http.StatusNotFound { - for 
retryCount < 3 && resp.StatusCode == http.StatusNotFound { - time.Sleep(client.RetryWaitMin) - resp, err = common.DoRequest(client, req) - retryCount = retryCount + 1 - } - if err != nil { - return ac, err - } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - return ac, fmt.Errorf("Http status %d", resp.StatusCode) - } - } else { - return ac, fmt.Errorf("Http status %d", resp.StatusCode) - } - } - body, err := io.ReadAll(resp.Body) - if err != nil { - return ac, fmt.Errorf("Error reading Response Body - " + err.Error()) - } - - err = json.Unmarshal(body, &ac) - if err != nil { - return ac, fmt.Errorf("Error Unmarshalling HPE DL380 ArrayController struct - " + err.Error()) - } - - return ac, nil - -} - -// arrayControllerIterate loops through members in the arrayController, iterates through the "Links" of each, -// and exports the metrics if found in LogicalDrives or PhysicalDrives endpoints. -// func (e *Exporter) arrayControllerIterate(body []byte) error { -// var ac Collection -// var driveMemberData ArrayControllerObject -// var dlLogicalDrive = (*e.deviceMetrics)["logicalDriveMetrics"] -// var dlDiskDrive = (*e.deviceMetrics)["diskDriveMetrics"] -// for _, member := range ac.Members { -// // make an additional call and get the json from the URL for the member -// driveMemberData, err := e.getDriveMemberData(member.URL) -// if err != nil { -// log.Error("Error getting member data", zap.Error(err)) -// continue -// } -// // make an additional call and get the json from the URL listed in Links/LogicalDrives for the individual member. -// for _, member := range driveMemberData { -// url := member.driveMemberData.Links.LogicalDrives.URL -// // should the syntax be this? 
-// //link := member.Links.LogicalDrives.URL - -// // Get the link to the Logical Drive -// logicalDrivesData, err := e.getLogicalDrives(url) // TODO fix this too many arguments - -// // Process the Logical Drive Data into the LogicalDriveMetrics struct -// for _, drive := range logicalDrivesData { - -// dlLogicalDrive, err := e.processLogicalDriveData() -// } - -// if err != nil { -// log.Error("Error getting LogicalDrives endpoint", zap.Error(err)) -// continue -// } - -// } -// // make an additional call and get the json from the URL listed in Links/PhysicalDrives -// for _, member := range driveMemberData { -// link := member.driveMemberData.Links.PhysicalDrives.URL -// // should the syntax be this? -// //link := member.Links.PhysicalDrives.URL -// physicalDrivesData, err := e.getPhysicalDrives(link, e.host, e.pool.Client) // TODO fix this too many arguments -// if err != nil { -// log.Error("Error getting PhysicalDrives endpoint", zap.Error(err)) -// continue -// } -// } - -// } -// } - -// TODO process the logicalDrivesData -// if "Members@odata.count" == 0: no logical drives, check next endpoint. -// if "Members@odata.count" > 0: logical drives exist. -// loop through Members, and grab the URL from @odata.id -// from here, you can grab all the metrics found in LogicalDriveMetrics struct. - -func processLogicalDriveData() ([]byte, error) { - // TODO process the logicalDrivesData -} - -// TODO process the physicalDrivesData -// if "Members@odata.count" == 0: no physical drives, check next endpoint. -// if "Members@odata.count" > 0: physical Disk Drives exist. -// loop through Members, and grab the URL from @odata.id -// from here, you can grab all the metrics found in DiskDriveMetrics struct. 
- -func processPhysicalDriveData() { - // TODO process the physicalDrivesData -} From 72ba40d49897ca3a55c624577261cb40823359d5 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 16 Feb 2024 17:55:11 -0500 Subject: [PATCH 21/59] Update drive.go to include generic drive type --- hpe/dl380/drive.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index a41869e..f36fcd9 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -77,6 +77,11 @@ type LogicalDriveStatus struct { State string `json:"Enabled"` } +type GenericDrive struct { + Members Members `json:"Members,omitempty"` + Links Links `json:"Links,omitempty"` +} + // Disk Drives type DiskDriveMetrics struct { Id string `json:"Id"` @@ -96,7 +101,7 @@ type DiskDriveStatus struct { State string `json:"State"` } -// ArrayController: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ (1) +// ArrayController: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ (1) like chas Collection type ArrayController struct { Members Members `json:"Members"` MembersCount int `json:"@odata.count"` From e064398870a6914e60ccbcbcc55cc9c660c0fd16 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Fri, 16 Feb 2024 18:04:20 -0500 Subject: [PATCH 22/59] Update exporter.go to include while loop to gather drive endpoints --- hpe/dl380/exporter.go | 123 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 115 insertions(+), 8 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index d3f27d9..390266a 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -21,6 +21,7 @@ import ( "crypto/tls" "encoding/json" "fmt" + "io/ioutil" "net" "net/http" "net/url" @@ -126,21 +127,43 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { } } - // ArrayControllers endpoint array for disk discovery - // ac, err := 
getArrayControllerEndpoint(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", target, retryClient) - // if err != nil { - // log.Error("error when getting ArrayControllers endpoint from "+DL380, zap.Error(err), zap.Any("trace_id", ctx.Value("TraceID"))) - // } + // TODO: work on while loop - endpoints should be gathered recursively + // logic should be applied to sort out the logical from the physical drives based on members & links + // Get logical and physical drive endpoints + // + // variables for use in drive endpoint while loop + done := false + initialParameter := fqdn.String() + uri + "/Systems/1/SmartStorage/ArrayControllers/" + parameter := initialParameter + outputs := []string{initialParameter} + // While loop to append all of the possible logical drive endpoints to the tasks pool + for !done { + + output := getDriveEndpoint(url) + outputs = append(outputs, output) + + // TODO: Add getArrayMetricsEndpoint func here to recursively call drive endpoints before task pool executes + arrayMetricsEndpoint, err := getArrayMetricsEndpoint(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/", target, retryClient) + if err != nil { + log.Error("error when getting ArrayControllers endpoint from "+DL380, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) + return nil, err + } - // TODO: Add getArrayMetricsEndpoint func here to recursively call drive endpoints before task pool executes + if output == "done" { + done = true + } else { + parameter = output + //fmt.Printf("setting parameter to %s\n", output) + } + } // Tasks for pool to perform tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power", POWER, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"Chassis/1", NVME, target, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", DISKDRIVE, 
target, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), + //pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", DISKDRIVE, target, retryClient)), + //pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) // // DRIVES @@ -409,3 +432,87 @@ func (e *Exporter) exportMemoryMetrics(body []byte) error { return nil } + +// getArrayMetricsEndpoint that gets the initial json response to loop through. +func getArrayMetricsEndpoint(endpoint, host string, client *retryablehttp.Client) (ArrayController, error) { + var ac ArrayController + var resp *http.Response + var err error + retryCount := 0 + req := common.BuildRequest(endpoint, host) + + resp, err = common.DoRequest(client, req) + if err != nil { + return ac, err + } + defer resp.Body.Close() + if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { + if resp.StatusCode == http.StatusNotFound { + for retryCount < 3 && resp.StatusCode == http.StatusNotFound { + time.Sleep(client.RetryWaitMin) + resp, err = common.DoRequest(client, req) + retryCount = retryCount + 1 + } + if err != nil { + return ac, err + } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { + return ac, fmt.Errorf("HTTP status %d", resp.StatusCode) + } + } else { + return ac, fmt.Errorf("HTTP status %d", resp.StatusCode) + } + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return ac, fmt.Errorf("Error reading response body - " + err.Error()) + } + + err = json.Unmarshal(body, &ac) + if err != nil { + return ac, fmt.Errorf("Error Unmarshalling DL380 ArrayController struct - " + err.Error()) + } + + return ac, nil +} + +func getDriveEndpoint(url, host string, client 
*retryablehttp.Client) (GenericDrive, error) { + var drive GenericDrive + var resp *http.Response + var err error + retryCount := 0 + req := common.BuildRequest(url, host) + resp, err = common.DoRequest(client, req) + if err != nil { + return drive, err + } + defer resp.Body.Close() + if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { + if resp.StatusCode == http.StatusNotFound { + for retryCount < 3 && resp.StatusCode == http.StatusNotFound { + time.Sleep(client.RetryWaitMin) + resp, err = common.DoRequest(client, req) + retryCount = retryCount + 1 + } + if err != nil { + return drive, err + } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { + return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) + } + } else { + return drive, fmt.Errorf("HTTP status %d", resp.StatusCode) + } + } + + body, err := ioutil.ReadAll(resp.Body) + if err != nil { + return drive, fmt.Errorf("Error reading Response Body - " + err.Error()) + } + + err = json.Unmarshal(body, &drive) + if err != nil { + return drive, fmt.Errorf("Error Unmarshalling S3260M5 Chassis struct - " + err.Error()) + } + + return drive, nil +} From 1e2331dd763b90db8c3d91643a2bd7c6daeb7109 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Sun, 18 Feb 2024 13:04:02 -0500 Subject: [PATCH 23/59] Update exporter.go with refactored drive logic --- hpe/dl380/exporter.go | 73 ++++++++++++++++++++++--------------------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 390266a..f62774e 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -127,52 +127,53 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { } } - // TODO: work on while loop - endpoints should be gathered recursively - // logic should be applied to sort out the logical from the physical drives based on members & links - // 
Get logical and physical drive endpoints - // - // variables for use in drive endpoint while loop - done := false - initialParameter := fqdn.String() + uri + "/Systems/1/SmartStorage/ArrayControllers/" - parameter := initialParameter - outputs := []string{initialParameter} - // While loop to append all of the possible logical drive endpoints to the tasks pool - for !done { - + // Recursively parsing through drive endpoints until final endpoints are found + var ( + initialURL = (fqdn.String() + uri + "/Systems/1/SmartStorage/ArrayControllers") // TODO: check this correctly parses into a full URL + url = initialURL + logicalDriveURLs []string + physicalDriveURLs []string + ) + + for { output := getDriveEndpoint(url) - outputs = append(outputs, output) - - // TODO: Add getArrayMetricsEndpoint func here to recursively call drive endpoints before task pool executes - arrayMetricsEndpoint, err := getArrayMetricsEndpoint(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers/", target, retryClient) - if err != nil { - log.Error("error when getting ArrayControllers endpoint from "+DL380, zap.Error(err), zap.Any("trace_id", ctx.Value("traceID"))) - return nil, err - } - - if output == "done" { - done = true + if len(output.Members) > 0 { + for _, member := range output.Members { + newOutput := getDriveEndpoint(member.URL) + if newOutput.Links.URL != "" { + logicalDriveOutput := getDriveEndpoint(newOutput.Links.URL) + if len(logicalDriveOutput.Members) > 0 { + for _, member := range logicalDriveOutput.Members { + logicalDriveURLs = append(logicalDriveURLs, member.URL) + } + } + } else if newOutput.Links.URL != "" { + physicalDriveOutput := getDriveEndpoint(newOutput.Links.URL) + if len(physicalDriveOutput.Members) > 0 { + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) + } + } + } + } } else { - parameter = output - //fmt.Printf("setting parameter to %s\n", output) + break } } + // TODO: Append each of 
those URLS in LogicalDriveURLs array to the tasks pool with the const LOGICALDRIVE, + // TODO: Append each of those URLS in the PhysicalDriveURLs array to the tasks pool with the const DISKDRIVE + // TODO: Further parse the chassis/1 to get the NVME metrics before adding it to the tasks pool (similar to above.) + // Tasks for pool to perform tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power", POWER, target, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"Chassis/1", NVME, target, retryClient)), - //pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", DISKDRIVE, target, retryClient)), - //pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"Chassis/1", NVME, target, retryClient)), // TODO: Logic needs to change here similar to above + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", DISKDRIVE, target, retryClient)), + pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) - // // DRIVES - // // Loop through Members in ArrayControllers using the URL from the ArrayControllers endpoint - // for _, controller := range ac.Members { - // tasks = append(tasks, - // pool.NewTask(common.Fetch(fqdn.String()+controller.URL, ARRAYCONTROLLER, target, retryClient))) - // } - // Prepare the pool of tasks p := pool.NewPool(tasks, 1) @@ -511,7 +512,7 @@ func getDriveEndpoint(url, host string, client *retryablehttp.Client) (GenericDr err = json.Unmarshal(body, &drive) if err != nil { - return drive, fmt.Errorf("Error Unmarshalling S3260M5 Chassis struct - " + err.Error()) + return drive, 
fmt.Errorf("Error Unmarshalling DL380 drive struct - " + err.Error()) } return drive, nil From 4d084d62813e1b8bf8133ee9ec5178dcc1ff1b2b Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 09:26:14 -0500 Subject: [PATCH 24/59] Clean up drive.go dl 380 --- hpe/dl380/drive.go | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index f36fcd9..6241c06 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -20,8 +20,6 @@ package dl380 // /redfish/v1/chassis/1/ // NVMeMetrics is the top level json object for DL380 NVMe Metrics Metadata -// TODO: Ensure Physical Location maps to the ServiceLabel string within PartLocation -// TODO: Ensure Status maps to the Health string within StatusNVMe type NVMeDriveMetrics struct { ID string `json:"Id"` Model string `json:"Model"` @@ -91,8 +89,6 @@ type DiskDriveMetrics struct { Name string `json:"Name"` Model string `json:"Model"` Status DiskDriveStatus `json:"Status"` - // Check for logical drive, if disk drive, should return nothing. 
- LogicalDriveName string `json:"LogicalDriveName,omitempty"` } // Disk Drive Status @@ -101,13 +97,13 @@ type DiskDriveStatus struct { State string `json:"State"` } -// ArrayController: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ (1) like chas Collection +// ArrayController: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ type ArrayController struct { Members Members `json:"Members"` MembersCount int `json:"@odata.count"` } -// ArrayController Members (2) +// ArrayController Members type Members struct { URL string `json:"@odata.id"` } @@ -116,12 +112,7 @@ type Controller struct { Links Links `json:"Links"` } -// ArrayController LinksInMembers (3) /redfish/v1/Systems/1/SmartStorage/ArrayControllers/{member}/ -type LinksInMembers struct { - Links Links `json:"Links"` -} - -// ArrayController Links (4) +// ArrayController Links type Links struct { LogicalDrives driveURL `json:"LogicalDrives"` DiskDrives driveURL `json:"DiskDrives"` @@ -131,11 +122,3 @@ type Links struct { type driveURL struct { URL string `json:"@odata.id"` } - -// // Collection returns an array of the endpoints from the /ArrayControllers endpoint -// type Collection struct { -// Members []struct { -// URL string `json:"@odata.id"` -// } `json:"Members"` -// MembersCount int `json:"Members@odata.count"` -// } From 0ebf550670dbbd0357bded1d2ed66ff338f96a8c Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 10:35:41 -0500 Subject: [PATCH 25/59] Update exporter.go --- hpe/dl380/exporter.go | 103 +++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 42 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index f62774e..6f9cfe0 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -131,24 +131,31 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { var ( initialURL = (fqdn.String() + uri + "/Systems/1/SmartStorage/ArrayControllers") // TODO: check 
this correctly parses into a full URL url = initialURL + chassis_url = (fqdn.String() + uri + "/Chassis/1") logicalDriveURLs []string physicalDriveURLs []string + nvmeDriveURLs []string ) + // parse to find logical drives and physical disk drives - for { - output := getDriveEndpoint(url) + output, err := getDriveEndpoint(url, target, retryClient) if len(output.Members) > 0 { for _, member := range output.Members { - newOutput := getDriveEndpoint(member.URL) - if newOutput.Links.URL != "" { - logicalDriveOutput := getDriveEndpoint(newOutput.Links.URL) + newOutput, err := getDriveEndpoint(member.URL, target, retryClient) + + // If LogicalDrives is present, parse logical drive endpoint until all urls are found + if newOutput.Links.LogicalDrives != nil { + logicalDriveOutput, err := getDriveEndpoint(newOutput.Links.LogicalDrives.URL, target, retryClient) if len(logicalDriveOutput.Members) > 0 { for _, member := range logicalDriveOutput.Members { logicalDriveURLs = append(logicalDriveURLs, member.URL) } } - } else if newOutput.Links.URL != "" { - physicalDriveOutput := getDriveEndpoint(newOutput.Links.URL) + } + + // If PhysicalDrives is present, parse physical drive endpoint until all urls are found + if newOutput.Links.PhysicalDrives != nil { + physicalDriveOutput, err := getDriveEndpoint(newOutput.Links.PhysicalDrives.URL, target, retryClient) if len(physicalDriveOutput.Members) > 0 { for _, member := range physicalDriveOutput.Members { physicalDriveURLs = append(physicalDriveURLs, member.URL) @@ -156,22 +163,34 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { } } } - } else { - break + + // parse to find NVME drives + chassis_output, err := getDriveEndpoint(chassis_url, host, client) + // parse through "Links" to find "Drives" array + if len(chassis_output.Links.Drives) > 0 { + // loop through drives array and append each odata.id url to nvmeDriveURLs list + for _, drive := range chassis_output.Links.Drives { + nvmeDriveURLs = 
append(nvmeDriveURLs, chassis_output.Links.Drives.URL) } } - // TODO: Append each of those URLS in LogicalDriveURLs array to the tasks pool with the const LOGICALDRIVE, - // TODO: Append each of those URLS in the PhysicalDriveURLs array to the tasks pool with the const DISKDRIVE - // TODO: Further parse the chassis/1 to get the NVME metrics before adding it to the tasks pool (similar to above.) + // Loop through logicalDriveURLs, physicalDriveURLs, and nvmeDriveURLs and append each URL to the tasks pool + for _, url := range logicalDriveURLs { + tasks = append(tasks, pool.NewTask(common.Fetch(url, LOGICALDRIVE, target, retryClient))) + } + + for _, url := range physicalDriveURLs { + tasks = append(tasks, pool.NewTask(common.Fetch(url, DISKDRIVE, target, retryClient))) + } + + for _, url := range nvmeDriveURLs { + tasks = append(tasks, pool.NewTask(common.Fetch(url, NVME, target, retryClient))) + } - // Tasks for pool to perform + // Additional tasks for pool to perform tasks = append(tasks, pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Thermal", THERMAL, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Chassis/1/Power", POWER, target, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"Chassis/1", NVME, target, retryClient)), // TODO: Logic needs to change here similar to above - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", DISKDRIVE, target, retryClient)), - pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1/SmartStorage/ArrayControllers", LOGICALDRIVE, target, retryClient)), pool.NewTask(common.Fetch(fqdn.String()+uri+"/Systems/1", MEMORY, target, retryClient))) // Prepare the pool of tasks @@ -313,6 +332,32 @@ func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { return nil } +// exportNVMeDriveMetrics collects the DL380 NVME drive metrics in json format and sets the prometheus gauges +func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { + + var state 
float64 + var dlnvme NVMeDriveMetrics + var dlnvmedrive = (*e.deviceMetrics)["nvmeDriveMetrics"] + err := json.Unmarshal(body, &dlnvme) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 NVMeDriveMetrics - " + err.Error()) + } + // Check logical drive is enabled then check status and convert string to numeric values + if dlnvme.Status.State == "Enabled" { + if dlnvme.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + + (*dlnvmedrive)["nvmeDriveStatus"].WithLabelValues(dlnvme.Protocol, dlnvme.ID, dlnvme.Name).Set(state) + + return nil +} + // exportPowerMetrics collects the DL380's power metrics in json format and sets the prometheus gauges func (e *Exporter) exportPowerMetrics(body []byte) error { @@ -386,32 +431,6 @@ func (e *Exporter) exportThermalMetrics(body []byte) error { return nil } -// exportNVMeDriveMetrics collects the DL380 NVME drive metrics in json format and sets the prometheus gauges -func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { - - var state float64 - var dlnvme NVMeDriveMetrics - var dlnvmedrive = (*e.deviceMetrics)["nvmeDriveMetrics"] - err := json.Unmarshal(body, &dlnvme) - if err != nil { - return fmt.Errorf("Error Unmarshalling DL380 NVMeDriveMetrics - " + err.Error()) - } - // Check logical drive is enabled then check status and convert string to numeric values - if dlnvme.Status.State == "Enabled" { - if dlnvme.Status.Health == "OK" { - state = OK - } else { - state = BAD - } - } else { - state = DISABLED - } - - (*dlnvmedrive)["nvmeDriveStatus"].WithLabelValues(dlnvme.Protocol, dlnvme.ID, dlnvme.Name).Set(state) - - return nil -} - // exportMemoryMetrics collects the DL380 drive metrics in json format and sets the prometheus gauges func (e *Exporter) exportMemoryMetrics(body []byte) error { From 9a905ab02e436713f4f0fc8b6aedd7659f937e51 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 
10:36:48 -0500 Subject: [PATCH 26/59] Update exporter.go --- hpe/dl380/exporter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 6f9cfe0..3ce603e 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -165,7 +165,7 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { } // parse to find NVME drives - chassis_output, err := getDriveEndpoint(chassis_url, host, client) + chassis_output, err := getDriveEndpoint(chassis_url, target, retryClient) // parse through "Links" to find "Drives" array if len(chassis_output.Links.Drives) > 0 { // loop through drives array and append each odata.id url to nvmeDriveURLs list From 3f20723e1508fdca1282a11c712889283627a2f9 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 10:47:56 -0500 Subject: [PATCH 27/59] Update drive.go to include MembersCount in GenericDrive struct --- hpe/dl380/drive.go | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index 6241c06..3a2eaba 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -75,9 +75,11 @@ type LogicalDriveStatus struct { State string `json:"Enabled"` } +// GenericDrive is used to iterate over differing drive endpoints type GenericDrive struct { - Members Members `json:"Members,omitempty"` - Links Links `json:"Links,omitempty"` + Members Members `json:"Members,omitempty"` + Links Links `json:"Links,omitempty"` + MembersCount int `json:"@odata.count,omitempty"` } // Disk Drives @@ -114,8 +116,9 @@ type Controller struct { // ArrayController Links type Links struct { - LogicalDrives driveURL `json:"LogicalDrives"` - DiskDrives driveURL `json:"DiskDrives"` + LogicalDrives driveURL `json:"LogicalDrives,omitempty"` + PhysicalDrives driveURL `json:"PhysicalDrives,omitempty"` // aka DiskDrives + Drives driveURL `json:"Drives,omitempty"` // used for 
chassis endpoint for NVME only } // URL string from within Logical Drives or Disk Drives From ca9da167ff24f7a8e022a17a4dd7e63f9b1bf8ac Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:11:05 -0500 Subject: [PATCH 28/59] Update drive.go with nested GenericDrive struct --- hpe/dl380/drive.go | 77 +++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 29 deletions(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index 3a2eaba..73b654a 100644 --- a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -77,9 +77,21 @@ type LogicalDriveStatus struct { // GenericDrive is used to iterate over differing drive endpoints type GenericDrive struct { - Members Members `json:"Members,omitempty"` - Links Links `json:"Links,omitempty"` - MembersCount int `json:"@odata.count,omitempty"` + Members []struct { + URL string `json:"@odata.id"` + } `json:"Members,omitempty"` + Links struct { + Drives []struct { + URL string `json:"@odata.id"` + } `json:"Drives,omitempty"` + LogicalDrives struct { + URL string `json:"@odata.id"` + } `json:"LogicalDrives,omitempty"` + PhysicalDrives struct { + URL string `json:"@odata.id"` + } `json:"PhysicalDrives,omitempty"` + } `json:"Links,omitempty"` + MembersCount int `json:"@odata.count,omitempty"` } // Disk Drives @@ -99,29 +111,36 @@ type DiskDriveStatus struct { State string `json:"State"` } -// ArrayController: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ -type ArrayController struct { - Members Members `json:"Members"` - MembersCount int `json:"@odata.count"` -} - -// ArrayController Members -type Members struct { - URL string `json:"@odata.id"` -} - -type Controller struct { - Links Links `json:"Links"` -} - -// ArrayController Links -type Links struct { - LogicalDrives driveURL `json:"LogicalDrives,omitempty"` - PhysicalDrives driveURL `json:"PhysicalDrives,omitempty"` // aka DiskDrives - Drives driveURL `json:"Drives,omitempty"` // 
used for chassis endpoint for NVME only -} - -// URL string from within Logical Drives or Disk Drives -type driveURL struct { - URL string `json:"@odata.id"` -} +// // ArrayController: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ +// type ArrayController struct { +// Members Members `json:"Members"` +// MembersCount int `json:"@odata.count"` +// } + +// // ArrayController Members +// type Members struct { +// URL string `json:"@odata.id"` +// } + +// type Controller struct { +// Links Links `json:"Links"` +// } + +// // ArrayController Links +// type Links struct { +// LogicalDrives driveURL `json:"LogicalDrives,omitempty"` +// PhysicalDrives driveURL `json:"PhysicalDrives,omitempty"` // aka DiskDrives +// Drives driveURL `json:"Drives,omitempty"` // used for chassis endpoint for NVME only +// } + +// // URL string from within Logical Drives or Disk Drives +// type driveURL struct { +// URL string `json:"@odata.id"` +// } + +// // GenericDrive is used to iterate over differing drive endpoints +// type GenericDrive struct { +// Members Members `json:"Members,omitempty"` +// Links Links `json:"Links,omitempty"` +// MembersCount int `json:"@odata.count,omitempty"` +// } From cb57e2be4b380c24a3fab689a0f5f2463be3ffd0 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:12:38 -0500 Subject: [PATCH 29/59] Improve logic in exporter.go to match GenericDrive struct --- hpe/dl380/exporter.go | 61 ++++++++++++++++++++++++++++--------------- 1 file changed, 40 insertions(+), 21 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 3ce603e..73eac3f 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -136,33 +136,52 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { physicalDriveURLs []string nvmeDriveURLs []string ) - // parse to find logical drives and physical disk drives - - output, err := getDriveEndpoint(url, target, retryClient) - if 
len(output.Members) > 0 { - for _, member := range output.Members { - newOutput, err := getDriveEndpoint(member.URL, target, retryClient) - - // If LogicalDrives is present, parse logical drive endpoint until all urls are found - if newOutput.Links.LogicalDrives != nil { - logicalDriveOutput, err := getDriveEndpoint(newOutput.Links.LogicalDrives.URL, target, retryClient) - if len(logicalDriveOutput.Members) > 0 { - for _, member := range logicalDriveOutput.Members { - logicalDriveURLs = append(logicalDriveURLs, member.URL) - } + + // PARSING DRIVE ENDPOINTS + // Get initial JSON return of /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ set to output + output, err := getDriveEndpoint(url, target, retryClient) + // Loop through Members to get ArrayController URLs + if output.MembersCount > 0 { + for _, member := range output.Members { + // for each ArrayController URL, get the JSON object + newOutput, err := getDriveEndpoint(member.URL, target, retryClient) + if err != nil { + // TODO: error handle + continue + } + + // If LogicalDrives is present, parse logical drive endpoint until all urls are found + if len(newOutput.Links.LogicalDrives.URL) > 0 { + logicalDriveOutput, err := getDriveEndpoint(newOutput.Links.LogicalDrives.URL, target, retryClient) + if err != nil { + // TODO: error handle + continue + } + + if logicalDriveOutput.MembersCount > 0 { + // loop through each Member in the "LogicalDrive" field + for _, member := range logicalDriveOutput.Members { + // append each URL in the Members array to the logicalDriveURLs array. 
+ logicalDriveURLs = append(logicalDriveURLs, member.URL) } } + } - // If PhysicalDrives is present, parse physical drive endpoint until all urls are found - if newOutput.Links.PhysicalDrives != nil { - physicalDriveOutput, err := getDriveEndpoint(newOutput.Links.PhysicalDrives.URL, target, retryClient) - if len(physicalDriveOutput.Members) > 0 { - for _, member := range physicalDriveOutput.Members { - physicalDriveURLs = append(physicalDriveURLs, member.URL) - } + // If PhysicalDrives is present, parse physical drive endpoint until all urls are found + if len(newOutput.Links.PhysicalDrives.URL) > 0 { + physicalDriveOutput, err := getDriveEndpoint(newOutput.Links.PhysicalDrives.URL, target, retryClient) + if err != nil { + // TODO: error handle + continue + } + if physicalDriveOutput.MembersCount > 0 { + for _, member := range physicalDriveOutput.Members { + physicalDriveURLs = append(physicalDriveURLs, member.URL) } } } + } + } // parse to find NVME drives chassis_output, err := getDriveEndpoint(chassis_url, target, retryClient) From fd1359597e2491e3c0350ece25b5aa799aefa200 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:16:57 -0500 Subject: [PATCH 30/59] Update exporter.go --- hpe/dl380/exporter.go | 50 ++++++------------------------------------- 1 file changed, 6 insertions(+), 44 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 73eac3f..3625851 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -185,11 +185,16 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { // parse to find NVME drives chassis_output, err := getDriveEndpoint(chassis_url, target, retryClient) + if err != nil { + // TODO: error handle + return nil + } + // parse through "Links" to find "Drives" array if len(chassis_output.Links.Drives) > 0 { // loop through drives array and append each odata.id url to nvmeDriveURLs list for _, drive := range 
chassis_output.Links.Drives { - nvmeDriveURLs = append(nvmeDriveURLs, chassis_output.Links.Drives.URL) + nvmeDriveURLs = append(nvmeDriveURLs, drive.URL) } } @@ -472,49 +477,6 @@ func (e *Exporter) exportMemoryMetrics(body []byte) error { return nil } -// getArrayMetricsEndpoint that gets the initial json response to loop through. -func getArrayMetricsEndpoint(endpoint, host string, client *retryablehttp.Client) (ArrayController, error) { - var ac ArrayController - var resp *http.Response - var err error - retryCount := 0 - req := common.BuildRequest(endpoint, host) - - resp, err = common.DoRequest(client, req) - if err != nil { - return ac, err - } - defer resp.Body.Close() - if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - if resp.StatusCode == http.StatusNotFound { - for retryCount < 3 && resp.StatusCode == http.StatusNotFound { - time.Sleep(client.RetryWaitMin) - resp, err = common.DoRequest(client, req) - retryCount = retryCount + 1 - } - if err != nil { - return ac, err - } else if !(resp.StatusCode >= http.StatusOK && resp.StatusCode < http.StatusMultipleChoices) { - return ac, fmt.Errorf("HTTP status %d", resp.StatusCode) - } - } else { - return ac, fmt.Errorf("HTTP status %d", resp.StatusCode) - } - } - - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - return ac, fmt.Errorf("Error reading response body - " + err.Error()) - } - - err = json.Unmarshal(body, &ac) - if err != nil { - return ac, fmt.Errorf("Error Unmarshalling DL380 ArrayController struct - " + err.Error()) - } - - return ac, nil -} - func getDriveEndpoint(url, host string, client *retryablehttp.Client) (GenericDrive, error) { var drive GenericDrive var resp *http.Response From 04b1f694d1f807aac0dd515a41cd386c63621344 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:32:12 -0500 Subject: [PATCH 31/59] Update exporter.go with finished drive exporter logic --- 
hpe/dl380/exporter.go | 52 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 46 insertions(+), 6 deletions(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 3625851..604d0fd 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -52,8 +52,6 @@ const ( DISKDRIVE = "DiskDriveMetrics" // LOGICALDRIVE represents the Logical drive metric endpoint LOGICALDRIVE = "LogicalDriveMetrics" - // ARRAYCONTROLLER represents the Array Controller metric endpoint - ARRAYCONTROLLER = "ArrayControllerMetrics" // MEMORY represents the memory metric endpoints MEMORY = "MemoryMetrics" // OK is a string representation of the float 1.0 for device status @@ -141,6 +139,11 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { // Get initial JSON return of /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ set to output output, err := getDriveEndpoint(url, target, retryClient) // Loop through Members to get ArrayController URLs + if err != nil { + // TODO: error handle + return + } + if output.MembersCount > 0 { for _, member := range output.Members { // for each ArrayController URL, get the JSON object @@ -344,15 +347,52 @@ func (e *Exporter) scrape() { } -// TODO: Fill out this export function // exportPhysicalDriveMetrics collects the DL380's physical drive metrics in json format and sets the prometheus gauges func (e *Exporter) exportPhysicalDriveMetrics(body []byte) error { + + var state float64 + var dlphysical DiskDriveMetrics + var dlphysicaldrive = (*e.deviceMetrics)["diskDriveMetrics"] + err := json.Unmarshal(body, &dlphysical) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 DiskDriveMetrics - " + err.Error()) + } + // Check physical drive is enabled then check status and convert string to numeric values + if dlphysical.Status.State == "Enabled" { + if dlphysical.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + + 
(*dlphysicaldrive)["DiskDriveMetrics"].WithLabelValues(dlphysical.Name, dlphysical.Id).Set(state) return nil } -// TODO: Fill out this export function -// exportPhysicalDriveMetrics collects the DL380's physical drive metrics in json format and sets the prometheus gauges +// exportLogicalDriveMetrics collects the DL380's physical drive metrics in json format and sets the prometheus gauges func (e *Exporter) exportLogicalDriveMetrics(body []byte) error { + var state float64 + var dllogical LogicalDriveMetrics + var dllogicaldrive = (*e.deviceMetrics)["logicalDriveMetrics"] + err := json.Unmarshal(body, &dllogical) + if err != nil { + return fmt.Errorf("Error Unmarshalling DL380 LogicalDriveMetrics - " + err.Error()) + } + // Check physical drive is enabled then check status and convert string to numeric values + if dllogical.Status.State == "Enabled" { + if dllogical.Status.Health == "OK" { + state = OK + } else { + state = BAD + } + } else { + state = DISABLED + } + + (*dllogicaldrive)["LogicalDriveMetrics"].WithLabelValues(dllogical.Name, dllogical.Id, dllogical.Raid).Set(state) return nil } @@ -366,7 +406,7 @@ func (e *Exporter) exportNVMeDriveMetrics(body []byte) error { if err != nil { return fmt.Errorf("Error Unmarshalling DL380 NVMeDriveMetrics - " + err.Error()) } - // Check logical drive is enabled then check status and convert string to numeric values + // Check nvme drive is enabled then check status and convert string to numeric values if dlnvme.Status.State == "Enabled" { if dlnvme.Status.Health == "OK" { state = OK From 9b3a0715c111f739e455b322661c69fd51099ebf Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:32:52 -0500 Subject: [PATCH 32/59] Update drive.go with cleaned up drive structs --- hpe/dl380/drive.go | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/hpe/dl380/drive.go b/hpe/dl380/drive.go index 73b654a..8fde27d 100644 --- 
a/hpe/dl380/drive.go +++ b/hpe/dl380/drive.go @@ -110,37 +110,3 @@ type DiskDriveStatus struct { Health string `json:"Health"` State string `json:"State"` } - -// // ArrayController: /redfish/v1/Systems/1/SmartStorage/ArrayControllers/ -// type ArrayController struct { -// Members Members `json:"Members"` -// MembersCount int `json:"@odata.count"` -// } - -// // ArrayController Members -// type Members struct { -// URL string `json:"@odata.id"` -// } - -// type Controller struct { -// Links Links `json:"Links"` -// } - -// // ArrayController Links -// type Links struct { -// LogicalDrives driveURL `json:"LogicalDrives,omitempty"` -// PhysicalDrives driveURL `json:"PhysicalDrives,omitempty"` // aka DiskDrives -// Drives driveURL `json:"Drives,omitempty"` // used for chassis endpoint for NVME only -// } - -// // URL string from within Logical Drives or Disk Drives -// type driveURL struct { -// URL string `json:"@odata.id"` -// } - -// // GenericDrive is used to iterate over differing drive endpoints -// type GenericDrive struct { -// Members Members `json:"Members,omitempty"` -// Links Links `json:"Links,omitempty"` -// MembersCount int `json:"@odata.count,omitempty"` -// } From 64b2425cc613bdc3b32c774dfdfdb4ab3763f785 Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser <65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:34:33 -0500 Subject: [PATCH 33/59] Update exporter.go --- hpe/dl380/exporter.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hpe/dl380/exporter.go b/hpe/dl380/exporter.go index 604d0fd..70a9009 100644 --- a/hpe/dl380/exporter.go +++ b/hpe/dl380/exporter.go @@ -141,7 +141,7 @@ func NewExporter(ctx context.Context, target, uri string) *Exporter { // Loop through Members to get ArrayController URLs if err != nil { // TODO: error handle - return + return nil } if output.MembersCount > 0 { From 940ddcff1ae104f7c598ed6a01e6e68bc671e30e Mon Sep 17 00:00:00 2001 From: Jennifer Kaiser 
<65861760+jenniferKaiser21@users.noreply.github.com> Date: Mon, 19 Feb 2024 11:38:21 -0500 Subject: [PATCH 34/59] Update templates.go with DL380 option --- cmd/fishymetrics/templates.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmd/fishymetrics/templates.go b/cmd/fishymetrics/templates.go index 485e767..fbb7b40 100644 --- a/cmd/fishymetrics/templates.go +++ b/cmd/fishymetrics/templates.go @@ -1,5 +1,5 @@ /* - * Copyright 2023 Comcast Cable Communications Management, LLC + * Copyright 2024 Comcast Cable Communications Management, LLC * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,6 +56,7 @@ const indexTmpl string = `