Skip to content

Commit

Permalink
Store Chaos-Exporter metrics into AWS CloudWatch (#72)
Browse files Browse the repository at this point in the history
* Introduce AWS CloudExporter

authored-by: nrusinko <Nikolay Rusinko>
  • Loading branch information
nrusinko authored Sep 15, 2020
1 parent b308de9 commit 2fe2fa8
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 2 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
FROM ubuntu:16.04
# Install the CA certificate bundle (needed for outbound TLS, presumably to the
# AWS CloudWatch endpoint) and drop the apt package lists afterwards so they do
# not bloat the layer. The base image is Ubuntu, so the lists live under
# /var/lib/apt/lists; /var/cache/apk only exists on Alpine.
RUN apt-get update && apt-get install -y ca-certificates && rm -rf /var/lib/apt/lists/*

COPY ./exporter /

Expand Down
12 changes: 11 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
[![Go Report Card](https://goreportcard.com/badge/github.com/litmuschaos/chaos-exporter)](https://goreportcard.com/report/github.com/litmuschaos/chaos-exporter)
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bgithub.com%2Flitmuschaos%2Fchaos-exporter.svg?type=shield)](https://app.fossa.io/projects/git%2Bgithub.com%2Flitmuschaos%2Fchaos-exporter?ref=badge_shield)

- This is a custom prometheus exporter to expose Litmus Chaos metrics.
- This is a custom Prometheus and CloudWatch exporter to expose Litmus Chaos metrics.
To learn more about Litmus Chaos Experiments & the Litmus Chaos Operator,
visit this link: [Litmus Docs](https://docs.litmuschaos.io/)

Expand All @@ -23,6 +23,9 @@
- The metrics are of type Gauge, w/ each of the status metrics mapped to a
numeric value(not-executed:0, fail:1, running:2, pass:3)

- The CloudWatch metrics are published with the unit `Count`, with each of the status metrics mapped to a
numeric value (not-executed: 0, fail: 1, running: 2, pass: 3)

## Steps to build & deploy:

### Running Litmus Chaos Experiments in order to generate metrics
Expand Down Expand Up @@ -52,18 +55,22 @@
# HELP chaosengine_experiments_count Total number of experiments executed by the chaos engine
# TYPE chaosengine_experiments_count gauge
chaosengine_experiments_count{engine_name="engine-nginx",engine_namespace="litmus"} 1
[ "ContainerInsights/ChaosMonitor", "chaosengine_experiments_count", "ClusterName", "sandbox", "Service", "chaos-monitor" ]
# HELP chaosengine_failed_experiments Total number of failed experiments by the chaos engine
# TYPE chaosengine_failed_experiments gauge
chaosengine_failed_experiments{engine_name="engine-nginx",engine_namespace="litmus"} 0
[ "ContainerInsights/ChaosMonitor", "chaosengine_failed_experiments", "ClusterName", "sandbox", "Service", "chaos-monitor" ]
# HELP chaosengine_passed_experiments Total number of passed experiments by the chaos engine
# TYPE chaosengine_passed_experiments gauge
chaosengine_passed_experiments{engine_name="engine-nginx",engine_namespace="litmus"} 1
[ "ContainerInsights/ChaosMonitor", "chaosengine_passed_experiments", "ClusterName", "sandbox", "Service", "chaos-monitor" ]
# HELP chaosengine_waiting_experiments Total number of waiting experiments by the chaos engine
# TYPE chaosengine_waiting_experiments gauge
chaosengine_waiting_experiments{engine_name="engine-nginx",engine_namespace="litmus"} 0
[ "ContainerInsights/ChaosMonitor", "chaosengine_waiting_experiments", "ClusterName", "sandbox", "Service", "chaos-monitor" ]
# HELP cluster_overall_RunningExperiment Running Experiment with ChaosEngine Details
# TYPE cluster_overall_RunningExperiment gauge
Expand All @@ -72,14 +79,17 @@ cluster_overall_RunningExperiment{engine_name="engine-nginx",engine_namespace="l
# HELP cluster_overall_experiments_count Total number of experiments executed in the Cluster
# TYPE cluster_overall_experiments_count gauge
cluster_overall_experiments_count 1
[ "ContainerInsights/ChaosMonitor", "cluster_experiments_count", "ClusterName", "sandbox", "Service", "chaos-monitor" ]
# HELP cluster_overall_failed_experiments Total number of failed experiments in the Cluster
# TYPE cluster_overall_failed_experiments gauge
cluster_overall_failed_experiments 0
[ "ContainerInsights/ChaosMonitor", "cluster_failed_experiments", "ClusterName", "sandbox", "Service", "chaos-monitor" ]
# HELP cluster_overall_passed_experiments Total number of passed experiments in the Cluster
# TYPE cluster_overall_passed_experiments gauge
cluster_overall_passed_experiments 1
[ "ContainerInsights/ChaosMonitor", "cluster_passed_experiments", "ClusterName", "sandbox", "Service", "chaos-monitor" ]
...
```

Expand Down
73 changes: 72 additions & 1 deletion controller/scrape.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,23 @@ package controller

import (
"fmt"
"os"
"github.com/pkg/errors"
"strings"

metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
_ "k8s.io/client-go/plugin/pkg/client/auth/gcp"
"k8s.io/klog"

"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/cloudwatch"
litmuschaosv1alpha1 "github.com/litmuschaos/chaos-operator/pkg/apis/litmuschaos/v1alpha1"
clientV1alpha1 "github.com/litmuschaos/chaos-operator/pkg/client/clientset/versioned"
)

var (
	// chaosExperimentList holds the list of experiments in a chaosengine.
	chaosExperimentList []string

	// experimentStatusMap holds the chaosresult of the running experiment.
	// NOTE(review): the meaning of the string key and bool value is not
	// visible from this declaration - confirm against the code that fills it.
	experimentStatusMap = make(map[string]bool)
)

Expand Down Expand Up @@ -61,8 +66,11 @@ func GetLitmusChaosMetrics(clientSet *clientV1alpha1.Clientset) error {
pass += passedEngine
fail += failedEngine
setEngineChaosMetrics(engineDetails, &chaosEngine)
setAwsEngineChaosMetrics(engineDetails, &chaosEngine)
}

setClusterChaosMetrics(total, pass, fail)
setAwsClusterChaosMetrics(total, pass, fail)
return nil
}

Expand All @@ -78,13 +86,31 @@ func setEngineChaosMetrics(engineDetails ChaosEngineDetail, chaosEngine *litmusc
EngineFailedExperiments.WithLabelValues(engineDetails.Namespace, engineDetails.Name).Set(engineDetails.FailedExp)
EngineWaitingExperiments.WithLabelValues(engineDetails.Namespace, engineDetails.Name).Set(engineDetails.AwaitedExp)
}
// setAwsEngineChaosMetrics publishes the experiment counters of a single
// chaos engine (passed, failed, total and waiting) to AWS CloudWatch, each
// with the unit "Count".
//
// A new AWS session with shared-config support is created on every call. If
// the session cannot be created, or a metric cannot be published, the problem
// is logged and the function carries on: a CloudWatch outage or a
// misconfiguration must not panic the exporter or interrupt the scrape.
//
// chaosEngine is currently unused; it is kept so the signature mirrors
// setEngineChaosMetrics.
func setAwsEngineChaosMetrics(engineDetails ChaosEngineDetail, chaosEngine *litmuschaosv1alpha1.ChaosEngine) {
	sess, err := session.NewSessionWithOptions(session.Options{
		SharedConfigState: session.SharedConfigEnable,
	})
	if err != nil {
		klog.Errorf("Unable to create AWS session, skipping CloudWatch metrics for engine %v/%v: %v",
			engineDetails.Namespace, engineDetails.Name, err)
		return
	}

	// Publication order matches the original implementation.
	metrics := []struct {
		name  string
		value float64
	}{
		{"chaosengine_passed_experiments", engineDetails.PassedExp},
		{"chaosengine_failed_experiments", engineDetails.FailedExp},
		{"chaosengine_experiments_count", engineDetails.TotalExp},
		{"chaosengine_waiting_experiments", engineDetails.AwaitedExp},
	}

	for _, m := range metrics {
		// Keep going after a failure so one rejected datum does not hide the others.
		if err := putAwsMetricData(sess, m.name, "Count", m.value); err != nil {
			klog.Errorf("Unable to publish CloudWatch metric %v for engine %v/%v: %v",
				m.name, engineDetails.Namespace, engineDetails.Name, err)
		}
	}
}

// setAwsClusterChaosMetrics publishes the cluster-wide experiment counters
// (passed, failed and total) to AWS CloudWatch, each with the unit "Count".
//
// A new AWS session with shared-config support is created on every call. If
// the session cannot be created, or a metric cannot be published, the problem
// is logged and the function carries on: a CloudWatch outage or a
// misconfiguration must not panic the exporter or interrupt the scrape.
func setAwsClusterChaosMetrics(total float64, pass float64, fail float64) {
	sess, err := session.NewSessionWithOptions(session.Options{
		SharedConfigState: session.SharedConfigEnable,
	})
	if err != nil {
		klog.Errorf("Unable to create AWS session, skipping cluster CloudWatch metrics: %v", err)
		return
	}

	// Publication order matches the original implementation.
	metrics := []struct {
		name  string
		value float64
	}{
		{"cluster_passed_experiments", pass},
		{"cluster_failed_experiments", fail},
		{"cluster_experiments_count", total},
	}

	for _, m := range metrics {
		// Keep going after a failure so one rejected datum does not hide the others.
		if err := putAwsMetricData(sess, m.name, "Count", m.value); err != nil {
			klog.Errorf("Unable to publish CloudWatch metric %v: %v", m.name, err)
		}
	}
}

func getExperimentMetricsFromEngine(chaosEngine *litmuschaosv1alpha1.ChaosEngine) (float64, float64, float64, float64) {
var total, passed, failed, waiting float64
expStatusList := chaosEngine.Status.Experiments
total = float64(len(expStatusList))

for i :=0; i < len(expStatusList); i++ {
for i := 0; i < len(expStatusList); i++ {
verdict := strings.ToLower(expStatusList[i].Verdict)
fmt.Println(verdict)
switch verdict {
Expand Down Expand Up @@ -120,3 +146,48 @@ func filterMonitoringEnabledEngines(engineList *litmuschaosv1alpha1.ChaosEngineL
}
return &filteredEngineList
}

// putAwsMetricData sends one datum named metricName, with the given unit and
// value, to AWS CloudWatch using a client built from sess.
//
// The CloudWatch namespace is read from AWS_CLOUDWATCH_METRIC_NAMESPACE. The
// datum carries two dimensions, "ClusterName" and "Service", whose values come
// from the CLUSTER_NAME and APP_NAME environment variables.
//
// It returns an error when any of the three environment variables is empty,
// or when the PutMetricData call fails (that failure is also logged).
func putAwsMetricData(sess *session.Session, metricName string, unit string, value float64) error {
	client := cloudwatch.New(sess)

	// The SDK input types take *string fields, so every value needs an
	// addressable local.
	var (
		clusterDimension = "ClusterName"
		serviceDimension = "Service"

		namespace   = os.Getenv("AWS_CLOUDWATCH_METRIC_NAMESPACE")
		clusterName = os.Getenv("CLUSTER_NAME")
		serviceName = os.Getenv("APP_NAME")
	)

	if namespace == "" || serviceName == "" || clusterName == "" {
		return errors.Errorf("You must supply a namespace, clusterName and serviceName values")
	}

	klog.V(0).Infof("Putting new AWS metric: Namespace %v, Metric %v", namespace, metricName)

	input := &cloudwatch.PutMetricDataInput{
		Namespace: &namespace,
		MetricData: []*cloudwatch.MetricDatum{
			{
				MetricName: &metricName,
				Unit:       &unit,
				Value:      &value,
				Dimensions: []*cloudwatch.Dimension{
					{Name: &clusterDimension, Value: &clusterName},
					{Name: &serviceDimension, Value: &serviceName},
				},
			},
		},
	}

	if _, err := client.PutMetricData(input); err != nil {
		klog.V(0).Infof("Error during putting metrics to CloudWatch: %v", err)
		return err
	}

	return nil
}
59 changes: 59 additions & 0 deletions deploy/chaos-exporter-cloudwatch.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
## The ENV variables specified in this manifest MUST be provided with appropriate values to obtain metrics on AWS CloudWatch
---
apiVersion: apps/v1
kind: Deployment
metadata:
labels:
app: chaos-monitor
name: chaos-monitor
namespace: litmus
spec:
replicas: 1
selector:
matchLabels:
app: chaos-monitor
template:
metadata:
labels:
app: chaos-monitor
spec:
containers:
- image: litmuschaos/chaos-exporter:ci
imagePullPolicy: Always
name: chaos-exporter
env:
# Namespace in CloudWatch
- name: AWS_CLOUDWATCH_METRIC_NAMESPACE
value:
# Name of a Kubernetes cluster to collect metrics from
- name: CLUSTER_NAME
value:
- name: APP_NAME
value: chaos-monitor
# Valid access key for your AWS account
- name: AWS_ACCESS_KEY_ID
value:
# Valid secret key for your AWS account
- name: AWS_SECRET_ACCESS_KEY
value:
# Region where you want to store CloudWatch metrics
- name: AWS_REGION
value:
serviceAccount: litmus
serviceAccountName: litmus
---
apiVersion: v1
kind: Service
metadata:
labels:
app: chaos-monitor
name: chaos-monitor
namespace: litmus
spec:
ports:
- port: 8080
protocol: TCP
targetPort: 8080
selector:
app: chaos-monitor
type: ClusterIP
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/litmuschaos/chaos-exporter
go 1.13

require (
github.com/aws/aws-sdk-go v1.34.18
github.com/golang/protobuf v1.4.1 // indirect
github.com/imdario/mergo v0.3.9 // indirect
github.com/litmuschaos/chaos-operator v0.0.0-20200502085045-ae0a262d3baa
Expand Down
8 changes: 8 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuy
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/asaskevich/govalidator v0.0.0-20180720115003-f9ffefc3facf/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
github.com/aws/aws-sdk-go v1.34.18 h1:Mo/Clq3u1dQFzpg8YQqBii8m+Vl3fWIfHi6kXs5wpuM=
github.com/aws/aws-sdk-go v1.34.18/go.mod h1:5zCpMtNQVjRREroY7sYe8lOMRSxkhG6MZveU8YkpAk0=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
Expand Down Expand Up @@ -75,6 +77,7 @@ github.com/go-openapi/swag v0.17.2 h1:K/ycE/XTUDFltNHSO32cGRUhrVGJD64o8WgAIZNyc3
github.com/go-openapi/swag v0.17.2/go.mod h1:AByQ+nYG6gQg71GINrmuDXCPWdL640yX49/kXLo40Tg=
github.com/go-openapi/validate v0.17.0/go.mod h1:Uh4HdOzKt19xGIGm1qHf/ofbX1YQ4Y+MYsct2VUrAJ4=
github.com/go-openapi/validate v0.18.0/go.mod h1:Uh4HdOzKt19xGIGm1qHf/ofbX1YQ4Y+MYsct2VUrAJ4=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v0.0.0-20171007142547-342cbe0a0415/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/gogo/protobuf v1.1.1 h1:72R+M5VuhED/KujmZVcIquuo8mBgX4oVda//DQb3PXo=
Expand Down Expand Up @@ -119,6 +122,8 @@ github.com/imdario/mergo v0.3.6/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJ
github.com/imdario/mergo v0.3.9 h1:UauaLniWCFHWd+Jp9oCEkTBj8VO/9DKg3PV3VCNMDIg=
github.com/imdario/mergo v0.3.9/go.mod h1:2EnlNZ0deacrJVfApfmtdGgDfMuh/nq6Ok1EcJh5FfA=
github.com/inconshreveable/mousetrap v1.0.0/go.mod h1:PxqpIevigyE2G7u3NXJIT2ANytuPF1OarO4DADm73n8=
github.com/jmespath/go-jmespath v0.3.0 h1:OS12ieG61fsCg5+qLJ+SsW9NicxNkg3b25OyT2yCeUc=
github.com/jmespath/go-jmespath v0.3.0/go.mod h1:9QtRXoHjLGCJ5IBSaohpXITPlowMeeYCZ7fLUTSywik=
github.com/jonboulle/clockwork v0.0.0-20141017032234-72f9bd7c4e0c/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo=
github.com/json-iterator/go v0.0.0-20180701071628-ab8a2e0c74be/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
Expand Down Expand Up @@ -162,6 +167,7 @@ github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1 h1:iURUrRGxPUNPdy5/HRSm+Yj6okJ6UtLINN0Q9M4+h3I=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/pquerna/cachecontrol v0.0.0-20171018203845-0dec1b30a021/go.mod h1:prYjPmNq4d1NPVmpShWobRqXY3q7Vp+80DqgxxUrUIA=
Expand Down Expand Up @@ -198,6 +204,7 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U=
github.com/xiang90/probing v0.0.0-20160813154853-07dd2e8dfe18/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
go.uber.org/atomic v0.0.0-20181018215023-8dc6146f7569/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
Expand Down Expand Up @@ -227,6 +234,7 @@ golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc h1:gkKoSkUmnU6bpS/VhkuO27bzQeSA51uaEfbOW5dNb68=
golang.org/x/net v0.0.0-20190812203447-cdfb69ac37fc/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b h1:0mm1VjtFUOIlE1SbDlwjYaDxZVDP2S5ou6y0gSgXHu8=
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/oauth2 v0.0.0-20190402181905-9f3314589c9a/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
Expand Down

0 comments on commit 2fe2fa8

Please sign in to comment.