Skip to content

Commit

Permalink
implement azuretracing metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Markus Blaschke <[email protected]>
  • Loading branch information
mblaschke committed Feb 11, 2022
1 parent 1881c48 commit 1c28eb8
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 52 deletions.
37 changes: 25 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
Azure Janitor
==============================
# Azure Janitor

[![license](https://img.shields.io/github/license/webdevops/azure-janitor.svg)](https://github.com/webdevops/azure-janitor/blob/master/LICENSE)
[![DockerHub](https://img.shields.io/badge/DockerHub-webdevops%2Fazure--janitor-blue)](https://hub.docker.com/r/webdevops/azure-janitor/)
Expand All @@ -14,8 +13,7 @@ Janitor tasks:
- ResourceGroup Deployment cleanup based on TTL and limit (count)
- RoleAssignments cleanup based on RoleDefinitionIds and TTL

Usage
-----
## Usage

```
Usage:
Expand Down Expand Up @@ -61,8 +59,7 @@ Help Options:

for Azure API authentication (using ENV vars) see https://docs.microsoft.com/en-us/azure/developer/go/azure-sdk-authentication

Azure tag
---------
## Azure tag

By default the Azure Janitor is using `ttl` as tag and sets the expiry timestamp to `ttl_expiry`.
Based on timestamp in `ttl_expiry` it will trigger the cleanup of the corresponding resource if expired.
Expand Down Expand Up @@ -97,8 +94,7 @@ Supported relative timestamps
- 1mo (1 month)
- 1y (1 year)

RoleAssignments
---------------
## RoleAssignments

**General RoleAssignment TTL**

Expand Down Expand Up @@ -154,8 +150,7 @@ RoleAssignment example with ttl in description:
},
```

ARM template usage
------------------
## ARM template usage

Using relative time (duration):
```
Expand Down Expand Up @@ -233,8 +228,7 @@ Using absolute calculated time:
```

Metrics
-------
## Metrics

| Metric | Type | Description |
|------------------------------------------------|--------------|---------------------------------------------------------------------------------------|
Expand All @@ -243,3 +237,22 @@ Metrics
| `azurejanitor_roleassignment_ttl` | Gauge | List of Azure RoleAssignments with expiry timestamp as value |
| `azurejanitor_resources_deleted` | Counter | Number of deleted resources (by resource type) |
| `azurejanitor_errors` | Counter | Number of failed deleted resources (by resource type) |

### Azuretracing metrics

(with 22.2.0 and later)

Azuretracing metrics collects request count and latency from azure-sdk-for-go, exposes them as metrics and is controllable using
environment variables (e.g. setting buckets, disabling metrics or disabling autoreset).

| Metric | Description |
|------------------------------------------|----------------------------------------------------------------------------------------|
| `azurerm_api_ratelimit` | Azure ratelimit metrics (only on /metrics, resets after query due to limited validity) |
| `azurerm_api_request_*` | Azure request count and latency as histogram |

| Environment variable | Example | Description |
|------------------------------------------|----------------------------------|----------------------------------------------------------|
| `METRIC_AZURERM_API_REQUEST_BUCKETS` | `1, 2.5, 5, 10, 30, 60, 90, 120` | Sets buckets for `azurerm_api_request` histogram metric |
| `METRIC_AZURERM_API_REQUEST_DISABLE` | `false` | Disables `azurerm_api_request_*` metric |
| `METRIC_AZURERM_API_RATELIMIT_DISABLE` | `false` | Disables `azurerm_api_ratelimit` metric |
| `METRIC_AZURERM_API_RATELIMIT_AUTORESET` | `false` | Disables `azurerm_api_ratelimit` autoreset after fetch |
4 changes: 2 additions & 2 deletions janitor/deployments.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,12 @@ func (j *Janitor) runDeployments(ctx context.Context, subscription subscriptions
contextLogger := log.WithField("task", "deployment")

client := resources.NewGroupsClientWithBaseURI(j.Azure.Environment.ResourceManagerEndpoint, *subscription.SubscriptionID)
client.Authorizer = j.Azure.Authorizer
j.decorateAzureAutorest(&client.Client)

resourceTtl := prometheusCommon.NewMetricsList()

deploymentClient := resources.NewDeploymentsClientWithBaseURI(j.Azure.Environment.ResourceManagerEndpoint, *subscription.SubscriptionID)
deploymentClient.Authorizer = j.Azure.Authorizer
j.decorateAzureAutorest(&deploymentClient.Client)

resourceType := "Microsoft.Resources/deployments"

Expand Down
52 changes: 50 additions & 2 deletions janitor/janitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
log "github.com/sirupsen/logrus"
"github.com/webdevops/azure-janitor/config"
prometheusCommon "github.com/webdevops/go-prometheus-common"
"github.com/webdevops/go-prometheus-common/azuretracing"
"strings"
"sync"
"time"
Expand All @@ -30,6 +31,8 @@ type (
Conf config.Opts
Azure JanitorAzureConfig

UserAgent string

Prometheus struct {
MetricDuration *prometheus.GaugeVec
MetricTtlResources *prometheus.GaugeVec
Expand Down Expand Up @@ -70,6 +73,41 @@ var (
)

// Init prepares the janitor for use: it first sets up the Prometheus metrics
// and then initializes the Azure side (the list of subscriptions to process).
func (j *Janitor) Init() {
j.initPrometheus()
j.initAzure()
}

// initAzure resolves the Azure subscriptions the janitor operates on and
// stores them in j.Azure.Subscriptions.
//
// If no subscription is configured, every subscription visible to the
// configured authorizer is discovered, following all result pages. Otherwise
// each configured subscription ID is fetched individually.
//
// It panics on any API error and when auto detection finds no subscription.
func (j *Janitor) initAzure() {
	ctx := context.Background()
	j.Azure.Subscriptions = []subscriptions.Subscription{}

	client := subscriptions.NewClientWithBaseURI(j.Azure.Environment.ResourceManagerEndpoint)
	j.decorateAzureAutorest(&client.Client)

	if len(j.Conf.Azure.Subscription) == 0 {
		// auto lookup subscriptions
		listResult, err := client.List(ctx)
		if err != nil {
			panic(err)
		}

		// List only returns the first page; walk all pages so that
		// subscriptions beyond the first page are not silently dropped.
		for listResult.NotDone() {
			j.Azure.Subscriptions = append(j.Azure.Subscriptions, listResult.Values()...)
			if err := listResult.NextWithContext(ctx); err != nil {
				panic(err)
			}
		}

		if len(j.Azure.Subscriptions) == 0 {
			log.Panic("no Azure Subscriptions found via auto detection, does this ServicePrincipal have read permissions to the subscriptions?")
		}
		return
	}

	// fixed subscription list
	for _, subId := range j.Conf.Azure.Subscription {
		result, err := client.Get(ctx, subId)
		if err != nil {
			panic(err)
		}
		j.Azure.Subscriptions = append(j.Azure.Subscriptions, result)
	}
}

func (j *Janitor) initPrometheus() {
j.Prometheus.MetricDuration = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "azurejanitor_duration",
Expand Down Expand Up @@ -237,7 +275,8 @@ func (j *Janitor) initAuzreApiVersions() {

// fetch location translation map
locationClient := subscriptions.NewClientWithBaseURI(j.Azure.Environment.ResourceManagerEndpoint)
locationClient.Authorizer = j.Azure.Authorizer
j.decorateAzureAutorest(&locationClient.Client)

locationResult, err := locationClient.ListLocations(ctx, subscriptionId, nil)
if err != nil {
panic(err)
Expand All @@ -252,7 +291,7 @@ func (j *Janitor) initAuzreApiVersions() {

// fetch providers
providersClient := resources.NewProvidersClientWithBaseURI(j.Azure.Environment.ResourceManagerEndpoint, *subscription.SubscriptionID)
providersClient.Authorizer = j.Azure.Authorizer
j.decorateAzureAutorest(&providersClient.Client)

result, err := providersClient.ListComplete(ctx, nil, "")
if err != nil {
Expand Down Expand Up @@ -451,3 +490,12 @@ func (j *Janitor) checkExpiryDate(value string) (parsedTime *time.Time, expired

return
}

// decorateAzureAutorest applies the janitor's common settings to an Azure
// autorest client: it assigns the configured authorizer, appends the janitor's
// user agent (panicking if that fails) and finally hands the client to
// azuretracing for decoration.
func (j *Janitor) decorateAzureAutorest(client *autorest.Client) {
	client.Authorizer = j.Azure.Authorizer

	err := client.AddToUserAgent(j.UserAgent)
	if err != nil {
		log.Panic(err)
	}

	azuretracing.DecoreAzureAutoRest(client)
}
2 changes: 1 addition & 1 deletion janitor/resourcegroups.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func (j *Janitor) runResourceGroups(ctx context.Context, subscription subscripti
resourceType := "Microsoft.Resources/resourceGroups"

client := resources.NewGroupsClientWithBaseURI(j.Azure.Environment.ResourceManagerEndpoint, *subscription.SubscriptionID)
client.Authorizer = j.Azure.Authorizer
j.decorateAzureAutorest(&client.Client)

resourceTtl := prometheusCommon.NewMetricsList()

Expand Down
2 changes: 1 addition & 1 deletion janitor/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ func (j *Janitor) runResources(ctx context.Context, subscription subscriptions.S
contextLogger := log.WithField("task", "resource")

client := resources.NewClientWithBaseURI(j.Azure.Environment.ResourceManagerEndpoint, *subscription.SubscriptionID)
client.Authorizer = j.Azure.Authorizer
j.decorateAzureAutorest(&client.Client)

resourceTtl := prometheusCommon.NewMetricsList()

Expand Down
2 changes: 1 addition & 1 deletion janitor/roleassignments.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ func (j *Janitor) runRoleAssignments(ctx context.Context, subscription subscript
resourceType := "Microsoft.Authorization/roleAssignments"

client := authorization.NewRoleAssignmentsClientWithBaseURI(j.Azure.Environment.ResourceManagerEndpoint, *subscription.SubscriptionID)
client.Authorizer = j.Azure.Authorizer
j.decorateAzureAutorest(&client.Client)

result, err := client.ListComplete(ctx, filter, "")
if err != nil {
Expand Down
42 changes: 9 additions & 33 deletions main.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package main

import (
"context"
"fmt"
"github.com/Azure/azure-sdk-for-go/profiles/latest/resources/mgmt/subscriptions"
"github.com/Azure/go-autorest/autorest"
Expand All @@ -12,6 +11,7 @@ import (
log "github.com/sirupsen/logrus"
"github.com/webdevops/azure-janitor/config"
"github.com/webdevops/azure-janitor/janitor"
"github.com/webdevops/go-prometheus-common/azuretracing"
"net/http"
"os"
"path"
Expand All @@ -22,6 +22,8 @@ import (

const (
// Author holds the author name string of this tool.
Author = "webdevops.io"

// UserAgent is the user agent prefix; main appends the git tag to it
// when constructing the Janitor.
UserAgent = "azure-janitor/"
)

var (
Expand Down Expand Up @@ -49,11 +51,11 @@ func main() {

log.Infof("init Janitor")
j := janitor.Janitor{
Conf: opts,
Conf: opts,
UserAgent: UserAgent + gitTag,
Azure: janitor.JanitorAzureConfig{
Authorizer: azureAuthorizer,
Subscriptions: azureSubscriptions,
Environment: azureEnvironment,
Authorizer: azureAuthorizer,
Environment: azureEnvironment,
},
}
j.Init()
Expand Down Expand Up @@ -177,7 +179,6 @@ func checkForDeprecations() {
// Init and build Azure authorizer
func initAzureConnection() {
var err error
ctx := context.Background()

// get environment
azureEnvironment, err = azure.EnvironmentFromName(*opts.Azure.Environment)
Expand All @@ -190,32 +191,6 @@ func initAzureConnection() {
if err != nil {
panic(err)
}

subscriptionsClient := subscriptions.NewClientWithBaseURI(azureEnvironment.ResourceManagerEndpoint)
subscriptionsClient.Authorizer = azureAuthorizer

if len(opts.Azure.Subscription) == 0 {
// auto lookup subscriptions
listResult, err := subscriptionsClient.List(ctx)
if err != nil {
panic(err)
}
azureSubscriptions = listResult.Values()

if len(azureSubscriptions) == 0 {
log.Panic("no Azure Subscriptions found via auto detection, does this ServicePrincipal have read permissions to the subcriptions?")
}
} else {
// fixed subscription list
azureSubscriptions = []subscriptions.Subscription{}
for _, subId := range opts.Azure.Subscription {
result, err := subscriptionsClient.Get(ctx, subId)
if err != nil {
panic(err)
}
azureSubscriptions = append(azureSubscriptions, result)
}
}
}

// start and handle prometheus handler
Expand All @@ -227,6 +202,7 @@ func startHttpServer() {
}
})

http.Handle("/metrics", promhttp.Handler())
http.Handle("/metrics", azuretracing.RegisterAzureMetricAutoClean(promhttp.Handler()))

log.Fatal(http.ListenAndServe(opts.ServerBind, nil))
}

0 comments on commit 1c28eb8

Please sign in to comment.