Skip to content
This repository has been archived by the owner on Jul 28, 2021. It is now read-only.

Commit

Permalink
Merge pull request #11 from keptn-contrib/release-0.1.0
Browse files Browse the repository at this point in the history
Release 0.1.0
  • Loading branch information
bacherfl authored Nov 21, 2019
2 parents 2f3cf66 + 7ce6673 commit 821eeac
Show file tree
Hide file tree
Showing 25 changed files with 2,115 additions and 2 deletions.
5 changes: 5 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Paths listed here are excluded from the Docker build context.
.vscode/
.idea/
docs/
deploy/
Dockerfile
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# editor settings
.vscode

# ignore everything directly inside .idea/ ...
.idea/*
# allow shared run configurations
!.idea/runConfigurations/

# presumably the Go vendor directory - confirm
vendor

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

118 changes: 118 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
# Travis CI pipeline: code style check, tests, and docker image builds per branch type.
dist: xenial
language: bash
services:
  - docker
env:
  global:
    - GO111MODULE=on
before_install:
  - export TZ=Europe/Vienna
  - IMAGE=keptn/prometheus-sli-service
  - REPO_URL="$(git remote get-url --all origin)"
  # get the last tag
  # --max-count=1 restricts rev-list to the newest tagged commit, so git describe
  # receives exactly one commit and GIT_LAST_TAG holds exactly one tag name
  - GIT_LAST_TAG=$(git describe --tags $(git rev-list --tags --max-count=1) || echo "0.0.0")
  - GIT_NUM_COMMITS_SINCE_LAST_TAG=$(git rev-list "${GIT_LAST_TAG}..HEAD" --count || echo "trunk")
  # get current branch name
  - GIT_BRANCH_NAME=$(git branch | grep \* | cut -d ' ' -f2)
  - GIT_SHA=$(git rev-parse --short HEAD)
  - GIT_BRANCH_AND_COMMIT=$(git describe --exact-match 2> /dev/null || echo "`git symbolic-ref HEAD 2> /dev/null | cut -b 12-`-`git log --pretty=format:\"%h\" -1`")
  # find out if we are on a tag (= exact match)
  # if not: use tag + number of commit + commit hash
  - VERSION="$(cat version | tr -d '[:space:]')"
  - DATE="$(date +'%Y%m%d.%H%M')"
  - ./writeManifest.sh
  - cat MANIFEST
  # uncomment certain lines from Dockerfile that are for travis builds only
  - sed -i '/#travis-uncomment/s/^#travis-uncomment //g' Dockerfile
jobs:
  include:
    - stage: codestyle
      # Check Codestyle using go fmt
      services: []
      language: go
      go:
        - 1.12.x
      # skip install
      install: true
      script:
        - echo "Checking code style..."
        - unformatted=$(gofmt -l .)
        # gofmt -l prints the files whose formatting differs; any output fails the stage
        - |
          if [ -n "$unformatted" ]; then
            echo "Code Style Check failed for the following files: ${unformatted}".
            echo "Please run: gofmt -w ."
            echo "After that ammend your commit (e.g.: git add ${unformatted} && git commit --amend --no-edit) and force push the changes (git push -f)."
            travis_terminate 1
          fi
    - stage: tests
      # Run tests
      services: []
      language: go
      go:
        - 1.12.x
      # cache some go files
      cache:
        directories:
          - $HOME/.cache/go-build
          - $HOME/gopath/pkg/mod
      addons:
        sonarcloud:
          organization: "keptn-contrib"
      script:
        - sonar-scanner
        - go build
        - go test -race -v ./...

    - stage: feature/bug/hotfix/patch
      # build docker images for feature/bug/hotfix/patch branches
      # NOTE(review): unlike the develop and release-branch stages, this condition does
      # not exclude pull requests - confirm that this is intended
      if: branch =~ ^feature.*$ OR branch =~ ^bug.*$ OR branch =~ ^hotfix.*$ OR branch =~ ^patch.*$
      script:
        - echo $TRAVIS_BUILD_STAGE_NAME
        # branch names are expected to look like <type>/<number>
        - TYPE="$(echo "$TRAVIS_BRANCH" | cut -d'/' -f1)"
        - NUMBER="$(echo "$TRAVIS_BRANCH" | cut -d'/' -f2)"
        - docker build . -t "${IMAGE}:${GIT_SHA}"
        - docker tag "${IMAGE}:${GIT_SHA}" "${IMAGE}:${TYPE}.${NUMBER}.${DATE}"
      after_success:
        - echo "$REGISTRY_PASSWORD" | docker login --username "$REGISTRY_USER" --password-stdin
        - docker push "${IMAGE}:${GIT_SHA}"
        - docker push "${IMAGE}:${TYPE}.${NUMBER}.${DATE}"

    - stage: develop
      # build docker images for develop branch
      if: branch = develop AND NOT type = pull_request
      script:
        - echo $TRAVIS_BUILD_STAGE_NAME
        - docker build . -t "${IMAGE}:${GIT_SHA}"
        - docker tag "${IMAGE}:${GIT_SHA}" "${IMAGE}:${DATE}"
        - docker tag "${IMAGE}:${GIT_SHA}" "${IMAGE}:latest"
      after_success:
        - echo "$REGISTRY_PASSWORD" | docker login --username "$REGISTRY_USER" --password-stdin
        - docker push "${IMAGE}:${GIT_SHA}"
        - docker push "${IMAGE}:${DATE}"
        - docker push "${IMAGE}:latest"

    - stage: release-branch
      # build docker images for release branches
      if: branch =~ ^release.*$ AND NOT type = pull_request
      script:
        - echo $TRAVIS_BUILD_STAGE_NAME
        - docker build . -t "${IMAGE}:${GIT_SHA}"
        - docker tag "${IMAGE}:${GIT_SHA}" "${IMAGE}:${VERSION}.${DATE}"
        - docker tag "${IMAGE}:${GIT_SHA}" "${IMAGE}:${VERSION}.latest"
      after_success:
        - echo "$REGISTRY_PASSWORD" | docker login --username "$REGISTRY_USER" --password-stdin
        - docker push "${IMAGE}:${GIT_SHA}"
        - docker push "${IMAGE}:${VERSION}.${DATE}"
        - docker push "${IMAGE}:${VERSION}.latest"

    - stage: tags
      # build docker images for tags
      if: tag IS present
      script:
        - echo $TRAVIS_BUILD_STAGE_NAME
        - docker build . -t "${IMAGE}:${VERSION}"
      after_success:
        - echo "$REGISTRY_PASSWORD" | docker login --username "$REGISTRY_USER" --password-stdin
        - docker push "${IMAGE}:${VERSION}"
3 changes: 2 additions & 1 deletion CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# Lines starting with '#' are comments.
# Each line is a file pattern followed by one or more owners.

# These owners will be the default owners for everything in the repo.
# These owners will be the default owners for everything in the repo.
* @christian-kreuzberger-dtx @bacherfl
54 changes: 54 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# from https://skaffold.dev/docs/workflows/debug/
# Use the official Golang image to create a build artifact.
# This is based on Debian and sets the GOPATH to /go.
# https://hub.docker.com/_/golang
FROM golang:1.12 as builder

WORKDIR /go/src/github.com/keptn-contrib/prometheus-sli-service

ENV GO111MODULE=on
ENV BUILDFLAGS=""

# Copy `go.mod` for definitions and `go.sum` to invalidate the next layer
# in case of a change in the dependencies
COPY go.mod go.sum ./

# download dependencies
RUN go mod download

# Non-empty value (e.g. --build-arg debugBuild=true) selects a debug build.
ARG debugBuild

# finally Copy local code to the container image.
COPY . .

# Build the command inside the container.
# (You may fetch or manage dependencies here, either manually or with a tool like "godep".)
# The debug flags are selected in the same RUN as `go build`: a variable exported in one
# RUN instruction is gone in the next one, and passing -gcflags "all=-N -l" through an
# unquoted variable would split the quoted argument incorrectly.
RUN if [ -n "$debugBuild" ]; then \
      CGO_ENABLED=0 GOOS=linux go build $BUILDFLAGS -gcflags "all=-N -l" -v -o prometheus-sli-service; \
    else \
      CGO_ENABLED=0 GOOS=linux go build $BUILDFLAGS -v -o prometheus-sli-service; \
    fi

# Use a Docker multi-stage build to create a lean production image.
# https://docs.docker.com/develop/develop-images/multistage-build/#use-multi-stage-builds
FROM alpine:3.7
RUN apk add --no-cache ca-certificates

# debugBuild is declared again here so that it is available to this stage.
ARG debugBuild

# If we are debugging, we need to install libc6-compat for delve to work on alpine based containers
RUN if [ ! -z "$debugBuild" ]; then apk add --no-cache libc6-compat; fi

# Copy the binary to the production image from the builder stage.
COPY --from=builder /go/src/github.com/keptn-contrib/prometheus-sli-service/prometheus-sli-service /prometheus-sli-service

EXPOSE 8080

# required for external tools to detect this as a go binary
ENV GOTRACEBACK=all

# KEEP THE FOLLOWING LINES COMMENTED OUT!!! (they will be included within the travis-ci build)
# The travis build strips the exact prefix "#travis-uncomment " with sed, so do not reformat these lines.
#travis-uncomment ADD MANIFEST /
#travis-uncomment COPY entrypoint.sh /
#travis-uncomment ENTRYPOINT ["/entrypoint.sh"]

CMD ["/prometheus-sli-service"]
8 changes: 8 additions & 0 deletions MANIFEST
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
##########
branch: MANIFEST_BRANCH
repository: MANIFEST_REPOSITORY
commitlink: MANIFEST_REPOSITORY/commit/MANIFEST_COMMIT
repolink: MANIFEST_REPOSITORY/tree/MANIFEST_COMMIT
travisbuild: MANIFEST_TRAVIS_JOB_URL
timestamp: MANIFEST_DATE
##########
99 changes: 98 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,103 @@
# Prometheus SLI Service
![GitHub release (latest by date)](https://img.shields.io/github/v/release/keptn-contrib/prometheus-sli-service)
[![Build Status](https://travis-ci.org/keptn-contrib/prometheus-sli-service.svg?branch=master)](https://travis-ci.org/keptn-contrib/prometheus-sli-service)
[![Go Report Card](https://goreportcard.com/badge/github.com/keptn-contrib/prometheus-sli-service)](https://goreportcard.com/report/github.com/keptn-contrib/prometheus-sli-service)

The *prometheus-sli-service* is a Keptn service that is responsible for retrieving the values of Keptn-supported SLIs from a Prometheus API endpoint.
This service is used for retrieving service level indicators (SLIs) from a Prometheus API endpoint. Per default, it fetches metrics from the Prometheus instance set up by Keptn
(`prometheus-service.monitoring.svc.cluster.local:8080`), but it can also be configured to use any reachable Prometheus endpoint using basic authentication by providing the credentials
via a secret in the `keptn` namespace of the cluster.

The supported default SLIs are:

- throughput
- error_rate
- response_time_p50
- response_time_p90
- response_time_p95

The provided SLIs are based on the [RED metrics](https://grafana.com/files/grafanacon_eu_2018/Tom_Wilkie_GrafanaCon_EU_2018.pdf)

## Basic Usage

Per default, the service works with the following assumptions regarding the setup of the Prometheus instance:

- Each **service** within a **stage** of a **project** has a Prometheus scrape job definition with the name: `<service>-<project>-<stage>`

For example, if `project=sockshop`, `stage=production` and `service=carts`, the scrape job name would have to be `carts-sockshop-production`.

- Every service provides the following Metrics for its corresponding scrape job:
- http_response_time_milliseconds (Histogram)
- http_requests_total (Counter)

This metric has to contain the `status` label, indicating the HTTP response code of the requests handled by the service.
It is highly recommended that this metric also provides a label to query metric values for specific endpoints, e.g. `handler`

An example of an entry would look like this: `http_requests_total{method="GET",handler="VersionController.getInformation",status="200",} 4.0`

- Based on those metrics, the queries for the SLIs are built as follows:

- **throughput**: `sum(rate(http_requests_total{job="<service>-<project>-<stage>-canary"}[<test_duration_in_seconds>s]))`
- **error_rate**: `sum(rate(http_requests_total{job="<service>-<project>-<stage>-canary",status!~'2..'}[<test_duration_in_seconds>s]))/sum(rate(http_requests_total{job="<service>-<project>-<stage>-canary"}[<test_duration_in_seconds>s]))`
- **response_time_p50**: `histogram_quantile(0.50, sum(rate(http_response_time_milliseconds_bucket{job='<service>-<project>-<stage>-canary'}[<test_duration_in_seconds>s])) by (le))`
- **response_time_p90**: `histogram_quantile(0.90, sum(rate(http_response_time_milliseconds_bucket{job='<service>-<project>-<stage>-canary'}[<test_duration_in_seconds>s])) by (le))`
- **response_time_p95**: `histogram_quantile(0.95, sum(rate(http_response_time_milliseconds_bucket{job='<service>-<project>-<stage>-canary'}[<test_duration_in_seconds>s])) by (le))`

## Advanced Usage

### Using an external Prometheus instance
To use a Prometheus instance other than the one that's being managed by Keptn for a certain project, a secret containing the URL and the access credentials has to be deployed into the `keptn` namespace. The secret must have the following format:

```yaml
user: test
password: test
url: http://prometheus-service.monitoring.svc.cluster.local:8080
```
If this information is stored in a file, e.g. `prometheus-creds.yaml`, it can be stored with the following command (don't forget to replace the `<project>` placeholder with the name of your project):

```bash
kubectl create secret -n keptn generic prometheus-credentials-<project> --from-file=prometheus-credentials=./prometheus-creds.yaml
```

Please note that there is a naming convention for the secret, because this can be configured per **project**. Therefore, the secret has to have the name `prometheus-credentials-<project>`


### Custom SLI queries

Users can override the predefined queries, as well as add custom SLI queries by creating a `ConfigMap` with the name `prometheus-sli-config-<project>` in the `keptn` namespace.
In this ConfigMap, a YAML object containing the queries can be defined, e.g.:

```yaml
kind: ConfigMap
apiVersion: v1
metadata:
name: prometheus-sli-config-sockshop
namespace: keptn
data:
custom-queries: |
throughput: "rate(my_custom_metric{job='$SERVICE-$PROJECT-$STAGE',handler=~'$handler'}[$DURATION_SECONDS])"
error_rate: "sum(rate(my_custom_metric{job='$SERVICE-$PROJECT-$STAGE',handler=~'$handler',status!~'2..'}[$DURATION_SECONDS]))/sum(rate(my_custom_metric{job='$SERVICE-$PROJECT-$STAGE',handler=~'$handler'}[$DURATION_SECONDS]))"
response_time_p50: "histogram_quantile(0.50,sum(rate(my_custom_response_time_metric{job='$SERVICE-$PROJECT-$STAGE'}[$DURATION_SECONDS]))by(le))"
response_time_p90: "histogram_quantile(0.90,sum(rate(my_custom_response_time_metric{job='$SERVICE-$PROJECT-$STAGE'}[$DURATION_SECONDS]))by(le))"
response_time_p95: "histogram_quantile(0.95,sum(rate(my_custom_response_time_metric{job='$SERVICE-$PROJECT-$STAGE'}[$DURATION_SECONDS]))by(le))"
# Example for a custom SLI that is not part of the default SLIs
cpu_usage: avg(rate(container_cpu_usage_seconds_total{namespace="$PROJECT-$STAGE",pod_name=~"$SERVICE-primary-.*"}[5m]))
```

Note that, similarly to the custom endpoint configuration, the name of the ConfigMap has to be `prometheus-sli-config-<project>`, and it has to be stored in the `keptn` namespace.

Within the user-defined queries, the following variables can be used to dynamically build the query, depending on the project/stage/service, and the time frame:

- $PROJECT: will be replaced with the name of the project
- $STAGE: will be replaced with the name of the stage
- $SERVICE: will be replaced with the name of the service
- $DURATION_SECONDS: will be replaced with the test run duration, e.g. 30s

For example, if an evaluation for the service **carts** in the stage **production** of the project **sockshop** is triggered, and the tests ran for 30s, these will be the resulting queries:

```
rate(my_custom_metric{job='$SERVICE-$PROJECT-$STAGE',handler=~'$handler'}[$DURATION_SECONDS]) => rate(my_custom_metric{job='carts-sockshop-production',handler=~'$handler'}[30s])
```

## Installation

Expand Down
36 changes: 36 additions & 0 deletions deploy/distributor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
---
## Distributor deployment for the prometheus-sli-service.
## Its environment configures the NATS URL, the topic
## sh.keptn.internal.event.get-sli and prometheus-sli-service as recipient.
## NOTE(review): the resource name mentions "monitoring-configure" although the
## topic is get-sli - confirm the name is intended.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: prometheus-sli-service-monitoring-configure-distributor
  namespace: keptn
spec:
  replicas: 1
  selector:
    matchLabels:
      run: distributor
  template:
    metadata:
      labels:
        run: distributor
    spec:
      containers:
        - name: distributor
          image: keptn/distributor:latest
          ports:
            - containerPort: 8080
          env:
            - name: PUBSUB_URL
              value: "nats://keptn-nats-cluster"
            - name: PUBSUB_TOPIC
              value: "sh.keptn.internal.event.get-sli"
            - name: PUBSUB_RECIPIENT
              value: "prometheus-sli-service"
          resources:
            requests:
              cpu: 50m
              memory: 32Mi
            limits:
              cpu: 500m
              memory: 128Mi
Loading

0 comments on commit 821eeac

Please sign in to comment.